Mercurial > repos > davidvanzessen > mutation_analysis
diff wrapper.sh @ 4:069419cccba4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 22 Sep 2014 10:19:36 -0400 |
parents | a0b27058dcac |
children | 257e98fd3b54 |
line wrap: on
line diff
--- a/wrapper.sh Wed Sep 17 07:25:17 2014 -0400 +++ b/wrapper.sh Mon Sep 22 10:19:36 2014 -0400 @@ -1,4 +1,5 @@ #!/bin/bash +set -e dir="$(cd "$(dirname "$0")" && pwd)" input=$1 output=$2 @@ -12,108 +13,50 @@ cat $PWD/files/*/8_* > $PWD/mutationstats.txt cat $PWD/files/*/10_* > $PWD/hotspots.txt -cp $dir/HS12RSS.txt $outdir/ -cp $dir/HS23RSS.txt $outdir/ + +echo "identification" +python $dir/gene_identification.py --input $PWD/summary.txt --output $PWD/annotatedsummary.txt +echo "merging" +Rscript $dir/merge_and_filter.r $PWD/annotatedsummary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/merged.txt $outdir/unmatched.txt -mkdir $outdir/identification/ -python $dir/gene_identification.py --input $PWD/summary.txt --outdir $outdir/identification/ +genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" +echo "R mutation analysis" +Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir 2>&1 +echo "python mutation analysis" +python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --output $outdir/hotspot_analysis.txt + +cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt + genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) -tmp=$PWD/tmp -tmp2=$PWD/tmp2 -hotspottmp=$PWD/hotspottmp -mutationtmp=$PWD/mutationtmp -touch $outdir/mutationandhotspot.txt -for gene in ${genes[@]} -do - echo "Running $gene <br />" >> $output - mkdir $outdir/$gene - cp $dir/HS12RSS.txt $outdir/$gene/ - cp $dir/HS23RSS.txt $outdir/$gene/ - echo "Filtering input..." >> $output - Rscript $dir/filter.r $PWD/summary.txt $outdir/identification/${gene}.txt $outdir/$gene/summary.txt - Rscript $dir/filter.r $PWD/mutationanalysis.txt $outdir/identification/${gene}.txt $outdir/$gene/mutationanalysis.txt - Rscript $dir/filter.r $PWD/mutationstats.txt $outdir/identification/${gene}.txt $outdir/$gene/mutationstats.txt - Rscript $dir/filter.r $PWD/hotspots.txt $outdir/identification/${gene}.txt $outdir/$gene/hotspots.txt - echo "done <br />" >> $output - - echo "Running R script on $gene..." >> $output - Rscript --verbose $dir/mutation_analysis.r $outdir/$gene/mutationstats.txt $outdir/$gene/summary.txt $outdir/$gene/ 2>&1 - echo "done <br />" >> $output - - echo "Running Python script..." >> $output - python $dir/mutation_analysis.py --mutationfile $outdir/$gene/mutationanalysis.txt --hotspotfile $outdir/$gene/hotspots.txt --output $outdir/$gene/hotspot_analysis.txt - echo "done <br />" >> $output - echo "Done with $gene <br />" >> $output - cut $outdir/$gene/mutations.txt -d, -f2,3,4 > $mutationtmp - cut $outdir/$gene/hotspot_analysis.txt -d, -f2,3,4 > $hotspottmp - cat $mutationtmp $hotspottmp > $tmp - paste $outdir/mutationandhotspot.txt -d, $tmp > $tmp2 - cat $tmp2 > $outdir/mutationandhotspot.txt - rm $outdir/$gene/HS12RSS.txt - rm $outdir/$gene/HS23RSS.txt -done - -Rscript --verbose $dir/mutation_analysis.r $PWD/mutationstats.txt $PWD/summary.txt $outdir/ 2>&1 -python $dir/mutation_analysis.py --mutationfile $PWD/mutationanalysis.txt --hotspotfile $PWD/hotspots.txt --output $outdir/hotspot_analysis.txt - -cut $outdir/mutations.txt -d, -f2,3,4 > $mutationtmp -cut $outdir/hotspot_analysis.txt -d, -f2,3,4 > $hotspottmp -cat $mutationtmp $hotspottmp > $tmp -paste $outdir/mutationandhotspot.txt -d, $tmp > $tmp2 -cat $tmp2 > $outdir/mutationandhotspot.txt - -cut $outdir/ca1/mutations.txt -d, -f1 > $mutationtmp -cut $outdir/ca1/hotspot_analysis.txt -d, -f1 > $hotspottmp -cat $mutationtmp $hotspottmp > $tmp -paste $tmp $outdir/mutationandhotspot.txt -d, > $tmp2 -cat $tmp2 | tr -s "," > $outdir/mutationandhotspot.txt - -ca_n=`cat $outdir/ca/n.txt` -ca1_n=`cat $outdir/ca1/n.txt` -ca2_n=`cat $outdir/ca2/n.txt` -cg_n=`cat $outdir/cg/n.txt` -cg1_n=`cat $outdir/cg1/n.txt` -cg2_n=`cat $outdir/cg2/n.txt` -cg3_n=`cat $outdir/cg3/n.txt` -cg4_n=`cat $outdir/cg4/n.txt` -cm_n=`cat $outdir/cm/n.txt` -#all_n=$((ca_n + cg_n + cm_n)) -all_n=`cat $outdir/n.txt` echo "<html><center><h1>$title</h1></center><table border='1'>" > $output echo "<tr><th>info</th>" >> $output -echo "<th><a href='identification/ca.txt'>ca (N = $ca_n)</a></th>" >> $output -echo "<th><a href='identification/ca1.txt'>ca1 (N = $ca1_n)</a></th>" >> $output -echo "<th><a href='identification/ca2.txt'>ca2 (N = $ca2_n)</a></th>" >> $output -echo "<th><a href='identification/cg.txt'>cg (N = $cg_n)</a></th>" >> $output -echo "<th><a href='identification/cg1.txt'>cg1 (N = $cg1_n)</a></th>" >> $output -echo "<th><a href='identification/cg2.txt'>cg2 (N = $cg2_n)</a></th>" >> $output -echo "<th><a href='identification/cg3.txt'>cg3 (N = $cg3_n)</a></th>" >> $output -echo "<th><a href='identification/cg4.txt'>cg4 (N = $cg4_n)</a></th>" >> $output -echo "<th><a href='identification/cm.txt'>cm (N = $cm_n)</a></th>" >> $output -echo "<th>all (N = $all_n)</th></tr>" >> $output +for gene in ${genes[@]} +do + tmp=`cat $outdir/${gene}_n.txt` + echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output +done +tmp=`cat $outdir/total_n.txt` +echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output + while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz do echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output -done < $outdir/mutationandhotspot.txt +done < $outdir/result.txt echo "</table>" >> $output -echo "<a href='identification/unmatched.txt'>umatched</a><br />" >> $output - -Rscript $dir/piechart.r "${ca_n},${cg_n},${cm_n}" "IgA - ${ca_n},IgG - ${cg_n},IgM? - ${cm_n}" "Ig* (N = $all_n)" $outdir/all.png 2>&1 -Rscript $dir/piechart.r "${ca1_n},${ca2_n}" "IgA1 - ${ca1_n},IgA2 - ${ca2_n}" "IgA (N = $ca_n)" $outdir/ca.png 2>&1 -Rscript $dir/piechart.r "${cg1_n},${cg2_n},${cg3_n},${cg4_n}" "IgG1 - ${cg1_n},IgG2 - ${cg2_n},IgG3 - ${cg3_n},IgG4 - ${cg4_n}" "IgG (N = $cg_n)" $outdir/cg.png 2>&1 - -$dir/seqlogo -t "HS12RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs12.txt > $outdir/HS12.png 2>&1 -$dir/seqlogo -t "HS23RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs23.txt > $outdir/HS23.png 2>&1 +echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output -echo "<img src='all.png'/>" >> $output -echo "<img src='ca.png'/>" >> $output -echo "<img src='cg.png'/>" >> $output - -echo "<img src='HS12.png'/>" >> $output -echo "<img src='HS23.png'/>" >> $output +echo "<img src='all.png'/><br />" >> $output +if [ -a $outdir/ca.png ] +then + echo "<img src='ca.png'/><br />" >> $output +fi +if [ -a $outdir/cg.png ] +then + echo "<img src='cg.png'/><br />" >> $output +fi for gene in ${genes[@]} do @@ -121,7 +64,7 @@ while IFS=, read from a c g t do echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output - done < $outdir/$gene/transitions.txt + done < $outdir/transitions_${gene}.txt echo "</table>" >> $output done