Mercurial > repos > davidvanzessen > mutation_analysis
comparison wrapper.sh @ 4:069419cccba4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Mon, 22 Sep 2014 10:19:36 -0400 |
parents | a0b27058dcac |
children | 257e98fd3b54 |
comparison
equal
deleted
inserted
replaced
3:a0b27058dcac | 4:069419cccba4 |
---|---|
1 #!/bin/bash | 1 #!/bin/bash |
2 set -e | |
2 dir="$(cd "$(dirname "$0")" && pwd)" | 3 dir="$(cd "$(dirname "$0")" && pwd)" |
3 input=$1 | 4 input=$1 |
4 output=$2 | 5 output=$2 |
5 outdir=$3 | 6 outdir=$3 |
6 title=$4 | 7 title=$4 |
10 cat $PWD/files/*/1_* > $PWD/summary.txt | 11 cat $PWD/files/*/1_* > $PWD/summary.txt |
11 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt | 12 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt |
12 cat $PWD/files/*/8_* > $PWD/mutationstats.txt | 13 cat $PWD/files/*/8_* > $PWD/mutationstats.txt |
13 cat $PWD/files/*/10_* > $PWD/hotspots.txt | 14 cat $PWD/files/*/10_* > $PWD/hotspots.txt |
14 | 15 |
15 cp $dir/HS12RSS.txt $outdir/ | |
16 cp $dir/HS23RSS.txt $outdir/ | |
17 | 16 |
18 mkdir $outdir/identification/ | 17 echo "identification" |
19 python $dir/gene_identification.py --input $PWD/summary.txt --outdir $outdir/identification/ | 18 python $dir/gene_identification.py --input $PWD/summary.txt --output $PWD/annotatedsummary.txt |
19 echo "merging" | |
20 Rscript $dir/merge_and_filter.r $PWD/annotatedsummary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/merged.txt $outdir/unmatched.txt | |
21 | |
22 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" | |
23 echo "R mutation analysis" | |
24 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir 2>&1 | |
25 echo "python mutation analysis" | |
26 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --output $outdir/hotspot_analysis.txt | |
27 | |
28 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt | |
29 | |
20 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) | 30 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) |
21 tmp=$PWD/tmp | |
22 tmp2=$PWD/tmp2 | |
23 hotspottmp=$PWD/hotspottmp | |
24 mutationtmp=$PWD/mutationtmp | |
25 touch $outdir/mutationandhotspot.txt | |
26 for gene in ${genes[@]} | |
27 do | |
28 echo "Running $gene <br />" >> $output | |
29 mkdir $outdir/$gene | |
30 cp $dir/HS12RSS.txt $outdir/$gene/ | |
31 cp $dir/HS23RSS.txt $outdir/$gene/ | |
32 echo "Filtering input..." >> $output | |
33 Rscript $dir/filter.r $PWD/summary.txt $outdir/identification/${gene}.txt $outdir/$gene/summary.txt | |
34 Rscript $dir/filter.r $PWD/mutationanalysis.txt $outdir/identification/${gene}.txt $outdir/$gene/mutationanalysis.txt | |
35 Rscript $dir/filter.r $PWD/mutationstats.txt $outdir/identification/${gene}.txt $outdir/$gene/mutationstats.txt | |
36 Rscript $dir/filter.r $PWD/hotspots.txt $outdir/identification/${gene}.txt $outdir/$gene/hotspots.txt | |
37 echo "done <br />" >> $output | |
38 | |
39 echo "Running R script on $gene..." >> $output | |
40 Rscript --verbose $dir/mutation_analysis.r $outdir/$gene/mutationstats.txt $outdir/$gene/summary.txt $outdir/$gene/ 2>&1 | |
41 echo "done <br />" >> $output | |
42 | |
43 echo "Running Python script..." >> $output | |
44 python $dir/mutation_analysis.py --mutationfile $outdir/$gene/mutationanalysis.txt --hotspotfile $outdir/$gene/hotspots.txt --output $outdir/$gene/hotspot_analysis.txt | |
45 echo "done <br />" >> $output | |
46 echo "Done with $gene <br />" >> $output | |
47 cut $outdir/$gene/mutations.txt -d, -f2,3,4 > $mutationtmp | |
48 cut $outdir/$gene/hotspot_analysis.txt -d, -f2,3,4 > $hotspottmp | |
49 cat $mutationtmp $hotspottmp > $tmp | |
50 paste $outdir/mutationandhotspot.txt -d, $tmp > $tmp2 | |
51 cat $tmp2 > $outdir/mutationandhotspot.txt | |
52 rm $outdir/$gene/HS12RSS.txt | |
53 rm $outdir/$gene/HS23RSS.txt | |
54 done | |
55 | |
56 Rscript --verbose $dir/mutation_analysis.r $PWD/mutationstats.txt $PWD/summary.txt $outdir/ 2>&1 | |
57 python $dir/mutation_analysis.py --mutationfile $PWD/mutationanalysis.txt --hotspotfile $PWD/hotspots.txt --output $outdir/hotspot_analysis.txt | |
58 | |
59 cut $outdir/mutations.txt -d, -f2,3,4 > $mutationtmp | |
60 cut $outdir/hotspot_analysis.txt -d, -f2,3,4 > $hotspottmp | |
61 cat $mutationtmp $hotspottmp > $tmp | |
62 paste $outdir/mutationandhotspot.txt -d, $tmp > $tmp2 | |
63 cat $tmp2 > $outdir/mutationandhotspot.txt | |
64 | |
65 cut $outdir/ca1/mutations.txt -d, -f1 > $mutationtmp | |
66 cut $outdir/ca1/hotspot_analysis.txt -d, -f1 > $hotspottmp | |
67 cat $mutationtmp $hotspottmp > $tmp | |
68 paste $tmp $outdir/mutationandhotspot.txt -d, > $tmp2 | |
69 cat $tmp2 | tr -s "," > $outdir/mutationandhotspot.txt | |
70 | |
71 ca_n=`cat $outdir/ca/n.txt` | |
72 ca1_n=`cat $outdir/ca1/n.txt` | |
73 ca2_n=`cat $outdir/ca2/n.txt` | |
74 cg_n=`cat $outdir/cg/n.txt` | |
75 cg1_n=`cat $outdir/cg1/n.txt` | |
76 cg2_n=`cat $outdir/cg2/n.txt` | |
77 cg3_n=`cat $outdir/cg3/n.txt` | |
78 cg4_n=`cat $outdir/cg4/n.txt` | |
79 cm_n=`cat $outdir/cm/n.txt` | |
80 #all_n=$((ca_n + cg_n + cm_n)) | |
81 all_n=`cat $outdir/n.txt` | |
82 | 31 |
83 | 32 |
84 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output | 33 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output |
85 echo "<tr><th>info</th>" >> $output | 34 echo "<tr><th>info</th>" >> $output |
86 echo "<th><a href='identification/ca.txt'>ca (N = $ca_n)</a></th>" >> $output | 35 for gene in ${genes[@]} |
87 echo "<th><a href='identification/ca1.txt'>ca1 (N = $ca1_n)</a></th>" >> $output | 36 do |
88 echo "<th><a href='identification/ca2.txt'>ca2 (N = $ca2_n)</a></th>" >> $output | 37 tmp=`cat $outdir/${gene}_n.txt` |
89 echo "<th><a href='identification/cg.txt'>cg (N = $cg_n)</a></th>" >> $output | 38 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output |
90 echo "<th><a href='identification/cg1.txt'>cg1 (N = $cg1_n)</a></th>" >> $output | 39 done |
91 echo "<th><a href='identification/cg2.txt'>cg2 (N = $cg2_n)</a></th>" >> $output | 40 tmp=`cat $outdir/total_n.txt` |
92 echo "<th><a href='identification/cg3.txt'>cg3 (N = $cg3_n)</a></th>" >> $output | 41 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output |
93 echo "<th><a href='identification/cg4.txt'>cg4 (N = $cg4_n)</a></th>" >> $output | 42 |
94 echo "<th><a href='identification/cm.txt'>cm (N = $cm_n)</a></th>" >> $output | |
95 echo "<th>all (N = $all_n)</th></tr>" >> $output | |
96 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz | 43 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz |
97 do | 44 do |
98 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output | 45 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output |
99 done < $outdir/mutationandhotspot.txt | 46 done < $outdir/result.txt |
100 echo "</table>" >> $output | 47 echo "</table>" >> $output |
101 echo "<a href='identification/unmatched.txt'>umatched</a><br />" >> $output | 48 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output |
102 | |
103 Rscript $dir/piechart.r "${ca_n},${cg_n},${cm_n}" "IgA - ${ca_n},IgG - ${cg_n},IgM? - ${cm_n}" "Ig* (N = $all_n)" $outdir/all.png 2>&1 | |
104 Rscript $dir/piechart.r "${ca1_n},${ca2_n}" "IgA1 - ${ca1_n},IgA2 - ${ca2_n}" "IgA (N = $ca_n)" $outdir/ca.png 2>&1 | |
105 Rscript $dir/piechart.r "${cg1_n},${cg2_n},${cg3_n},${cg4_n}" "IgG1 - ${cg1_n},IgG2 - ${cg2_n},IgG3 - ${cg3_n},IgG4 - ${cg4_n}" "IgG (N = $cg_n)" $outdir/cg.png 2>&1 | |
106 | |
107 $dir/seqlogo -t "HS12RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs12.txt > $outdir/HS12.png 2>&1 | |
108 $dir/seqlogo -t "HS23RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs23.txt > $outdir/HS23.png 2>&1 | |
109 | 49 |
110 | 50 |
111 echo "<img src='all.png'/>" >> $output | 51 echo "<img src='all.png'/><br />" >> $output |
112 echo "<img src='ca.png'/>" >> $output | 52 if [ -a $outdir/ca.png ] |
113 echo "<img src='cg.png'/>" >> $output | 53 then |
114 | 54 echo "<img src='ca.png'/><br />" >> $output |
115 echo "<img src='HS12.png'/>" >> $output | 55 fi |
116 echo "<img src='HS23.png'/>" >> $output | 56 if [ -a $outdir/cg.png ] |
57 then | |
58 echo "<img src='cg.png'/><br />" >> $output | |
59 fi | |
117 | 60 |
118 for gene in ${genes[@]} | 61 for gene in ${genes[@]} |
119 do | 62 do |
120 echo "<table border='1'><caption>$gene transition table</caption>" >> $output | 63 echo "<table border='1'><caption>$gene transition table</caption>" >> $output |
121 while IFS=, read from a c g t | 64 while IFS=, read from a c g t |
122 do | 65 do |
123 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | 66 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output |
124 done < $outdir/$gene/transitions.txt | 67 done < $outdir/transitions_${gene}.txt |
125 echo "</table>" >> $output | 68 echo "</table>" >> $output |
126 done | 69 done |
127 | 70 |
128 echo "<table border='1'><caption>All transition table</caption>" >> $output | 71 echo "<table border='1'><caption>All transition table</caption>" >> $output |
129 while IFS=, read from a c g t | 72 while IFS=, read from a c g t |