comparison wrapper.sh @ 4:069419cccba4 draft

Uploaded
author davidvanzessen
date Mon, 22 Sep 2014 10:19:36 -0400
parents a0b27058dcac
children 257e98fd3b54
comparison
equal deleted inserted replaced
3:a0b27058dcac 4:069419cccba4
1 #!/bin/bash 1 #!/bin/bash
2 set -e
2 dir="$(cd "$(dirname "$0")" && pwd)" 3 dir="$(cd "$(dirname "$0")" && pwd)"
3 input=$1 4 input=$1
4 output=$2 5 output=$2
5 outdir=$3 6 outdir=$3
6 title=$4 7 title=$4
10 cat $PWD/files/*/1_* > $PWD/summary.txt 11 cat $PWD/files/*/1_* > $PWD/summary.txt
11 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt 12 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
12 cat $PWD/files/*/8_* > $PWD/mutationstats.txt 13 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
13 cat $PWD/files/*/10_* > $PWD/hotspots.txt 14 cat $PWD/files/*/10_* > $PWD/hotspots.txt
14 15
15 cp $dir/HS12RSS.txt $outdir/
16 cp $dir/HS23RSS.txt $outdir/
17 16
18 mkdir $outdir/identification/ 17 echo "identification"
19 python $dir/gene_identification.py --input $PWD/summary.txt --outdir $outdir/identification/ 18 python $dir/gene_identification.py --input $PWD/summary.txt --output $PWD/annotatedsummary.txt
19 echo "merging"
20 Rscript $dir/merge_and_filter.r $PWD/annotatedsummary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/merged.txt $outdir/unmatched.txt
21
22 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
23 echo "R mutation analysis"
24 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir 2>&1
25 echo "python mutation analysis"
26 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --output $outdir/hotspot_analysis.txt
27
28 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
29
20 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) 30 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
21 tmp=$PWD/tmp
22 tmp2=$PWD/tmp2
23 hotspottmp=$PWD/hotspottmp
24 mutationtmp=$PWD/mutationtmp
25 touch $outdir/mutationandhotspot.txt
26 for gene in ${genes[@]}
27 do
28 echo "Running $gene <br />" >> $output
29 mkdir $outdir/$gene
30 cp $dir/HS12RSS.txt $outdir/$gene/
31 cp $dir/HS23RSS.txt $outdir/$gene/
32 echo "Filtering input..." >> $output
33 Rscript $dir/filter.r $PWD/summary.txt $outdir/identification/${gene}.txt $outdir/$gene/summary.txt
34 Rscript $dir/filter.r $PWD/mutationanalysis.txt $outdir/identification/${gene}.txt $outdir/$gene/mutationanalysis.txt
35 Rscript $dir/filter.r $PWD/mutationstats.txt $outdir/identification/${gene}.txt $outdir/$gene/mutationstats.txt
36 Rscript $dir/filter.r $PWD/hotspots.txt $outdir/identification/${gene}.txt $outdir/$gene/hotspots.txt
37 echo "done <br />" >> $output
38
39 echo "Running R script on $gene..." >> $output
40 Rscript --verbose $dir/mutation_analysis.r $outdir/$gene/mutationstats.txt $outdir/$gene/summary.txt $outdir/$gene/ 2>&1
41 echo "done <br />" >> $output
42
43 echo "Running Python script..." >> $output
44 python $dir/mutation_analysis.py --mutationfile $outdir/$gene/mutationanalysis.txt --hotspotfile $outdir/$gene/hotspots.txt --output $outdir/$gene/hotspot_analysis.txt
45 echo "done <br />" >> $output
46 echo "Done with $gene <br />" >> $output
47 cut $outdir/$gene/mutations.txt -d, -f2,3,4 > $mutationtmp
48 cut $outdir/$gene/hotspot_analysis.txt -d, -f2,3,4 > $hotspottmp
49 cat $mutationtmp $hotspottmp > $tmp
50 paste $outdir/mutationandhotspot.txt -d, $tmp > $tmp2
51 cat $tmp2 > $outdir/mutationandhotspot.txt
52 rm $outdir/$gene/HS12RSS.txt
53 rm $outdir/$gene/HS23RSS.txt
54 done
55
56 Rscript --verbose $dir/mutation_analysis.r $PWD/mutationstats.txt $PWD/summary.txt $outdir/ 2>&1
57 python $dir/mutation_analysis.py --mutationfile $PWD/mutationanalysis.txt --hotspotfile $PWD/hotspots.txt --output $outdir/hotspot_analysis.txt
58
59 cut $outdir/mutations.txt -d, -f2,3,4 > $mutationtmp
60 cut $outdir/hotspot_analysis.txt -d, -f2,3,4 > $hotspottmp
61 cat $mutationtmp $hotspottmp > $tmp
62 paste $outdir/mutationandhotspot.txt -d, $tmp > $tmp2
63 cat $tmp2 > $outdir/mutationandhotspot.txt
64
65 cut $outdir/ca1/mutations.txt -d, -f1 > $mutationtmp
66 cut $outdir/ca1/hotspot_analysis.txt -d, -f1 > $hotspottmp
67 cat $mutationtmp $hotspottmp > $tmp
68 paste $tmp $outdir/mutationandhotspot.txt -d, > $tmp2
69 cat $tmp2 | tr -s "," > $outdir/mutationandhotspot.txt
70
71 ca_n=`cat $outdir/ca/n.txt`
72 ca1_n=`cat $outdir/ca1/n.txt`
73 ca2_n=`cat $outdir/ca2/n.txt`
74 cg_n=`cat $outdir/cg/n.txt`
75 cg1_n=`cat $outdir/cg1/n.txt`
76 cg2_n=`cat $outdir/cg2/n.txt`
77 cg3_n=`cat $outdir/cg3/n.txt`
78 cg4_n=`cat $outdir/cg4/n.txt`
79 cm_n=`cat $outdir/cm/n.txt`
80 #all_n=$((ca_n + cg_n + cm_n))
81 all_n=`cat $outdir/n.txt`
82 31
83 32
84 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output 33 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
85 echo "<tr><th>info</th>" >> $output 34 echo "<tr><th>info</th>" >> $output
86 echo "<th><a href='identification/ca.txt'>ca (N = $ca_n)</a></th>" >> $output 35 for gene in ${genes[@]}
87 echo "<th><a href='identification/ca1.txt'>ca1 (N = $ca1_n)</a></th>" >> $output 36 do
88 echo "<th><a href='identification/ca2.txt'>ca2 (N = $ca2_n)</a></th>" >> $output 37 tmp=`cat $outdir/${gene}_n.txt`
89 echo "<th><a href='identification/cg.txt'>cg (N = $cg_n)</a></th>" >> $output 38 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
90 echo "<th><a href='identification/cg1.txt'>cg1 (N = $cg1_n)</a></th>" >> $output 39 done
91 echo "<th><a href='identification/cg2.txt'>cg2 (N = $cg2_n)</a></th>" >> $output 40 tmp=`cat $outdir/total_n.txt`
92 echo "<th><a href='identification/cg3.txt'>cg3 (N = $cg3_n)</a></th>" >> $output 41 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
93 echo "<th><a href='identification/cg4.txt'>cg4 (N = $cg4_n)</a></th>" >> $output 42
94 echo "<th><a href='identification/cm.txt'>cm (N = $cm_n)</a></th>" >> $output
95 echo "<th>all (N = $all_n)</th></tr>" >> $output
96 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz 43 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
97 do 44 do
98 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output 45 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
99 done < $outdir/mutationandhotspot.txt 46 done < $outdir/result.txt
100 echo "</table>" >> $output 47 echo "</table>" >> $output
101 echo "<a href='identification/unmatched.txt'>umatched</a><br />" >> $output 48 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
102
103 Rscript $dir/piechart.r "${ca_n},${cg_n},${cm_n}" "IgA - ${ca_n},IgG - ${cg_n},IgM? - ${cm_n}" "Ig* (N = $all_n)" $outdir/all.png 2>&1
104 Rscript $dir/piechart.r "${ca1_n},${ca2_n}" "IgA1 - ${ca1_n},IgA2 - ${ca2_n}" "IgA (N = $ca_n)" $outdir/ca.png 2>&1
105 Rscript $dir/piechart.r "${cg1_n},${cg2_n},${cg3_n},${cg4_n}" "IgG1 - ${cg1_n},IgG2 - ${cg2_n},IgG3 - ${cg3_n},IgG4 - ${cg4_n}" "IgG (N = $cg_n)" $outdir/cg.png 2>&1
106
107 $dir/seqlogo -t "HS12RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs12.txt > $outdir/HS12.png 2>&1
108 $dir/seqlogo -t "HS23RSS" -w 20 -h 5 -p -a -c -n -F PNG -f $outdir/weblogo_in_rs23.txt > $outdir/HS23.png 2>&1
109 49
110 50
111 echo "<img src='all.png'/>" >> $output 51 echo "<img src='all.png'/><br />" >> $output
112 echo "<img src='ca.png'/>" >> $output 52 if [ -a $outdir/ca.png ]
113 echo "<img src='cg.png'/>" >> $output 53 then
114 54 echo "<img src='ca.png'/><br />" >> $output
115 echo "<img src='HS12.png'/>" >> $output 55 fi
116 echo "<img src='HS23.png'/>" >> $output 56 if [ -a $outdir/cg.png ]
57 then
58 echo "<img src='cg.png'/><br />" >> $output
59 fi
117 60
118 for gene in ${genes[@]} 61 for gene in ${genes[@]}
119 do 62 do
120 echo "<table border='1'><caption>$gene transition table</caption>" >> $output 63 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
121 while IFS=, read from a c g t 64 while IFS=, read from a c g t
122 do 65 do
123 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output 66 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
124 done < $outdir/$gene/transitions.txt 67 done < $outdir/transitions_${gene}.txt
125 echo "</table>" >> $output 68 echo "</table>" >> $output
126 done 69 done
127 70
128 echo "<table border='1'><caption>All transition table</caption>" >> $output 71 echo "<table border='1'><caption>All transition table</caption>" >> $output
129 while IFS=, read from a c g t 72 while IFS=, read from a c g t