comparison wrapper.sh @ 57:16c7fc1c4bf8 draft

Uploaded
author davidvanzessen
date Fri, 18 Mar 2016 07:50:34 -0400
parents
children
comparison
equal deleted inserted replaced
56:2eb94c08e550 57:16c7fc1c4bf8
1 #!/bin/bash
2 set -e
3 dir="$(cd "$(dirname "$0")" && pwd)"
4 input=$1
5 method=$2
6 output=$3
7 outdir=$4
8 title=$5
9 include_fr1=$6
10 functionality=$7
11 unique=$8
12 naive_output=$9
13 filter_unique=${10}
14 mkdir $outdir
15
16 echo "---------------- read parameters ----------------"
17
18 echo "unpacking IMGT file"
19
20 type="`file $input`"
21 if [[ "$type" == *"Zip archive"* ]] ; then
22 echo "Zip archive"
23 echo "unzip $input -d $PWD/files/"
24 unzip $input -d $PWD/files/
25 elif [[ "$type" == *"XZ compressed data"* ]] ; then
26 echo "ZX archive"
27 echo "tar -xJf $input -C $PWD/files/"
28 mkdir -p $PWD/files/$title
29 tar -xJf $input -C $PWD/files/$title
30 fi
31
32 cat $PWD/files/*/1_* > $PWD/summary.txt
33 cat $PWD/files/*/3_* > $PWD/sequences.txt
34 cat $PWD/files/*/5_* > $PWD/aa.txt
35 cat $PWD/files/*/6_* > $PWD/junction.txt
36 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
37 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
38 cat $PWD/files/*/10_* > $PWD/hotspots.txt
39
40 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
41
42 echo "${BLASTN_DIR}"
43
44 echo "identification ($method)"
45
46 echo "blast or custom"
47
48 if [[ "${method}" == "custom" ]] ; then
49 echo "custom"
50 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
51 else
52 echo "blast"
53 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
54 ID_index=$((ID_index+1))
55 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
56 sequence_index=$((sequence_index+1))
57
58 echo "$ID_index ${sequence_index}"
59
60 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
61
62 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
63 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
64 fi
65
66 echo "---------------- merge_and_filter.r ----------------"
67
68 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
69
70 echo "---------------- mutation_analysis.r ----------------"
71
72 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
73 echo "R mutation analysis"
74 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
75
76 #echo "." > $output
77 #exit 0
78
79
80
81 echo "---------------- mutation_analysis.py ----------------"
82
83 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
84 echo "R AA histogram"
85
86 echo "---------------- aa_histogram.r ----------------"
87
88 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
89
90 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
91
92 funcs=(sum mean median)
93
94
95 echo "<html><center><h1>$title</h1></center>" >> $output
96 echo "---------------- main tables ----------------"
97 for func in ${funcs[@]}
98 do
99
100 echo "---------------- $func table ----------------"
101
102 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
103
104 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
105 echo "<tr><th>info</th>" >> $output
106 for gene in ${genes[@]}
107 do
108 tmp=`cat $outdir/${gene}_${func}_n.txt`
109 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
110 done
111 tmp=`cat $outdir/all_${func}_n.txt`
112 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
113
114 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
115 do
116 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
117 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
118 else
119 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
120 fi
121 done < $outdir/result.txt
122
123 done
124
125 echo "---------------- download links ----------------"
126
127 echo "</table>" >> $output
128 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
129 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
130 echo "<a href='merged.txt'>all data</a><br />" >> $output
131 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
132 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
133 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
134
135 echo "---------------- images ----------------"
136
137 echo "<img src='all.png'/><br />" >> $output
138 echo "<a href='all.txt'>download data</a><br />" >> $output
139 if [ -a $outdir/ca.png ]
140 then
141 echo "<img src='ca.png'/><br />" >> $output
142 echo "<a href='ca.txt'>download data</a><br />" >> $output
143 fi
144 if [ -a $outdir/cg.png ]
145 then
146 echo "<img src='cg.png'/><br />" >> $output
147 echo "<a href='cg.txt'>download data</a><br />" >> $output
148 fi
149 if [ -a $outdir/scatter.png ]
150 then
151 echo "<img src='scatter.png'/><br />" >> $output
152 echo "<a href='scatter.txt'>download data</a><br />" >> $output
153 fi
154 if [ -a $outdir/frequency_ranges.png ]
155 then
156 echo "<img src='frequency_ranges.png'/><br />" >> $output
157 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
158 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
159 fi
160 if [ -a $outdir/aa_histogram.png ]
161 then
162 echo "<img src='aa_histogram.png'/><br />" >> $output
163 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
164 fi
165
166 for gene in ${genes[@]}
167 do
168 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
169 while IFS=, read from a c g t
170 do
171 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
172 done < $outdir/transitions_${gene}_sum.txt
173 echo "</table>" >> $output
174 done
175
176 echo "<table border='1'><caption>All transition table</caption>" >> $output
177 while IFS=, read from a c g t
178 do
179 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
180 done < $outdir/transitions_all_sum.txt
181 echo "</table>" >> $output
182
183 echo "</html>" >> $output
184
185
186 #optional output for naive
187
188 echo "---------------- aa_histogram.r ----------------"
189
190 if [[ "$naive_output" != "None" ]]
191 then
192 echo "---------------- imgt_loader.r ----------------"
193 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
194 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
195 echo "---------------- naive_output.r ----------------"
196 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
197 fi
198
199
200
201
202
203
204
205
206
207
208
209 #rm $outdir/HS12RSS.txt
210 #rm $outdir/HS23RSS.txt