Mercurial > repos > davidvanzessen > mutation_analysis
comparison wrapper.sh @ 110:ade5cf6fd2dc draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 02 Aug 2016 08:30:23 -0400 |
parents | 6add3e66f4fa |
children | e7b550d52eb7 |
comparison
equal
deleted
inserted
replaced
109:0096cd454380 | 110:ade5cf6fd2dc |
---|---|
1 #!/bin/bash | 1 #!/bin/bash |
2 set -e | 2 #set -e |
3 dir="$(cd "$(dirname "$0")" && pwd)" | 3 dir="$(cd "$(dirname "$0")" && pwd)" |
4 input=$1 | 4 input=$1 |
5 method=$2 | 5 method=$2 |
6 log=$3 #becomes the main html page at the end | 6 log=$3 #becomes the main html page at the end |
7 outdir=$4 | 7 outdir=$4 |
15 naive_output_cm=${11} | 15 naive_output_cm=${11} |
16 filter_unique=${12} | 16 filter_unique=${12} |
17 class_filter=${13} | 17 class_filter=${13} |
18 mkdir $outdir | 18 mkdir $outdir |
19 | 19 |
20 tar -xzf $dir/style.tar.gz -C $outdir | |
21 | |
20 echo "---------------- read parameters ----------------" | 22 echo "---------------- read parameters ----------------" |
21 echo "---------------- read parameters ----------------<br />" > $log | 23 echo "---------------- read parameters ----------------<br />" > $log |
22 | 24 |
23 echo "unpacking IMGT file" | 25 echo "unpacking IMGT file" |
24 | |
25 | |
26 | 26 |
27 type="`file $input`" | 27 type="`file $input`" |
28 if [[ "$type" == *"Zip archive"* ]] ; then | 28 if [[ "$type" == *"Zip archive"* ]] ; then |
29 echo "Zip archive" | 29 echo "Zip archive" |
30 echo "unzip $input -d $PWD/files/" | 30 echo "unzip $input -d $PWD/files/" |
65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) | 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) |
66 ID_index=$((ID_index+1)) | 66 ID_index=$((ID_index+1)) |
67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) | 67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) |
68 sequence_index=$((sequence_index+1)) | 68 sequence_index=$((sequence_index+1)) |
69 | 69 |
70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta | 70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp |
71 | |
72 cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences | |
73 | |
74 rm $PWD/sequences.tmp | |
71 | 75 |
72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt | 76 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt |
73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt | 77 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt |
74 fi | 78 fi |
75 | 79 |
147 | 151 |
148 echo "---------------- aa_histogram.r ----------------" | 152 echo "---------------- aa_histogram.r ----------------" |
149 echo "---------------- aa_histogram.r ----------------<br />" >> $log | 153 echo "---------------- aa_histogram.r ----------------<br />" >> $log |
150 | 154 |
151 Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "ca,cg,cm" $outdir/ 2>&1 | 155 Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "ca,cg,cm" $outdir/ 2>&1 |
152 mv $outdir/aa_histogram_.png $outdir/aa_histogram.png | 156 if [ -e "$outdir/aa_histogram_.png" ]; then |
153 mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt | 157 mv $outdir/aa_histogram_.png $outdir/aa_histogram.png |
158 mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt | |
159 fi | |
154 | 160 |
155 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) | 161 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) |
156 | 162 |
157 funcs=(sum mean median) | 163 funcs=(sum mean median) |
164 funcs=(sum) | |
158 | 165 |
159 echo "---------------- sequence_overview.r ----------------" | 166 echo "---------------- sequence_overview.r ----------------" |
160 echo "---------------- sequence_overview.r ----------------<br />" >> $log | 167 echo "---------------- sequence_overview.r ----------------<br />" >> $log |
161 | 168 |
162 mkdir $outdir/sequence_overview | 169 mkdir $outdir/sequence_overview |
170 do | 177 do |
171 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html | 178 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html |
172 done < $outdir/sequence_overview/ntoverview.txt | 179 done < $outdir/sequence_overview/ntoverview.txt |
173 | 180 |
174 echo "<html><center><h1>$title</h1></center>" > $output | 181 echo "<html><center><h1>$title</h1></center>" > $output |
182 echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output | |
183 echo "<script type='text/javascript' src='tabber.js'></script>" >> $output | |
184 echo "<script type='text/javascript' src='script.js'></script>" >> $output | |
185 echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output | |
175 | 186 |
176 #display the matched/unmatched for clearity | 187 #display the matched/unmatched for clearity |
177 | 188 |
178 matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`" | 189 matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`" |
179 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`" | 190 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`" |
186 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output | 197 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output |
187 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output | 198 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output |
188 | 199 |
189 echo "---------------- main tables ----------------" | 200 echo "---------------- main tables ----------------" |
190 echo "---------------- main tables ----------------<br />" >> $log | 201 echo "---------------- main tables ----------------<br />" >> $log |
202 | |
203 echo "<div class='tabber'>" >> $output | |
204 echo "<div class='tabbertab' title='SHM Overview'>" >> $output | |
205 | |
191 for func in ${funcs[@]} | 206 for func in ${funcs[@]} |
192 do | 207 do |
193 | 208 |
194 echo "---------------- $func table ----------------" | 209 echo "---------------- $func table ----------------" |
195 echo "---------------- $func table ----------------<br />" >> $log | 210 echo "---------------- $func table ----------------<br />" >> $log |
219 done < $outdir/data_${func}.txt | 234 done < $outdir/data_${func}.txt |
220 echo "</table>" >> $output | 235 echo "</table>" >> $output |
221 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output | 236 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output |
222 done | 237 done |
223 | 238 |
224 echo "---------------- download links ----------------" | 239 echo "</div>" >> $output #SHM overview tab end |
225 echo "---------------- download links ----------------<br />" >> $log | 240 |
226 | 241 echo "---------------- images ----------------" |
242 echo "---------------- images ----------------<br />" >> $log | |
243 | |
244 echo "<div class='tabbertab' title='SHM Frequency'>" >> $output | |
245 | |
246 if [ -a $outdir/scatter.png ] | |
247 then | |
248 echo "<img src='scatter.png'/><br />" >> $output | |
249 echo "<a href='scatter.txt'>download data</a><br />" >> $output | |
250 fi | |
251 if [ -a $outdir/frequency_ranges.png ] | |
252 then | |
253 echo "<img src='frequency_ranges.png'/><br />" >> $output | |
254 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output | |
255 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output | |
256 fi | |
257 | |
258 echo "</div>" >> $output #SHM frequency tab end | |
259 | |
260 echo "<div class='tabbertab' title='Transition tables'>" >> $output | |
261 | |
262 for gene in ${genes[@]} | |
263 do | |
264 echo "<table border='1'><caption>$gene transition table</caption>" >> $output | |
265 while IFS=, read from a c g t | |
266 do | |
267 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
268 done < $outdir/transitions_${gene}_sum.txt | |
269 echo "</table>" >> $output | |
270 done | |
271 | |
272 echo "<table border='1'><caption>All transition table</caption>" >> $output | |
273 while IFS=, read from a c g t | |
274 do | |
275 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
276 done < $outdir/transitions_all_sum.txt | |
277 echo "</table>" >> $output | |
278 | |
279 echo "</div>" >> $output #transition tables tab end | |
280 | |
281 echo "<div class='tabbertab' title='Antigen Selection'>" >> $output | |
282 | |
283 if [ -a $outdir/aa_histogram.png ] | |
284 then | |
285 echo "<img src='aa_histogram.png'/><br />" >> $output | |
286 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output | |
287 echo "<img src='aa_histogram_ca.png'/><br />" >> $output | |
288 echo "<a href='aa_histogram_ca.txt'>download data</a><br />" >> $output | |
289 echo "<img src='aa_histogram_cg.png'/><br />" >> $output | |
290 echo "<a href='aa_histogram_cg.txt'>download data</a><br />" >> $output | |
291 echo "<img src='aa_histogram_cm.png'/><br />" >> $output | |
292 echo "<a href='aa_histogram_cm.txt'>download data</a><br />" >> $output | |
293 fi | |
294 | |
295 echo "<embed src='baseline_ca.pdf' width='700px' height='1000px'>" >> $output | |
296 echo "<embed src='baseline_cg.pdf' width='700px' height='1000px'>" >> $output | |
297 echo "<embed src='baseline_cm.pdf' width='700px' height='1000px'>" >> $output | |
298 | |
299 echo "</div>" >> $output #antigen selection tab end | |
300 | |
301 echo "<div class='tabbertab' title='CSR'>" >> $output | |
302 | |
303 if [ -a $outdir/ca.png ] | |
304 then | |
305 echo "<img src='ca.png'/><br />" >> $output | |
306 echo "<a href='ca.txt'>download data</a><br />" >> $output | |
307 fi | |
308 if [ -a $outdir/cg.png ] | |
309 then | |
310 echo "<img src='cg.png'/><br />" >> $output | |
311 echo "<a href='cg.txt'>download data</a><br />" >> $output | |
312 fi | |
313 | |
314 echo "</div>" >> $output #CSR tab end | |
315 | |
316 echo "<div class='tabbertab' title='Downloads'>" >> $output | |
227 | 317 |
228 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output | 318 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output |
229 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output | 319 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output |
230 echo "<a href='merged.txt'>all data</a><br />" >> $output | 320 echo "<a href='merged.txt'>all data</a><br />" >> $output |
231 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output | 321 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output |
248 echo "<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />" >> $output | 338 echo "<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />" >> $output |
249 echo "<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />" >> $output | 339 echo "<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />" >> $output |
250 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output | 340 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output |
251 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output | 341 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output |
252 | 342 |
253 | 343 echo "</div>" >> $output #downloads tab end |
254 echo "---------------- images ----------------" | 344 |
255 echo "---------------- images ----------------<br />" >> $log | 345 echo "</div>" >> $output #tabs end |
256 | |
257 echo "<img src='all.png'/><br />" >> $output | |
258 echo "<a href='all.txt'>download data</a><br />" >> $output | |
259 if [ -a $outdir/ca.png ] | |
260 then | |
261 echo "<img src='ca.png'/><br />" >> $output | |
262 echo "<a href='ca.txt'>download data</a><br />" >> $output | |
263 fi | |
264 if [ -a $outdir/cg.png ] | |
265 then | |
266 echo "<img src='cg.png'/><br />" >> $output | |
267 echo "<a href='cg.txt'>download data</a><br />" >> $output | |
268 fi | |
269 if [ -a $outdir/scatter.png ] | |
270 then | |
271 echo "<img src='scatter.png'/><br />" >> $output | |
272 echo "<a href='scatter.txt'>download data</a><br />" >> $output | |
273 fi | |
274 if [ -a $outdir/frequency_ranges.png ] | |
275 then | |
276 echo "<img src='frequency_ranges.png'/><br />" >> $output | |
277 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output | |
278 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output | |
279 fi | |
280 if [ -a $outdir/aa_histogram.png ] | |
281 then | |
282 echo "<img src='aa_histogram.png'/><br />" >> $output | |
283 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output | |
284 echo "<img src='aa_histogram_ca.png'/><br />" >> $output | |
285 echo "<a href='aa_histogram_ca.txt'>download data</a><br />" >> $output | |
286 echo "<img src='aa_histogram_cg.png'/><br />" >> $output | |
287 echo "<a href='aa_histogram_cg.txt'>download data</a><br />" >> $output | |
288 echo "<img src='aa_histogram_cm.png'/><br />" >> $output | |
289 echo "<a href='aa_histogram_cm.txt'>download data</a><br />" >> $output | |
290 fi | |
291 | |
292 for gene in ${genes[@]} | |
293 do | |
294 echo "<table border='1'><caption>$gene transition table</caption>" >> $output | |
295 while IFS=, read from a c g t | |
296 do | |
297 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
298 done < $outdir/transitions_${gene}_sum.txt | |
299 echo "</table>" >> $output | |
300 done | |
301 | |
302 echo "<table border='1'><caption>All transition table</caption>" >> $output | |
303 while IFS=, read from a c g t | |
304 do | |
305 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
306 done < $outdir/transitions_all_sum.txt | |
307 echo "</table>" >> $output | |
308 | 346 |
309 echo "</html>" >> $output | 347 echo "</html>" >> $output |
310 | 348 |
311 echo "---------------- baseline ----------------" | 349 echo "---------------- baseline ----------------" |
312 echo "---------------- baseline ----------------<br />" >> $log | 350 echo "---------------- baseline ----------------<br />" >> $log |
366 | 404 |
367 echo "</table>" >> $outdir/base_overview.html | 405 echo "</table>" >> $outdir/base_overview.html |
368 | 406 |
369 mv $log $outdir/log.html | 407 mv $log $outdir/log.html |
370 | 408 |
371 cp $outdir/index.html $log | 409 echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $log |
410 echo "<table border = 1>" >> $log | |
411 echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> $log | |
412 tIFS="$TMP" | |
413 IFS=$'\t' | |
414 while read step seq perc | |
415 do | |
416 echo "<tr>" >> $log | |
417 echo "<td>$step</td>" >> $log | |
418 echo "<td>$seq</td>" >> $log | |
419 echo "<td>${perc}%</td>" >> $log | |
420 echo "</tr>" >> $log | |
421 done < $outdir/filtering_steps.txt | |
422 echo "</table border></center></html>" >> $log | |
423 | |
424 IFS="$tIFS" | |
425 | |
372 | 426 |
373 echo "---------------- Done! ----------------" | 427 echo "---------------- Done! ----------------" |
374 echo "---------------- Done! ----------------<br />" >> $outdir/log.html | 428 echo "---------------- Done! ----------------<br />" >> $outdir/log.html |
375 | 429 |
430 | |
431 | |
432 | |
433 | |
434 | |
435 | |
436 | |
437 | |
438 | |
439 | |
440 | |
441 | |
442 | |
443 | |
444 | |
445 | |
446 | |
447 | |
448 | |
449 |