comparison wrapper.sh @ 110:ade5cf6fd2dc draft

Uploaded
author davidvanzessen
date Tue, 02 Aug 2016 08:30:23 -0400
parents 6add3e66f4fa
children e7b550d52eb7
comparison
equal deleted inserted replaced
109:0096cd454380 110:ade5cf6fd2dc
1 #!/bin/bash 1 #!/bin/bash
2 set -e 2 #set -e
3 dir="$(cd "$(dirname "$0")" && pwd)" 3 dir="$(cd "$(dirname "$0")" && pwd)"
4 input=$1 4 input=$1
5 method=$2 5 method=$2
6 log=$3 #becomes the main html page at the end 6 log=$3 #becomes the main html page at the end
7 outdir=$4 7 outdir=$4
15 naive_output_cm=${11} 15 naive_output_cm=${11}
16 filter_unique=${12} 16 filter_unique=${12}
17 class_filter=${13} 17 class_filter=${13}
18 mkdir $outdir 18 mkdir $outdir
19 19
20 tar -xzf $dir/style.tar.gz -C $outdir
21
20 echo "---------------- read parameters ----------------" 22 echo "---------------- read parameters ----------------"
21 echo "---------------- read parameters ----------------<br />" > $log 23 echo "---------------- read parameters ----------------<br />" > $log
22 24
23 echo "unpacking IMGT file" 25 echo "unpacking IMGT file"
24
25
26 26
27 type="`file $input`" 27 type="`file $input`"
28 if [[ "$type" == *"Zip archive"* ]] ; then 28 if [[ "$type" == *"Zip archive"* ]] ; then
29 echo "Zip archive" 29 echo "Zip archive"
30 echo "unzip $input -d $PWD/files/" 30 echo "unzip $input -d $PWD/files/"
65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
66 ID_index=$((ID_index+1)) 66 ID_index=$((ID_index+1))
67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) 67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
68 sequence_index=$((sequence_index+1)) 68 sequence_index=$((sequence_index+1))
69 69
70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta 70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp
71
72 cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences
73
74 rm $PWD/sequences.tmp
71 75
72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt 76 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt 77 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
74 fi 78 fi
75 79
147 151
148 echo "---------------- aa_histogram.r ----------------" 152 echo "---------------- aa_histogram.r ----------------"
149 echo "---------------- aa_histogram.r ----------------<br />" >> $log 153 echo "---------------- aa_histogram.r ----------------<br />" >> $log
150 154
151 Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "ca,cg,cm" $outdir/ 2>&1 155 Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "ca,cg,cm" $outdir/ 2>&1
152 mv $outdir/aa_histogram_.png $outdir/aa_histogram.png 156 if [ -e "$outdir/aa_histogram_.png" ]; then
153 mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt 157 mv $outdir/aa_histogram_.png $outdir/aa_histogram.png
158 mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt
159 fi
154 160
155 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) 161 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
156 162
157 funcs=(sum mean median) 163 funcs=(sum mean median)
164 funcs=(sum)
158 165
159 echo "---------------- sequence_overview.r ----------------" 166 echo "---------------- sequence_overview.r ----------------"
160 echo "---------------- sequence_overview.r ----------------<br />" >> $log 167 echo "---------------- sequence_overview.r ----------------<br />" >> $log
161 168
162 mkdir $outdir/sequence_overview 169 mkdir $outdir/sequence_overview
170 do 177 do
171 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html 178 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html
172 done < $outdir/sequence_overview/ntoverview.txt 179 done < $outdir/sequence_overview/ntoverview.txt
173 180
174 echo "<html><center><h1>$title</h1></center>" > $output 181 echo "<html><center><h1>$title</h1></center>" > $output
182 echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output
183 echo "<script type='text/javascript' src='tabber.js'></script>" >> $output
184 echo "<script type='text/javascript' src='script.js'></script>" >> $output
185 echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output
175 186
176 #display the matched/unmatched for clarity 187 #display the matched/unmatched for clarity
177 188
178 matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`" 189 matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`"
179 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`" 190 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
186 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output 197 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
187 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output 198 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
188 199
189 echo "---------------- main tables ----------------" 200 echo "---------------- main tables ----------------"
190 echo "---------------- main tables ----------------<br />" >> $log 201 echo "---------------- main tables ----------------<br />" >> $log
202
203 echo "<div class='tabber'>" >> $output
204 echo "<div class='tabbertab' title='SHM Overview'>" >> $output
205
191 for func in ${funcs[@]} 206 for func in ${funcs[@]}
192 do 207 do
193 208
194 echo "---------------- $func table ----------------" 209 echo "---------------- $func table ----------------"
195 echo "---------------- $func table ----------------<br />" >> $log 210 echo "---------------- $func table ----------------<br />" >> $log
219 done < $outdir/data_${func}.txt 234 done < $outdir/data_${func}.txt
220 echo "</table>" >> $output 235 echo "</table>" >> $output
221 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output 236 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
222 done 237 done
223 238
224 echo "---------------- download links ----------------" 239 echo "</div>" >> $output #SHM overview tab end
225 echo "---------------- download links ----------------<br />" >> $log 240
226 241 echo "---------------- images ----------------"
242 echo "---------------- images ----------------<br />" >> $log
243
244 echo "<div class='tabbertab' title='SHM Frequency'>" >> $output
245
246 if [ -a $outdir/scatter.png ]
247 then
248 echo "<img src='scatter.png'/><br />" >> $output
249 echo "<a href='scatter.txt'>download data</a><br />" >> $output
250 fi
251 if [ -a $outdir/frequency_ranges.png ]
252 then
253 echo "<img src='frequency_ranges.png'/><br />" >> $output
254 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
255 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
256 fi
257
258 echo "</div>" >> $output #SHM frequency tab end
259
260 echo "<div class='tabbertab' title='Transition tables'>" >> $output
261
262 for gene in ${genes[@]}
263 do
264 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
265 while IFS=, read from a c g t
266 do
267 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
268 done < $outdir/transitions_${gene}_sum.txt
269 echo "</table>" >> $output
270 done
271
272 echo "<table border='1'><caption>All transition table</caption>" >> $output
273 while IFS=, read from a c g t
274 do
275 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
276 done < $outdir/transitions_all_sum.txt
277 echo "</table>" >> $output
278
279 echo "</div>" >> $output #transition tables tab end
280
281 echo "<div class='tabbertab' title='Antigen Selection'>" >> $output
282
283 if [ -a $outdir/aa_histogram.png ]
284 then
285 echo "<img src='aa_histogram.png'/><br />" >> $output
286 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
287 echo "<img src='aa_histogram_ca.png'/><br />" >> $output
288 echo "<a href='aa_histogram_ca.txt'>download data</a><br />" >> $output
289 echo "<img src='aa_histogram_cg.png'/><br />" >> $output
290 echo "<a href='aa_histogram_cg.txt'>download data</a><br />" >> $output
291 echo "<img src='aa_histogram_cm.png'/><br />" >> $output
292 echo "<a href='aa_histogram_cm.txt'>download data</a><br />" >> $output
293 fi
294
295 echo "<embed src='baseline_ca.pdf' width='700px' height='1000px'>" >> $output
296 echo "<embed src='baseline_cg.pdf' width='700px' height='1000px'>" >> $output
297 echo "<embed src='baseline_cm.pdf' width='700px' height='1000px'>" >> $output
298
299 echo "</div>" >> $output #antigen selection tab end
300
301 echo "<div class='tabbertab' title='CSR'>" >> $output
302
303 if [ -a $outdir/ca.png ]
304 then
305 echo "<img src='ca.png'/><br />" >> $output
306 echo "<a href='ca.txt'>download data</a><br />" >> $output
307 fi
308 if [ -a $outdir/cg.png ]
309 then
310 echo "<img src='cg.png'/><br />" >> $output
311 echo "<a href='cg.txt'>download data</a><br />" >> $output
312 fi
313
314 echo "</div>" >> $output #CSR tab end
315
316 echo "<div class='tabbertab' title='Downloads'>" >> $output
227 317
228 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output 318 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
229 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output 319 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
230 echo "<a href='merged.txt'>all data</a><br />" >> $output 320 echo "<a href='merged.txt'>all data</a><br />" >> $output
231 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output 321 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
248 echo "<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />" >> $output 338 echo "<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />" >> $output
249 echo "<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />" >> $output 339 echo "<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />" >> $output
250 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output 340 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output
251 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output 341 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output
252 342
253 343 echo "</div>" >> $output #downloads tab end
254 echo "---------------- images ----------------" 344
255 echo "---------------- images ----------------<br />" >> $log 345 echo "</div>" >> $output #tabs end
256
257 echo "<img src='all.png'/><br />" >> $output
258 echo "<a href='all.txt'>download data</a><br />" >> $output
259 if [ -a $outdir/ca.png ]
260 then
261 echo "<img src='ca.png'/><br />" >> $output
262 echo "<a href='ca.txt'>download data</a><br />" >> $output
263 fi
264 if [ -a $outdir/cg.png ]
265 then
266 echo "<img src='cg.png'/><br />" >> $output
267 echo "<a href='cg.txt'>download data</a><br />" >> $output
268 fi
269 if [ -a $outdir/scatter.png ]
270 then
271 echo "<img src='scatter.png'/><br />" >> $output
272 echo "<a href='scatter.txt'>download data</a><br />" >> $output
273 fi
274 if [ -a $outdir/frequency_ranges.png ]
275 then
276 echo "<img src='frequency_ranges.png'/><br />" >> $output
277 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
278 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
279 fi
280 if [ -a $outdir/aa_histogram.png ]
281 then
282 echo "<img src='aa_histogram.png'/><br />" >> $output
283 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
284 echo "<img src='aa_histogram_ca.png'/><br />" >> $output
285 echo "<a href='aa_histogram_ca.txt'>download data</a><br />" >> $output
286 echo "<img src='aa_histogram_cg.png'/><br />" >> $output
287 echo "<a href='aa_histogram_cg.txt'>download data</a><br />" >> $output
288 echo "<img src='aa_histogram_cm.png'/><br />" >> $output
289 echo "<a href='aa_histogram_cm.txt'>download data</a><br />" >> $output
290 fi
291
292 for gene in ${genes[@]}
293 do
294 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
295 while IFS=, read from a c g t
296 do
297 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
298 done < $outdir/transitions_${gene}_sum.txt
299 echo "</table>" >> $output
300 done
301
302 echo "<table border='1'><caption>All transition table</caption>" >> $output
303 while IFS=, read from a c g t
304 do
305 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
306 done < $outdir/transitions_all_sum.txt
307 echo "</table>" >> $output
308 346
309 echo "</html>" >> $output 347 echo "</html>" >> $output
310 348
311 echo "---------------- baseline ----------------" 349 echo "---------------- baseline ----------------"
312 echo "---------------- baseline ----------------<br />" >> $log 350 echo "---------------- baseline ----------------<br />" >> $log
366 404
367 echo "</table>" >> $outdir/base_overview.html 405 echo "</table>" >> $outdir/base_overview.html
368 406
369 mv $log $outdir/log.html 407 mv $log $outdir/log.html
370 408
371 cp $outdir/index.html $log 409 echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $log
410 echo "<table border = 1>" >> $log
411 echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> $log
412 tIFS="$TMP"
413 IFS=$'\t'
414 while read step seq perc
415 do
416 echo "<tr>" >> $log
417 echo "<td>$step</td>" >> $log
418 echo "<td>$seq</td>" >> $log
419 echo "<td>${perc}%</td>" >> $log
420 echo "</tr>" >> $log
421 done < $outdir/filtering_steps.txt
422 echo "</table border></center></html>" >> $log
423
424 IFS="$tIFS"
425
372 426
373 echo "---------------- Done! ----------------" 427 echo "---------------- Done! ----------------"
374 echo "---------------- Done! ----------------<br />" >> $outdir/log.html 428 echo "---------------- Done! ----------------<br />" >> $outdir/log.html
375 429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449