Mercurial > repos > davidvanzessen > mutation_analysis
comparison wrapper.sh @ 110:ade5cf6fd2dc draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 02 Aug 2016 08:30:23 -0400 |
| parents | 6add3e66f4fa |
| children | e7b550d52eb7 |
comparison
equal
deleted
inserted
replaced
| 109:0096cd454380 | 110:ade5cf6fd2dc |
|---|---|
| 1 #!/bin/bash | 1 #!/bin/bash |
| 2 set -e | 2 #set -e |
| 3 dir="$(cd "$(dirname "$0")" && pwd)" | 3 dir="$(cd "$(dirname "$0")" && pwd)" |
| 4 input=$1 | 4 input=$1 |
| 5 method=$2 | 5 method=$2 |
| 6 log=$3 #becomes the main html page at the end | 6 log=$3 #becomes the main html page at the end |
| 7 outdir=$4 | 7 outdir=$4 |
| 15 naive_output_cm=${11} | 15 naive_output_cm=${11} |
| 16 filter_unique=${12} | 16 filter_unique=${12} |
| 17 class_filter=${13} | 17 class_filter=${13} |
| 18 mkdir $outdir | 18 mkdir $outdir |
| 19 | 19 |
| 20 tar -xzf $dir/style.tar.gz -C $outdir | |
| 21 | |
| 20 echo "---------------- read parameters ----------------" | 22 echo "---------------- read parameters ----------------" |
| 21 echo "---------------- read parameters ----------------<br />" > $log | 23 echo "---------------- read parameters ----------------<br />" > $log |
| 22 | 24 |
| 23 echo "unpacking IMGT file" | 25 echo "unpacking IMGT file" |
| 24 | |
| 25 | |
| 26 | 26 |
| 27 type="`file $input`" | 27 type="`file $input`" |
| 28 if [[ "$type" == *"Zip archive"* ]] ; then | 28 if [[ "$type" == *"Zip archive"* ]] ; then |
| 29 echo "Zip archive" | 29 echo "Zip archive" |
| 30 echo "unzip $input -d $PWD/files/" | 30 echo "unzip $input -d $PWD/files/" |
| 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) | 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) |
| 66 ID_index=$((ID_index+1)) | 66 ID_index=$((ID_index+1)) |
| 67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) | 67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) |
| 68 sequence_index=$((sequence_index+1)) | 68 sequence_index=$((sequence_index+1)) |
| 69 | 69 |
| 70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta | 70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.tmp |
| 71 | |
| 72 cat $PWD/sequences.tmp | grep -B1 -vE "^$" sequences.fasta #filter out empty sequences | |
| 73 | |
| 74 rm $PWD/sequences.tmp | |
| 71 | 75 |
| 72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt | 76 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt |
| 73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt | 77 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt |
| 74 fi | 78 fi |
| 75 | 79 |
| 147 | 151 |
| 148 echo "---------------- aa_histogram.r ----------------" | 152 echo "---------------- aa_histogram.r ----------------" |
| 149 echo "---------------- aa_histogram.r ----------------<br />" >> $log | 153 echo "---------------- aa_histogram.r ----------------<br />" >> $log |
| 150 | 154 |
| 151 Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "ca,cg,cm" $outdir/ 2>&1 | 155 Rscript $dir/aa_histogram.r $outdir/aa_id_mutations.txt $outdir/absent_aa_id.txt "ca,cg,cm" $outdir/ 2>&1 |
| 152 mv $outdir/aa_histogram_.png $outdir/aa_histogram.png | 156 if [ -e "$outdir/aa_histogram_.png" ]; then |
| 153 mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt | 157 mv $outdir/aa_histogram_.png $outdir/aa_histogram.png |
| 158 mv $outdir/aa_histogram_.txt $outdir/aa_histogram.txt | |
| 159 fi | |
| 154 | 160 |
| 155 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) | 161 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) |
| 156 | 162 |
| 157 funcs=(sum mean median) | 163 funcs=(sum mean median) |
| 164 funcs=(sum) | |
| 158 | 165 |
| 159 echo "---------------- sequence_overview.r ----------------" | 166 echo "---------------- sequence_overview.r ----------------" |
| 160 echo "---------------- sequence_overview.r ----------------<br />" >> $log | 167 echo "---------------- sequence_overview.r ----------------<br />" >> $log |
| 161 | 168 |
| 162 mkdir $outdir/sequence_overview | 169 mkdir $outdir/sequence_overview |
| 170 do | 177 do |
| 171 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html | 178 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html |
| 172 done < $outdir/sequence_overview/ntoverview.txt | 179 done < $outdir/sequence_overview/ntoverview.txt |
| 173 | 180 |
| 174 echo "<html><center><h1>$title</h1></center>" > $output | 181 echo "<html><center><h1>$title</h1></center>" > $output |
| 182 echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>" >> $output | |
| 183 echo "<script type='text/javascript' src='tabber.js'></script>" >> $output | |
| 184 echo "<script type='text/javascript' src='script.js'></script>" >> $output | |
| 185 echo "<link rel='stylesheet' type='text/css' href='style.css'>" >> $output | |
| 175 | 186 |
| 176 #display the matched/unmatched for clearity | 187 #display the matched/unmatched for clearity |
| 177 | 188 |
| 178 matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`" | 189 matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`" |
| 179 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`" | 190 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`" |
| 186 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output | 197 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output |
| 187 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output | 198 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output |
| 188 | 199 |
| 189 echo "---------------- main tables ----------------" | 200 echo "---------------- main tables ----------------" |
| 190 echo "---------------- main tables ----------------<br />" >> $log | 201 echo "---------------- main tables ----------------<br />" >> $log |
| 202 | |
| 203 echo "<div class='tabber'>" >> $output | |
| 204 echo "<div class='tabbertab' title='SHM Overview'>" >> $output | |
| 205 | |
| 191 for func in ${funcs[@]} | 206 for func in ${funcs[@]} |
| 192 do | 207 do |
| 193 | 208 |
| 194 echo "---------------- $func table ----------------" | 209 echo "---------------- $func table ----------------" |
| 195 echo "---------------- $func table ----------------<br />" >> $log | 210 echo "---------------- $func table ----------------<br />" >> $log |
| 219 done < $outdir/data_${func}.txt | 234 done < $outdir/data_${func}.txt |
| 220 echo "</table>" >> $output | 235 echo "</table>" >> $output |
| 221 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output | 236 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output |
| 222 done | 237 done |
| 223 | 238 |
| 224 echo "---------------- download links ----------------" | 239 echo "</div>" >> $output #SHM overview tab end |
| 225 echo "---------------- download links ----------------<br />" >> $log | 240 |
| 226 | 241 echo "---------------- images ----------------" |
| 242 echo "---------------- images ----------------<br />" >> $log | |
| 243 | |
| 244 echo "<div class='tabbertab' title='SHM Frequency'>" >> $output | |
| 245 | |
| 246 if [ -a $outdir/scatter.png ] | |
| 247 then | |
| 248 echo "<img src='scatter.png'/><br />" >> $output | |
| 249 echo "<a href='scatter.txt'>download data</a><br />" >> $output | |
| 250 fi | |
| 251 if [ -a $outdir/frequency_ranges.png ] | |
| 252 then | |
| 253 echo "<img src='frequency_ranges.png'/><br />" >> $output | |
| 254 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output | |
| 255 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output | |
| 256 fi | |
| 257 | |
| 258 echo "</div>" >> $output #SHM frequency tab end | |
| 259 | |
| 260 echo "<div class='tabbertab' title='Transition tables'>" >> $output | |
| 261 | |
| 262 for gene in ${genes[@]} | |
| 263 do | |
| 264 echo "<table border='1'><caption>$gene transition table</caption>" >> $output | |
| 265 while IFS=, read from a c g t | |
| 266 do | |
| 267 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
| 268 done < $outdir/transitions_${gene}_sum.txt | |
| 269 echo "</table>" >> $output | |
| 270 done | |
| 271 | |
| 272 echo "<table border='1'><caption>All transition table</caption>" >> $output | |
| 273 while IFS=, read from a c g t | |
| 274 do | |
| 275 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
| 276 done < $outdir/transitions_all_sum.txt | |
| 277 echo "</table>" >> $output | |
| 278 | |
| 279 echo "</div>" >> $output #transition tables tab end | |
| 280 | |
| 281 echo "<div class='tabbertab' title='Antigen Selection'>" >> $output | |
| 282 | |
| 283 if [ -a $outdir/aa_histogram.png ] | |
| 284 then | |
| 285 echo "<img src='aa_histogram.png'/><br />" >> $output | |
| 286 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output | |
| 287 echo "<img src='aa_histogram_ca.png'/><br />" >> $output | |
| 288 echo "<a href='aa_histogram_ca.txt'>download data</a><br />" >> $output | |
| 289 echo "<img src='aa_histogram_cg.png'/><br />" >> $output | |
| 290 echo "<a href='aa_histogram_cg.txt'>download data</a><br />" >> $output | |
| 291 echo "<img src='aa_histogram_cm.png'/><br />" >> $output | |
| 292 echo "<a href='aa_histogram_cm.txt'>download data</a><br />" >> $output | |
| 293 fi | |
| 294 | |
| 295 echo "<embed src='baseline_ca.pdf' width='700px' height='1000px'>" >> $output | |
| 296 echo "<embed src='baseline_cg.pdf' width='700px' height='1000px'>" >> $output | |
| 297 echo "<embed src='baseline_cm.pdf' width='700px' height='1000px'>" >> $output | |
| 298 | |
| 299 echo "</div>" >> $output #antigen selection tab end | |
| 300 | |
| 301 echo "<div class='tabbertab' title='CSR'>" >> $output | |
| 302 | |
| 303 if [ -a $outdir/ca.png ] | |
| 304 then | |
| 305 echo "<img src='ca.png'/><br />" >> $output | |
| 306 echo "<a href='ca.txt'>download data</a><br />" >> $output | |
| 307 fi | |
| 308 if [ -a $outdir/cg.png ] | |
| 309 then | |
| 310 echo "<img src='cg.png'/><br />" >> $output | |
| 311 echo "<a href='cg.txt'>download data</a><br />" >> $output | |
| 312 fi | |
| 313 | |
| 314 echo "</div>" >> $output #CSR tab end | |
| 315 | |
| 316 echo "<div class='tabbertab' title='Downloads'>" >> $output | |
| 227 | 317 |
| 228 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output | 318 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output |
| 229 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output | 319 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output |
| 230 echo "<a href='merged.txt'>all data</a><br />" >> $output | 320 echo "<a href='merged.txt'>all data</a><br />" >> $output |
| 231 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output | 321 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output |
| 248 echo "<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />" >> $output | 338 echo "<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />" >> $output |
| 249 echo "<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />" >> $output | 339 echo "<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />" >> $output |
| 250 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output | 340 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output |
| 251 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output | 341 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output |
| 252 | 342 |
| 253 | 343 echo "</div>" >> $output #downloads tab end |
| 254 echo "---------------- images ----------------" | 344 |
| 255 echo "---------------- images ----------------<br />" >> $log | 345 echo "</div>" >> $output #tabs end |
| 256 | |
| 257 echo "<img src='all.png'/><br />" >> $output | |
| 258 echo "<a href='all.txt'>download data</a><br />" >> $output | |
| 259 if [ -a $outdir/ca.png ] | |
| 260 then | |
| 261 echo "<img src='ca.png'/><br />" >> $output | |
| 262 echo "<a href='ca.txt'>download data</a><br />" >> $output | |
| 263 fi | |
| 264 if [ -a $outdir/cg.png ] | |
| 265 then | |
| 266 echo "<img src='cg.png'/><br />" >> $output | |
| 267 echo "<a href='cg.txt'>download data</a><br />" >> $output | |
| 268 fi | |
| 269 if [ -a $outdir/scatter.png ] | |
| 270 then | |
| 271 echo "<img src='scatter.png'/><br />" >> $output | |
| 272 echo "<a href='scatter.txt'>download data</a><br />" >> $output | |
| 273 fi | |
| 274 if [ -a $outdir/frequency_ranges.png ] | |
| 275 then | |
| 276 echo "<img src='frequency_ranges.png'/><br />" >> $output | |
| 277 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output | |
| 278 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output | |
| 279 fi | |
| 280 if [ -a $outdir/aa_histogram.png ] | |
| 281 then | |
| 282 echo "<img src='aa_histogram.png'/><br />" >> $output | |
| 283 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output | |
| 284 echo "<img src='aa_histogram_ca.png'/><br />" >> $output | |
| 285 echo "<a href='aa_histogram_ca.txt'>download data</a><br />" >> $output | |
| 286 echo "<img src='aa_histogram_cg.png'/><br />" >> $output | |
| 287 echo "<a href='aa_histogram_cg.txt'>download data</a><br />" >> $output | |
| 288 echo "<img src='aa_histogram_cm.png'/><br />" >> $output | |
| 289 echo "<a href='aa_histogram_cm.txt'>download data</a><br />" >> $output | |
| 290 fi | |
| 291 | |
| 292 for gene in ${genes[@]} | |
| 293 do | |
| 294 echo "<table border='1'><caption>$gene transition table</caption>" >> $output | |
| 295 while IFS=, read from a c g t | |
| 296 do | |
| 297 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
| 298 done < $outdir/transitions_${gene}_sum.txt | |
| 299 echo "</table>" >> $output | |
| 300 done | |
| 301 | |
| 302 echo "<table border='1'><caption>All transition table</caption>" >> $output | |
| 303 while IFS=, read from a c g t | |
| 304 do | |
| 305 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | |
| 306 done < $outdir/transitions_all_sum.txt | |
| 307 echo "</table>" >> $output | |
| 308 | 346 |
| 309 echo "</html>" >> $output | 347 echo "</html>" >> $output |
| 310 | 348 |
| 311 echo "---------------- baseline ----------------" | 349 echo "---------------- baseline ----------------" |
| 312 echo "---------------- baseline ----------------<br />" >> $log | 350 echo "---------------- baseline ----------------<br />" >> $log |
| 366 | 404 |
| 367 echo "</table>" >> $outdir/base_overview.html | 405 echo "</table>" >> $outdir/base_overview.html |
| 368 | 406 |
| 369 mv $log $outdir/log.html | 407 mv $log $outdir/log.html |
| 370 | 408 |
| 371 cp $outdir/index.html $log | 409 echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > $log |
| 410 echo "<table border = 1>" >> $log | |
| 411 echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> $log | |
| 412 tIFS="$TMP" | |
| 413 IFS=$'\t' | |
| 414 while read step seq perc | |
| 415 do | |
| 416 echo "<tr>" >> $log | |
| 417 echo "<td>$step</td>" >> $log | |
| 418 echo "<td>$seq</td>" >> $log | |
| 419 echo "<td>${perc}%</td>" >> $log | |
| 420 echo "</tr>" >> $log | |
| 421 done < $outdir/filtering_steps.txt | |
| 422 echo "</table border></center></html>" >> $log | |
| 423 | |
| 424 IFS="$tIFS" | |
| 425 | |
| 372 | 426 |
| 373 echo "---------------- Done! ----------------" | 427 echo "---------------- Done! ----------------" |
| 374 echo "---------------- Done! ----------------<br />" >> $outdir/log.html | 428 echo "---------------- Done! ----------------<br />" >> $outdir/log.html |
| 375 | 429 |
| 430 | |
| 431 | |
| 432 | |
| 433 | |
| 434 | |
| 435 | |
| 436 | |
| 437 | |
| 438 | |
| 439 | |
| 440 | |
| 441 | |
| 442 | |
| 443 | |
| 444 | |
| 445 | |
| 446 | |
| 447 | |
| 448 | |
| 449 |
