Mercurial > repos > davidvanzessen > mutation_analysis
comparison wrapper.sh @ 102:e6bc976760d4 draft
Uploaded
| author | davidvanzessen |
|---|---|
| date | Tue, 21 Jun 2016 03:32:50 -0400 |
| parents | 3cffb8a38bb1 |
| children | 603a10976e9c |
comparison
equal
deleted
inserted
replaced
| 101:3cffb8a38bb1 | 102:e6bc976760d4 |
|---|---|
| 1 #!/bin/bash | 1 #!/bin/bash |
| 2 set -e | 2 set -e |
| 3 dir="$(cd "$(dirname "$0")" && pwd)" | 3 dir="$(cd "$(dirname "$0")" && pwd)" |
| 4 input=$1 | 4 input=$1 |
| 5 method=$2 | 5 method=$2 |
| 6 output=$3 | 6 log=$3 #becomes the main html page at the end |
| 7 outdir=$4 | 7 outdir=$4 |
| 8 output="$outdir/index.html" #copied to $log location at the end | |
| 8 title=$5 | 9 title=$5 |
| 9 include_fr1=$6 | 10 include_fr1=$6 |
| 10 functionality=$7 | 11 functionality=$7 |
| 11 unique=$8 | 12 unique=$8 |
| 12 naive_output_ca=$9 | 13 naive_output_ca=$9 |
| 15 filter_unique=${12} | 16 filter_unique=${12} |
| 16 class_filter=${13} | 17 class_filter=${13} |
| 17 mkdir $outdir | 18 mkdir $outdir |
| 18 | 19 |
| 19 echo "---------------- read parameters ----------------" | 20 echo "---------------- read parameters ----------------" |
| 20 echo "---------------- read parameters ----------------<br />" > $output | 21 echo "---------------- read parameters ----------------<br />" > $log |
| 21 | 22 |
| 22 echo "unpacking IMGT file" | 23 echo "unpacking IMGT file" |
| 23 | 24 |
| 24 | 25 |
| 25 | 26 |
| 54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" | 55 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" |
| 55 | 56 |
| 56 echo "${BLASTN_DIR}" | 57 echo "${BLASTN_DIR}" |
| 57 | 58 |
| 58 echo "---------------- identification ($method) ----------------" | 59 echo "---------------- identification ($method) ----------------" |
| 59 echo "---------------- identification ($method) ----------------<br />" >> $output | 60 echo "---------------- identification ($method) ----------------<br />" >> $log |
| 60 | 61 |
| 61 if [[ "${method}" == "custom" ]] ; then | 62 if [[ "${method}" == "custom" ]] ; then |
| 62 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt | 63 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt |
| 63 else | 64 else |
| 64 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) | 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) |
| 71 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt | 72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt |
| 72 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt | 73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt |
| 73 fi | 74 fi |
| 74 | 75 |
| 75 echo "---------------- merge_and_filter.r ----------------" | 76 echo "---------------- merge_and_filter.r ----------------" |
| 76 echo "---------------- merge_and_filter.r ----------------<br />" >> $output | 77 echo "---------------- merge_and_filter.r ----------------<br />" >> $log |
| 77 | 78 |
| 78 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1 | 79 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1 |
| 79 | 80 |
| 80 echo "---------------- creating new IMGT zip ----------------" | 81 echo "---------------- creating new IMGT zip ----------------" |
| 81 echo "---------------- creating new IMGT zip ----------------<br />" >> $output | 82 echo "---------------- creating new IMGT zip ----------------<br />" >> $log |
| 82 | 83 |
| 83 mkdir $outdir/new_IMGT | 84 mkdir $outdir/new_IMGT |
| 84 | 85 |
| 85 cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" | 86 cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" |
| 86 cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" | 87 cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" |
| 130 zip -r ../IgAT_cm.zip * | 131 zip -r ../IgAT_cm.zip * |
| 131 | 132 |
| 132 cd $tmp | 133 cd $tmp |
| 133 | 134 |
| 134 echo "---------------- mutation_analysis.r ----------------" | 135 echo "---------------- mutation_analysis.r ----------------" |
| 135 echo "---------------- mutation_analysis.r ----------------<br />" >> $output | 136 echo "---------------- mutation_analysis.r ----------------<br />" >> $log |
| 136 | 137 |
| 137 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched" | 138 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched" |
| 138 echo "R mutation analysis" | 139 echo "R mutation analysis" |
| 139 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1 | 140 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1 |
| 140 | 141 |
| 141 | 142 |
| 142 echo "---------------- mutation_analysis.py ----------------" | 143 echo "---------------- mutation_analysis.py ----------------" |
| 143 echo "---------------- mutation_analysis.py ----------------<br />" >> $output | 144 echo "---------------- mutation_analysis.py ----------------<br />" >> $log |
| 144 | 145 |
| 145 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt | 146 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt |
| 146 | 147 |
| 147 echo "---------------- aa_histogram.r ----------------" | 148 echo "---------------- aa_histogram.r ----------------" |
| 148 echo "---------------- aa_histogram.r ----------------<br />" >> $output | 149 echo "---------------- aa_histogram.r ----------------<br />" >> $output |
| 152 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) | 153 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) |
| 153 | 154 |
| 154 funcs=(sum mean median) | 155 funcs=(sum mean median) |
| 155 | 156 |
| 156 echo "---------------- sequence_overview.r ----------------" | 157 echo "---------------- sequence_overview.r ----------------" |
| 157 echo "---------------- sequence_overview.r ----------------" >> $output | 158 echo "---------------- sequence_overview.r ----------------<br />" >> $log |
| 158 | 159 |
| 159 mkdir $outdir/sequence_overview | 160 mkdir $outdir/sequence_overview |
| 160 | 161 |
| 161 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 | 162 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 |
| 162 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 | 163 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 |
| 165 | 166 |
| 166 while IFS=$'\t' read ID class seq A C G T | 167 while IFS=$'\t' read ID class seq A C G T |
| 167 do | 168 do |
| 168 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html | 169 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html |
| 169 done < $outdir/sequence_overview/ntoverview.txt | 170 done < $outdir/sequence_overview/ntoverview.txt |
| 170 | |
| 171 | 171 |
| 172 echo "<html><center><h1>$title</h1></center>" > $output | 172 echo "<html><center><h1>$title</h1></center>" > $output |
| 173 | 173 |
| 174 #display the matched/unmatched for clearity | 174 #display the matched/unmatched for clearity |
| 175 | 175 |
| 183 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output | 183 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output |
| 184 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output | 184 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output |
| 185 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output | 185 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output |
| 186 | 186 |
| 187 echo "---------------- main tables ----------------" | 187 echo "---------------- main tables ----------------" |
| 188 echo "---------------- main tables ----------------<br />" >> $log | |
| 188 for func in ${funcs[@]} | 189 for func in ${funcs[@]} |
| 189 do | 190 do |
| 190 | 191 |
| 191 echo "---------------- $func table ----------------" | 192 echo "---------------- $func table ----------------" |
| 193 echo "---------------- $func table ----------------<br />" >> $log | |
| 192 | 194 |
| 193 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt | 195 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt |
| 194 | 196 |
| 195 echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> $output | 197 echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> $output |
| 196 echo "<tr><th>info</th>" >> $output | 198 echo "<tr><th>info</th>" >> $output |
| 216 echo "</table>" >> $output | 218 echo "</table>" >> $output |
| 217 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output | 219 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output |
| 218 done | 220 done |
| 219 | 221 |
| 220 echo "---------------- download links ----------------" | 222 echo "---------------- download links ----------------" |
| 223 echo "---------------- download links ----------------<br />" >> $log | |
| 221 | 224 |
| 222 | 225 |
| 223 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output | 226 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output |
| 224 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output | 227 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output |
| 225 echo "<a href='merged.txt'>all data</a><br />" >> $output | 228 echo "<a href='merged.txt'>all data</a><br />" >> $output |
| 245 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output | 248 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output |
| 246 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output | 249 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output |
| 247 | 250 |
| 248 | 251 |
| 249 echo "---------------- images ----------------" | 252 echo "---------------- images ----------------" |
| 253 echo "---------------- images ----------------<br />" >> $log | |
| 250 | 254 |
| 251 echo "<img src='all.png'/><br />" >> $output | 255 echo "<img src='all.png'/><br />" >> $output |
| 252 echo "<a href='all.txt'>download data</a><br />" >> $output | 256 echo "<a href='all.txt'>download data</a><br />" >> $output |
| 253 if [ -a $outdir/ca.png ] | 257 if [ -a $outdir/ca.png ] |
| 254 then | 258 then |
| 295 echo "</table>" >> $output | 299 echo "</table>" >> $output |
| 296 | 300 |
| 297 echo "</html>" >> $output | 301 echo "</html>" >> $output |
| 298 | 302 |
| 299 echo "---------------- baseline ----------------" | 303 echo "---------------- baseline ----------------" |
| 304 echo "---------------- baseline ----------------<br />" >> $log | |
| 300 tmp="$PWD" | 305 tmp="$PWD" |
| 301 | 306 |
| 302 mkdir $outdir/baseline | 307 mkdir $outdir/baseline |
| 303 | 308 |
| 304 | 309 |
| 305 mkdir $outdir/baseline/ca_cg_cm | 310 mkdir $outdir/baseline/ca_cg_cm |
| 306 cd $outdir/baseline/ca_cg_cm | 311 if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then |
| 307 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" | 312 cd $outdir/baseline/ca_cg_cm |
| 313 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" | |
| 314 else | |
| 315 echo "No sequences" > "$outdir/baseline.txt" | |
| 316 fi | |
| 308 | 317 |
| 309 mkdir $outdir/baseline/ca | 318 mkdir $outdir/baseline/ca |
| 310 cd $outdir/baseline/ca | 319 if [[ $(wc -l < $outdir/new_IMGT_ca/1_Summary.txt) -gt "1" ]]; then |
| 311 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt" | 320 cd $outdir/baseline/ca |
| 321 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt" | |
| 322 else | |
| 323 echo "No ca sequences" > "$outdir/baseline_ca.txt" | |
| 324 fi | |
| 312 | 325 |
| 313 mkdir $outdir/baseline/cg | 326 mkdir $outdir/baseline/cg |
| 314 cd $outdir/baseline/cg | 327 if [[ $(wc -l < $outdir/new_IMGT_cg/1_Summary.txt) -gt "1" ]]; then |
| 315 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt" | 328 cd $outdir/baseline/cg |
| 329 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt" | |
| 330 else | |
| 331 echo "No cg sequences" > "$outdir/baseline_cg.txt" | |
| 332 fi | |
| 316 | 333 |
| 317 mkdir $outdir/baseline/cm | 334 mkdir $outdir/baseline/cm |
| 318 cd $outdir/baseline/cm | 335 if [[ $(wc -l < $outdir/new_IMGT_cm/1_Summary.txt) -gt "1" ]]; then |
| 319 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt" | 336 cd $outdir/baseline/cm |
| 337 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt" | |
| 338 else | |
| 339 echo "No cm sequences" > "$outdir/baseline_cm.txt" | |
| 340 fi | |
| 320 | 341 |
| 321 cd $tmp | 342 cd $tmp |
| 322 | 343 |
| 323 #optional output for naive | |
| 324 | |
| 325 echo "---------------- naive_output.r ----------------" | 344 echo "---------------- naive_output.r ----------------" |
| 345 echo "---------------- naive_output.r ----------------<br />" >> $log | |
| 326 | 346 |
| 327 if [[ "$naive_output" != "None" ]] | 347 if [[ "$naive_output" != "None" ]] |
| 328 then | 348 then |
| 329 echo "---------------- imgt_loader.r ----------------" | 349 echo "---------------- imgt_loader.r ----------------" |
| 350 echo "---------------- imgt_loader.r ----------------<br />" >> $log | |
| 330 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output | 351 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output |
| 331 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1 | 352 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1 |
| 332 | 353 |
| 333 echo "---------------- naive_output.r ----------------" | 354 echo "---------------- naive_output.r ----------------" |
| 355 echo "---------------- naive_output.r ----------------<br />" >> $log | |
| 334 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1 | 356 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1 |
| 335 fi | 357 fi |
| 336 | 358 |
| 337 echo "</table>" >> $outdir/base_overview.html | 359 echo "</table>" >> $outdir/base_overview.html |
| 338 | 360 |
| 339 echo "---------------- Done! ----------------" | 361 echo "---------------- Done! ----------------" |
| 340 | 362 echo "---------------- Done! ----------------<br />" >> $log |
| 363 | |
| 364 mv $log $outdir/log.html | |
| 365 | |
| 366 mv $outdir/index.html $log | |
| 367 |
