Mercurial > repos > davidvanzessen > mutation_analysis
comparison wrapper.sh @ 102:e6bc976760d4 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 21 Jun 2016 03:32:50 -0400 |
parents | 3cffb8a38bb1 |
children | 603a10976e9c |
comparison
equal
deleted
inserted
replaced
101:3cffb8a38bb1 | 102:e6bc976760d4 |
---|---|
1 #!/bin/bash | 1 #!/bin/bash |
2 set -e | 2 set -e |
3 dir="$(cd "$(dirname "$0")" && pwd)" | 3 dir="$(cd "$(dirname "$0")" && pwd)" |
4 input=$1 | 4 input=$1 |
5 method=$2 | 5 method=$2 |
6 output=$3 | 6 log=$3 #becomes the main html page at the end |
7 outdir=$4 | 7 outdir=$4 |
8 output="$outdir/index.html" #copied to $log location at the end | |
8 title=$5 | 9 title=$5 |
9 include_fr1=$6 | 10 include_fr1=$6 |
10 functionality=$7 | 11 functionality=$7 |
11 unique=$8 | 12 unique=$8 |
12 naive_output_ca=$9 | 13 naive_output_ca=$9 |
15 filter_unique=${12} | 16 filter_unique=${12} |
16 class_filter=${13} | 17 class_filter=${13} |
17 mkdir $outdir | 18 mkdir $outdir |
18 | 19 |
19 echo "---------------- read parameters ----------------" | 20 echo "---------------- read parameters ----------------" |
20 echo "---------------- read parameters ----------------<br />" > $output | 21 echo "---------------- read parameters ----------------<br />" > $log |
21 | 22 |
22 echo "unpacking IMGT file" | 23 echo "unpacking IMGT file" |
23 | 24 |
24 | 25 |
25 | 26 |
54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" | 55 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" |
55 | 56 |
56 echo "${BLASTN_DIR}" | 57 echo "${BLASTN_DIR}" |
57 | 58 |
58 echo "---------------- identification ($method) ----------------" | 59 echo "---------------- identification ($method) ----------------" |
59 echo "---------------- identification ($method) ----------------<br />" >> $output | 60 echo "---------------- identification ($method) ----------------<br />" >> $log |
60 | 61 |
61 if [[ "${method}" == "custom" ]] ; then | 62 if [[ "${method}" == "custom" ]] ; then |
62 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt | 63 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt |
63 else | 64 else |
64 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) | 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) |
71 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt | 72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt |
72 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt | 73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt |
73 fi | 74 fi |
74 | 75 |
75 echo "---------------- merge_and_filter.r ----------------" | 76 echo "---------------- merge_and_filter.r ----------------" |
76 echo "---------------- merge_and_filter.r ----------------<br />" >> $output | 77 echo "---------------- merge_and_filter.r ----------------<br />" >> $log |
77 | 78 |
78 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1 | 79 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1 |
79 | 80 |
80 echo "---------------- creating new IMGT zip ----------------" | 81 echo "---------------- creating new IMGT zip ----------------" |
81 echo "---------------- creating new IMGT zip ----------------<br />" >> $output | 82 echo "---------------- creating new IMGT zip ----------------<br />" >> $log |
82 | 83 |
83 mkdir $outdir/new_IMGT | 84 mkdir $outdir/new_IMGT |
84 | 85 |
85 cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" | 86 cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" |
86 cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" | 87 cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" |
130 zip -r ../IgAT_cm.zip * | 131 zip -r ../IgAT_cm.zip * |
131 | 132 |
132 cd $tmp | 133 cd $tmp |
133 | 134 |
134 echo "---------------- mutation_analysis.r ----------------" | 135 echo "---------------- mutation_analysis.r ----------------" |
135 echo "---------------- mutation_analysis.r ----------------<br />" >> $output | 136 echo "---------------- mutation_analysis.r ----------------<br />" >> $log |
136 | 137 |
137 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched" | 138 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched" |
138 echo "R mutation analysis" | 139 echo "R mutation analysis" |
139 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1 | 140 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1 |
140 | 141 |
141 | 142 |
142 echo "---------------- mutation_analysis.py ----------------" | 143 echo "---------------- mutation_analysis.py ----------------" |
143 echo "---------------- mutation_analysis.py ----------------<br />" >> $output | 144 echo "---------------- mutation_analysis.py ----------------<br />" >> $log |
144 | 145 |
145 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt | 146 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt |
146 | 147 |
147 echo "---------------- aa_histogram.r ----------------" | 148 echo "---------------- aa_histogram.r ----------------" |
148 echo "---------------- aa_histogram.r ----------------<br />" >> $output | 149 echo "---------------- aa_histogram.r ----------------<br />" >> $output |
152 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) | 153 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) |
153 | 154 |
154 funcs=(sum mean median) | 155 funcs=(sum mean median) |
155 | 156 |
156 echo "---------------- sequence_overview.r ----------------" | 157 echo "---------------- sequence_overview.r ----------------" |
157 echo "---------------- sequence_overview.r ----------------" >> $output | 158 echo "---------------- sequence_overview.r ----------------<br />" >> $log |
158 | 159 |
159 mkdir $outdir/sequence_overview | 160 mkdir $outdir/sequence_overview |
160 | 161 |
161 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 | 162 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 |
162 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 | 163 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 |
165 | 166 |
166 while IFS=$'\t' read ID class seq A C G T | 167 while IFS=$'\t' read ID class seq A C G T |
167 do | 168 do |
168 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html | 169 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html |
169 done < $outdir/sequence_overview/ntoverview.txt | 170 done < $outdir/sequence_overview/ntoverview.txt |
170 | |
171 | 171 |
172 echo "<html><center><h1>$title</h1></center>" > $output | 172 echo "<html><center><h1>$title</h1></center>" > $output |
173 | 173 |
174 #display the matched/unmatched for clearity | 174 #display the matched/unmatched for clearity |
175 | 175 |
183 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output | 183 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output |
184 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output | 184 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output |
185 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output | 185 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output |
186 | 186 |
187 echo "---------------- main tables ----------------" | 187 echo "---------------- main tables ----------------" |
188 echo "---------------- main tables ----------------<br />" >> $log | |
188 for func in ${funcs[@]} | 189 for func in ${funcs[@]} |
189 do | 190 do |
190 | 191 |
191 echo "---------------- $func table ----------------" | 192 echo "---------------- $func table ----------------" |
193 echo "---------------- $func table ----------------<br />" >> $log | |
192 | 194 |
193 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt | 195 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt |
194 | 196 |
195 echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> $output | 197 echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> $output |
196 echo "<tr><th>info</th>" >> $output | 198 echo "<tr><th>info</th>" >> $output |
216 echo "</table>" >> $output | 218 echo "</table>" >> $output |
217 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output | 219 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output |
218 done | 220 done |
219 | 221 |
220 echo "---------------- download links ----------------" | 222 echo "---------------- download links ----------------" |
223 echo "---------------- download links ----------------<br />" >> $log | |
221 | 224 |
222 | 225 |
223 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output | 226 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output |
224 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output | 227 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output |
225 echo "<a href='merged.txt'>all data</a><br />" >> $output | 228 echo "<a href='merged.txt'>all data</a><br />" >> $output |
245 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output | 248 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output |
246 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output | 249 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output |
247 | 250 |
248 | 251 |
249 echo "---------------- images ----------------" | 252 echo "---------------- images ----------------" |
253 echo "---------------- images ----------------<br />" >> $log | |
250 | 254 |
251 echo "<img src='all.png'/><br />" >> $output | 255 echo "<img src='all.png'/><br />" >> $output |
252 echo "<a href='all.txt'>download data</a><br />" >> $output | 256 echo "<a href='all.txt'>download data</a><br />" >> $output |
253 if [ -a $outdir/ca.png ] | 257 if [ -a $outdir/ca.png ] |
254 then | 258 then |
295 echo "</table>" >> $output | 299 echo "</table>" >> $output |
296 | 300 |
297 echo "</html>" >> $output | 301 echo "</html>" >> $output |
298 | 302 |
299 echo "---------------- baseline ----------------" | 303 echo "---------------- baseline ----------------" |
304 echo "---------------- baseline ----------------<br />" >> $log | |
300 tmp="$PWD" | 305 tmp="$PWD" |
301 | 306 |
302 mkdir $outdir/baseline | 307 mkdir $outdir/baseline |
303 | 308 |
304 | 309 |
305 mkdir $outdir/baseline/ca_cg_cm | 310 mkdir $outdir/baseline/ca_cg_cm |
306 cd $outdir/baseline/ca_cg_cm | 311 if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then |
307 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" | 312 cd $outdir/baseline/ca_cg_cm |
313 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" | |
314 else | |
315 echo "No sequences" > "$outdir/baseline.txt" | |
316 fi | |
308 | 317 |
309 mkdir $outdir/baseline/ca | 318 mkdir $outdir/baseline/ca |
310 cd $outdir/baseline/ca | 319 if [[ $(wc -l < $outdir/new_IMGT_ca/1_Summary.txt) -gt "1" ]]; then |
311 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt" | 320 cd $outdir/baseline/ca |
321 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt" | |
322 else | |
323 echo "No ca sequences" > "$outdir/baseline_ca.txt" | |
324 fi | |
312 | 325 |
313 mkdir $outdir/baseline/cg | 326 mkdir $outdir/baseline/cg |
314 cd $outdir/baseline/cg | 327 if [[ $(wc -l < $outdir/new_IMGT_cg/1_Summary.txt) -gt "1" ]]; then |
315 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt" | 328 cd $outdir/baseline/cg |
329 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt" | |
330 else | |
331 echo "No cg sequences" > "$outdir/baseline_cg.txt" | |
332 fi | |
316 | 333 |
317 mkdir $outdir/baseline/cm | 334 mkdir $outdir/baseline/cm |
318 cd $outdir/baseline/cm | 335 if [[ $(wc -l < $outdir/new_IMGT_cm/1_Summary.txt) -gt "1" ]]; then |
319 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt" | 336 cd $outdir/baseline/cm |
337 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt" | |
338 else | |
339 echo "No cm sequences" > "$outdir/baseline_cm.txt" | |
340 fi | |
320 | 341 |
321 cd $tmp | 342 cd $tmp |
322 | 343 |
323 #optional output for naive | |
324 | |
325 echo "---------------- naive_output.r ----------------" | 344 echo "---------------- naive_output.r ----------------" |
345 echo "---------------- naive_output.r ----------------<br />" >> $log | |
326 | 346 |
327 if [[ "$naive_output" != "None" ]] | 347 if [[ "$naive_output" != "None" ]] |
328 then | 348 then |
329 echo "---------------- imgt_loader.r ----------------" | 349 echo "---------------- imgt_loader.r ----------------" |
350 echo "---------------- imgt_loader.r ----------------<br />" >> $log | |
330 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output | 351 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output |
331 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1 | 352 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1 |
332 | 353 |
333 echo "---------------- naive_output.r ----------------" | 354 echo "---------------- naive_output.r ----------------" |
355 echo "---------------- naive_output.r ----------------<br />" >> $log | |
334 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1 | 356 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1 |
335 fi | 357 fi |
336 | 358 |
337 echo "</table>" >> $outdir/base_overview.html | 359 echo "</table>" >> $outdir/base_overview.html |
338 | 360 |
339 echo "---------------- Done! ----------------" | 361 echo "---------------- Done! ----------------" |
340 | 362 echo "---------------- Done! ----------------<br />" >> $log |
363 | |
364 mv $log $outdir/log.html | |
365 | |
366 mv $outdir/index.html $log | |
367 |