comparison wrapper.sh @ 102:e6bc976760d4 draft

Uploaded
author davidvanzessen
date Tue, 21 Jun 2016 03:32:50 -0400
parents 3cffb8a38bb1
children 603a10976e9c
comparison
equal deleted inserted replaced
101:3cffb8a38bb1 102:e6bc976760d4
1 #!/bin/bash 1 #!/bin/bash
2 set -e 2 set -e
3 dir="$(cd "$(dirname "$0")" && pwd)" 3 dir="$(cd "$(dirname "$0")" && pwd)"
4 input=$1 4 input=$1
5 method=$2 5 method=$2
6 output=$3 6 log=$3 #becomes the main html page at the end
7 outdir=$4 7 outdir=$4
8 output="$outdir/index.html" #copied to $log location at the end
8 title=$5 9 title=$5
9 include_fr1=$6 10 include_fr1=$6
10 functionality=$7 11 functionality=$7
11 unique=$8 12 unique=$8
12 naive_output_ca=$9 13 naive_output_ca=$9
15 filter_unique=${12} 16 filter_unique=${12}
16 class_filter=${13} 17 class_filter=${13}
17 mkdir $outdir 18 mkdir $outdir
18 19
19 echo "---------------- read parameters ----------------" 20 echo "---------------- read parameters ----------------"
20 echo "---------------- read parameters ----------------<br />" > $output 21 echo "---------------- read parameters ----------------<br />" > $log
21 22
22 echo "unpacking IMGT file" 23 echo "unpacking IMGT file"
23 24
24 25
25 26
54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" 55 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
55 56
56 echo "${BLASTN_DIR}" 57 echo "${BLASTN_DIR}"
57 58
58 echo "---------------- identification ($method) ----------------" 59 echo "---------------- identification ($method) ----------------"
59 echo "---------------- identification ($method) ----------------<br />" >> $output 60 echo "---------------- identification ($method) ----------------<br />" >> $log
60 61
61 if [[ "${method}" == "custom" ]] ; then 62 if [[ "${method}" == "custom" ]] ; then
62 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt 63 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
63 else 64 else
64 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) 65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
71 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt 72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
72 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt 73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
73 fi 74 fi
74 75
75 echo "---------------- merge_and_filter.r ----------------" 76 echo "---------------- merge_and_filter.r ----------------"
76 echo "---------------- merge_and_filter.r ----------------<br />" >> $output 77 echo "---------------- merge_and_filter.r ----------------<br />" >> $log
77 78
78 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1 79 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1
79 80
80 echo "---------------- creating new IMGT zip ----------------" 81 echo "---------------- creating new IMGT zip ----------------"
81 echo "---------------- creating new IMGT zip ----------------<br />" >> $output 82 echo "---------------- creating new IMGT zip ----------------<br />" >> $log
82 83
83 mkdir $outdir/new_IMGT 84 mkdir $outdir/new_IMGT
84 85
85 cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt" 86 cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt"
86 cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt" 87 cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt"
130 zip -r ../IgAT_cm.zip * 131 zip -r ../IgAT_cm.zip *
131 132
132 cd $tmp 133 cd $tmp
133 134
134 echo "---------------- mutation_analysis.r ----------------" 135 echo "---------------- mutation_analysis.r ----------------"
135 echo "---------------- mutation_analysis.r ----------------<br />" >> $output 136 echo "---------------- mutation_analysis.r ----------------<br />" >> $log
136 137
137 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched" 138 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
138 echo "R mutation analysis" 139 echo "R mutation analysis"
139 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1 140 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1
140 141
141 142
142 echo "---------------- mutation_analysis.py ----------------" 143 echo "---------------- mutation_analysis.py ----------------"
143 echo "---------------- mutation_analysis.py ----------------<br />" >> $output 144 echo "---------------- mutation_analysis.py ----------------<br />" >> $log
144 145
145 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt 146 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
146 147
147 echo "---------------- aa_histogram.r ----------------" 148 echo "---------------- aa_histogram.r ----------------"
148 echo "---------------- aa_histogram.r ----------------<br />" >> $output 149 echo "---------------- aa_histogram.r ----------------<br />" >> $output
152 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) 153 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
153 154
154 funcs=(sum mean median) 155 funcs=(sum mean median)
155 156
156 echo "---------------- sequence_overview.r ----------------" 157 echo "---------------- sequence_overview.r ----------------"
157 echo "---------------- sequence_overview.r ----------------" >> $output 158 echo "---------------- sequence_overview.r ----------------<br />" >> $log
158 159
159 mkdir $outdir/sequence_overview 160 mkdir $outdir/sequence_overview
160 161
161 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 162 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
162 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1 163 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
165 166
166 while IFS=$'\t' read ID class seq A C G T 167 while IFS=$'\t' read ID class seq A C G T
167 do 168 do
168 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html 169 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html
169 done < $outdir/sequence_overview/ntoverview.txt 170 done < $outdir/sequence_overview/ntoverview.txt
170
171 171
172 echo "<html><center><h1>$title</h1></center>" > $output 172 echo "<html><center><h1>$title</h1></center>" > $output
173 173
174 #display the matched/unmatched for clearity 174 #display the matched/unmatched for clearity
175 175
183 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output 183 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
184 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output 184 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
185 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output 185 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
186 186
187 echo "---------------- main tables ----------------" 187 echo "---------------- main tables ----------------"
188 echo "---------------- main tables ----------------<br />" >> $log
188 for func in ${funcs[@]} 189 for func in ${funcs[@]}
189 do 190 do
190 191
191 echo "---------------- $func table ----------------" 192 echo "---------------- $func table ----------------"
193 echo "---------------- $func table ----------------<br />" >> $log
192 194
193 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt 195 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt
194 196
195 echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> $output 197 echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> $output
196 echo "<tr><th>info</th>" >> $output 198 echo "<tr><th>info</th>" >> $output
216 echo "</table>" >> $output 218 echo "</table>" >> $output
217 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output 219 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
218 done 220 done
219 221
220 echo "---------------- download links ----------------" 222 echo "---------------- download links ----------------"
223 echo "---------------- download links ----------------<br />" >> $log
221 224
222 225
223 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output 226 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
224 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output 227 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
225 echo "<a href='merged.txt'>all data</a><br />" >> $output 228 echo "<a href='merged.txt'>all data</a><br />" >> $output
245 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output 248 echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output
246 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output 249 echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output
247 250
248 251
249 echo "---------------- images ----------------" 252 echo "---------------- images ----------------"
253 echo "---------------- images ----------------<br />" >> $log
250 254
251 echo "<img src='all.png'/><br />" >> $output 255 echo "<img src='all.png'/><br />" >> $output
252 echo "<a href='all.txt'>download data</a><br />" >> $output 256 echo "<a href='all.txt'>download data</a><br />" >> $output
253 if [ -a $outdir/ca.png ] 257 if [ -a $outdir/ca.png ]
254 then 258 then
295 echo "</table>" >> $output 299 echo "</table>" >> $output
296 300
297 echo "</html>" >> $output 301 echo "</html>" >> $output
298 302
299 echo "---------------- baseline ----------------" 303 echo "---------------- baseline ----------------"
304 echo "---------------- baseline ----------------<br />" >> $log
300 tmp="$PWD" 305 tmp="$PWD"
301 306
302 mkdir $outdir/baseline 307 mkdir $outdir/baseline
303 308
304 309
305 mkdir $outdir/baseline/ca_cg_cm 310 mkdir $outdir/baseline/ca_cg_cm
306 cd $outdir/baseline/ca_cg_cm 311 if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
307 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" 312 cd $outdir/baseline/ca_cg_cm
313 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
314 else
315 echo "No sequences" > "$outdir/baseline.txt"
316 fi
308 317
309 mkdir $outdir/baseline/ca 318 mkdir $outdir/baseline/ca
310 cd $outdir/baseline/ca 319 if [[ $(wc -l < $outdir/new_IMGT_ca/1_Summary.txt) -gt "1" ]]; then
311 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt" 320 cd $outdir/baseline/ca
321 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt"
322 else
323 echo "No ca sequences" > "$outdir/baseline_ca.txt"
324 fi
312 325
313 mkdir $outdir/baseline/cg 326 mkdir $outdir/baseline/cg
314 cd $outdir/baseline/cg 327 if [[ $(wc -l < $outdir/new_IMGT_cg/1_Summary.txt) -gt "1" ]]; then
315 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt" 328 cd $outdir/baseline/cg
329 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt"
330 else
331 echo "No cg sequences" > "$outdir/baseline_cg.txt"
332 fi
316 333
317 mkdir $outdir/baseline/cm 334 mkdir $outdir/baseline/cm
318 cd $outdir/baseline/cm 335 if [[ $(wc -l < $outdir/new_IMGT_cm/1_Summary.txt) -gt "1" ]]; then
319 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt" 336 cd $outdir/baseline/cm
337 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt"
338 else
339 echo "No cm sequences" > "$outdir/baseline_cm.txt"
340 fi
320 341
321 cd $tmp 342 cd $tmp
322 343
323 #optional output for naive
324
325 echo "---------------- naive_output.r ----------------" 344 echo "---------------- naive_output.r ----------------"
345 echo "---------------- naive_output.r ----------------<br />" >> $log
326 346
327 if [[ "$naive_output" != "None" ]] 347 if [[ "$naive_output" != "None" ]]
328 then 348 then
329 echo "---------------- imgt_loader.r ----------------" 349 echo "---------------- imgt_loader.r ----------------"
350 echo "---------------- imgt_loader.r ----------------<br />" >> $log
330 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output 351 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
331 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1 352 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
332 353
333 echo "---------------- naive_output.r ----------------" 354 echo "---------------- naive_output.r ----------------"
355 echo "---------------- naive_output.r ----------------<br />" >> $log
334 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1 356 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
335 fi 357 fi
336 358
337 echo "</table>" >> $outdir/base_overview.html 359 echo "</table>" >> $outdir/base_overview.html
338 360
339 echo "---------------- Done! ----------------" 361 echo "---------------- Done! ----------------"
340 362 echo "---------------- Done! ----------------<br />" >> $log
363
364 mv $log $outdir/log.html
365
366 mv $outdir/index.html $log
367