comparison wrapper.sh @ 55:0d5add1a9800 draft

Uploaded
author davidvanzessen
date Tue, 01 Mar 2016 04:53:06 -0500
parents d8daf8ed39e1
children 8bb4d6009e08
comparison
equal deleted inserted replaced
54:d8daf8ed39e1 55:0d5add1a9800
11 unique=$8 11 unique=$8
12 naive_output=$9 12 naive_output=$9
13 filter_unique=${10} 13 filter_unique=${10}
14 mkdir $outdir 14 mkdir $outdir
15 15
16 echo "---------------- read parameters ----------------"
17
18 echo "unpacking IMGT file"
19
16 type="`file $input`" 20 type="`file $input`"
17 if [[ "$type" == *"Zip archive"* ]] ; then 21 if [[ "$type" == *"Zip archive"* ]] ; then
18 echo "Zip archive" 22 echo "Zip archive"
19 echo "unzip $input -d $PWD/files/" 23 echo "unzip $input -d $PWD/files/"
20 unzip $input -d $PWD/files/ 24 unzip $input -d $PWD/files/
37 41
38 echo "${BLASTN_DIR}" 42 echo "${BLASTN_DIR}"
39 43
40 echo "identification ($method)" 44 echo "identification ($method)"
41 45
46 echo "blast or custom"
47
42 if [[ "${method}" == "custom" ]] ; then 48 if [[ "${method}" == "custom" ]] ; then
49 echo "custom"
43 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt 50 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
44 else 51 else
52 echo "blast"
45 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) 53 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
46 ID_index=$((ID_index+1)) 54 ID_index=$((ID_index+1))
47 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) 55 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
48 sequence_index=$((sequence_index+1)) 56 sequence_index=$((sequence_index+1))
49 57
53 61
54 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt 62 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
55 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt 63 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
56 fi 64 fi
57 65
58 66 echo "---------------- merge_and_filter.r ----------------"
59 67
60 echo "merging"
61 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} 68 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
69
70 echo "---------------- mutation_analysis.r ----------------"
62 71
63 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" 72 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
64 echo "R mutation analysis" 73 echo "R mutation analysis"
65 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1 74 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
66 75
67 #echo "." > $output 76 #echo "." > $output
68 #exit 0 77 #exit 0
69 78
70 echo "python mutation analysis" 79
80
81 echo "---------------- mutation_analysis.py ----------------"
82
71 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt 83 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
72 echo "R AA histogram" 84 echo "R AA histogram"
85
86 echo "---------------- aa_histogram.r ----------------"
87
73 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1 88 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
74 89
75 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) 90 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
76 91
77 funcs=(sum mean median) 92 funcs=(sum mean median)
78 93
79 94
80 echo "<html><center><h1>$title</h1></center>" >> $output 95 echo "<html><center><h1>$title</h1></center>" >> $output
81 96 echo "---------------- main tables ----------------"
82 for func in ${funcs[@]} 97 for func in ${funcs[@]}
83 do 98 do
99
100 echo "---------------- $func table ----------------"
101
84 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt 102 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
85 103
86 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output 104 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
87 echo "<tr><th'>info</th>" >> $output 105 echo "<tr><th'>info</th>" >> $output
88 for gene in ${genes[@]} 106 for gene in ${genes[@]}
102 fi 120 fi
103 done < $outdir/result.txt 121 done < $outdir/result.txt
104 122
105 done 123 done
106 124
125 echo "---------------- download links ----------------"
126
107 echo "</table>" >> $output 127 echo "</table>" >> $output
108 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output 128 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
109 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output 129 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
110 echo "<a href='merged.txt'>all data</a><br />" >> $output 130 echo "<a href='merged.txt'>all data</a><br />" >> $output
111 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output 131 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
112 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output 132 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
113 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output 133 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
134
135 echo "---------------- images ----------------"
114 136
115 echo "<img src='all.png'/><br />" >> $output 137 echo "<img src='all.png'/><br />" >> $output
116 echo "<a href='all.txt'>download data</a><br />" >> $output 138 echo "<a href='all.txt'>download data</a><br />" >> $output
117 if [ -a $outdir/ca.png ] 139 if [ -a $outdir/ca.png ]
118 then 140 then
161 echo "</html>" >> $output 183 echo "</html>" >> $output
162 184
163 185
164 #optional output for naive 186 #optional output for naive
165 187
188 echo "---------------- aa_histogram.r ----------------"
189
166 if [[ "$naive_output" != "None" ]] 190 if [[ "$naive_output" != "None" ]]
167 then 191 then
168 echo "naive_output: $naive_output" 192 echo "---------------- imgt_loader.r ----------------"
169 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output 193 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
170 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1 194 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
195 echo "---------------- naive_output.r ----------------"
171 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1 196 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
172 fi 197 fi
173 198
174 199
175 200