0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
47
|
12 naive_output=$9
|
52
|
13 filter_unique=${10}
|
0
|
14 mkdir $outdir
|
|
15
|
55
|
16 echo "---------------- read parameters ----------------"
|
63
|
17 echo "---------------- read parameters ----------------<br />" > $output
|
55
|
18
|
|
19 echo "unpacking IMGT file"
|
|
20
|
35
|
21 type="`file $input`"
|
|
22 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
23 echo "Zip archive"
|
|
24 echo "unzip $input -d $PWD/files/"
|
|
25 unzip $input -d $PWD/files/
|
|
26 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
27 echo "ZX archive"
|
|
28 echo "tar -xJf $input -C $PWD/files/"
|
|
29 mkdir -p $PWD/files/$title
|
|
30 tar -xJf $input -C $PWD/files/$title
|
|
31 fi
|
|
32
|
64
|
33 cat `find $PWD/files/ -name "1_*"` > $PWD/summary.txt
|
|
34 cat `find $PWD/files/ -name "3_*"` > $PWD/sequences.txt
|
|
35 cat `find $PWD/files/ -name "5_*"` > $PWD/aa.txt
|
|
36 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt
|
|
37 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt
|
|
38 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt
|
|
39 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt
|
|
40
|
|
41
|
|
42
|
|
43 #cat $PWD/files/*/1_* > $PWD/summary.txt
|
|
44 #cat $PWD/files/*/3_* > $PWD/sequences.txt
|
|
45 #cat $PWD/files/*/5_* > $PWD/aa.txt
|
|
46 #cat $PWD/files/*/6_* > $PWD/junction.txt
|
|
47 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
48 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
49 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
50
|
26
|
51 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
52
|
|
53 echo "${BLASTN_DIR}"
|
|
54
|
|
55 echo "identification ($method)"
|
63
|
56 echo "identification ($method)<br />" >> $output
|
19
|
57
|
55
|
58 echo "blast or custom"
|
|
59
|
19
|
60 if [[ "${method}" == "custom" ]] ; then
|
55
|
61 echo "custom"
|
19
|
62 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
63 else
|
55
|
64 echo "blast"
|
19
|
65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
66 ID_index=$((ID_index+1))
|
|
67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
68 sequence_index=$((sequence_index+1))
|
|
69
|
|
70 echo "$ID_index ${sequence_index}"
|
|
71
|
|
72 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
73
|
|
74 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
75 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
76 fi
|
|
77
|
55
|
78 echo "---------------- merge_and_filter.r ----------------"
|
63
|
79 echo "---------------- merge_and_filter.r ----------------<br />" >> $output
|
19
|
80
|
52
|
81 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
|
0
|
82
|
55
|
83 echo "---------------- mutation_analysis.r ----------------"
|
63
|
84 echo "---------------- mutation_analysis.r ----------------<br />" >> $output
|
55
|
85
|
4
|
86 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
87 echo "R mutation analysis"
|
22
|
88 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
53
|
89
|
|
90 #echo "." > $output
|
|
91 #exit 0
|
|
92
|
55
|
93
|
|
94
|
|
95 echo "---------------- mutation_analysis.py ----------------"
|
63
|
96 echo "---------------- mutation_analysis.py ----------------<br />" >> $output
|
55
|
97
|
32
|
98 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
99 echo "R AA histogram"
|
55
|
100
|
|
101 echo "---------------- aa_histogram.r ----------------"
|
63
|
102 echo "---------------- aa_histogram.r ----------------<br />" >> $output
|
55
|
103
|
26
|
104 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
105
|
0
|
106 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
107
|
53
|
108 funcs=(sum mean median)
|
0
|
109
|
53
|
110
|
62
|
111 echo "<html><center><h1>$title</h1></center>" > $output
|
|
112
|
|
113 #display the matched/unmatched for clearity
|
|
114
|
|
115 matched_count="`cat $outdir/merged.txt | tail -n +2 | wc -l`"
|
|
116 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
|
|
117 total_count=$((matched_count + unmatched_count))
|
|
118 perc_count=$((unmatched_count / total_count * 100))
|
|
119 perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"`
|
|
120 perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"`
|
|
121
|
|
122 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
|
|
123 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
|
|
124 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
|
|
125
|
55
|
126 echo "---------------- main tables ----------------"
|
53
|
127 for func in ${funcs[@]}
|
4
|
128 do
|
55
|
129
|
|
130 echo "---------------- $func table ----------------"
|
|
131
|
53
|
132 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
|
|
133
|
|
134 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
|
58
|
135 echo "<tr><th>info</th>" >> $output
|
53
|
136 for gene in ${genes[@]}
|
|
137 do
|
|
138 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
139 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
140 done
|
|
141 tmp=`cat $outdir/all_${func}_n.txt`
|
|
142 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
143
|
53
|
144 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
|
145 do
|
|
146 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
147 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
148 else
|
|
149 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
|
150 fi
|
|
151 done < $outdir/result.txt
|
|
152
|
|
153 done
|
|
154
|
55
|
155 echo "---------------- download links ----------------"
|
|
156
|
0
|
157 echo "</table>" >> $output
|
53
|
158 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
159 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
160 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
161 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
162 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
163 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
2
|
164
|
55
|
165 echo "---------------- images ----------------"
|
|
166
|
4
|
167 echo "<img src='all.png'/><br />" >> $output
|
26
|
168 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
169 if [ -a $outdir/ca.png ]
|
|
170 then
|
|
171 echo "<img src='ca.png'/><br />" >> $output
|
26
|
172 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
173 fi
|
|
174 if [ -a $outdir/cg.png ]
|
|
175 then
|
|
176 echo "<img src='cg.png'/><br />" >> $output
|
26
|
177 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
178 fi
|
22
|
179 if [ -a $outdir/scatter.png ]
|
|
180 then
|
|
181 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
182 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
183 fi
|
49
|
184 if [ -a $outdir/frequency_ranges.png ]
|
|
185 then
|
|
186 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
187 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
188 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
189 fi
|
26
|
190 if [ -a $outdir/aa_histogram.png ]
|
|
191 then
|
|
192 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
193 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
194 fi
|
2
|
195
|
0
|
196 for gene in ${genes[@]}
|
|
197 do
|
|
198 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
199 while IFS=, read from a c g t
|
|
200 do
|
|
201 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
202 done < $outdir/transitions_${gene}_sum.txt
|
0
|
203 echo "</table>" >> $output
|
|
204 done
|
|
205
|
|
206 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
207 while IFS=, read from a c g t
|
|
208 do
|
|
209 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
210 done < $outdir/transitions_all_sum.txt
|
0
|
211 echo "</table>" >> $output
|
|
212
|
|
213 echo "</html>" >> $output
|
2
|
214
|
47
|
215
|
|
216 #optional output for naive
|
|
217
|
55
|
218 echo "---------------- aa_histogram.r ----------------"
|
|
219
|
47
|
220 if [[ "$naive_output" != "None" ]]
|
|
221 then
|
55
|
222 echo "---------------- imgt_loader.r ----------------"
|
50
|
223 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
|
224 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
|
55
|
225 echo "---------------- naive_output.r ----------------"
|
47
|
226 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
|
|
227 fi
|
|
228
|
|
229
|
|
230
|
|
231
|
|
232
|
|
233
|
|
234
|
|
235
|
|
236
|
|
237
|
|
238
|
2
|
239 #rm $outdir/HS12RSS.txt
|
|
240 #rm $outdir/HS23RSS.txt
|