0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
47
|
12 naive_output=$9
|
52
|
13 filter_unique=${10}
|
66
|
14 class_filter=${11}
|
0
|
15 mkdir $outdir
|
|
16
|
55
|
17 echo "---------------- read parameters ----------------"
|
63
|
18 echo "---------------- read parameters ----------------<br />" > $output
|
55
|
19
|
|
20 echo "unpacking IMGT file"
|
|
21
|
35
|
22 type="`file $input`"
|
|
23 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
24 echo "Zip archive"
|
|
25 echo "unzip $input -d $PWD/files/"
|
|
26 unzip $input -d $PWD/files/
|
|
27 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
28 echo "ZX archive"
|
|
29 echo "tar -xJf $input -C $PWD/files/"
|
|
30 mkdir -p $PWD/files/$title
|
|
31 tar -xJf $input -C $PWD/files/$title
|
|
32 fi
|
|
33
|
64
|
34 cat `find $PWD/files/ -name "1_*"` > $PWD/summary.txt
|
|
35 cat `find $PWD/files/ -name "3_*"` > $PWD/sequences.txt
|
|
36 cat `find $PWD/files/ -name "5_*"` > $PWD/aa.txt
|
|
37 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt
|
|
38 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt
|
|
39 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt
|
|
40 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt
|
|
41
|
|
42
|
|
43
|
|
44 #cat $PWD/files/*/1_* > $PWD/summary.txt
|
|
45 #cat $PWD/files/*/3_* > $PWD/sequences.txt
|
|
46 #cat $PWD/files/*/5_* > $PWD/aa.txt
|
|
47 #cat $PWD/files/*/6_* > $PWD/junction.txt
|
|
48 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
49 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
50 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
51
|
26
|
52 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
53
|
|
54 echo "${BLASTN_DIR}"
|
|
55
|
|
56 echo "identification ($method)"
|
63
|
57 echo "identification ($method)<br />" >> $output
|
19
|
58
|
55
|
59 echo "blast or custom"
|
|
60
|
19
|
61 if [[ "${method}" == "custom" ]] ; then
|
55
|
62 echo "custom"
|
19
|
63 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
64 else
|
55
|
65 echo "blast"
|
19
|
66 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
67 ID_index=$((ID_index+1))
|
|
68 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
69 sequence_index=$((sequence_index+1))
|
|
70
|
|
71 echo "$ID_index ${sequence_index}"
|
|
72
|
|
73 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
74
|
|
75 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
76 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
77 fi
|
|
78
|
55
|
79 echo "---------------- merge_and_filter.r ----------------"
|
63
|
80 echo "---------------- merge_and_filter.r ----------------<br />" >> $output
|
19
|
81
|
66
|
82 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter}
|
0
|
83
|
55
|
84 echo "---------------- mutation_analysis.r ----------------"
|
63
|
85 echo "---------------- mutation_analysis.r ----------------<br />" >> $output
|
55
|
86
|
4
|
87 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
88 echo "R mutation analysis"
|
22
|
89 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
53
|
90
|
|
91 #echo "." > $output
|
|
92 #exit 0
|
|
93
|
55
|
94
|
|
95
|
|
96 echo "---------------- mutation_analysis.py ----------------"
|
63
|
97 echo "---------------- mutation_analysis.py ----------------<br />" >> $output
|
55
|
98
|
32
|
99 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
100 echo "R AA histogram"
|
55
|
101
|
|
102 echo "---------------- aa_histogram.r ----------------"
|
63
|
103 echo "---------------- aa_histogram.r ----------------<br />" >> $output
|
55
|
104
|
26
|
105 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
106
|
0
|
107 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
108
|
53
|
109 funcs=(sum mean median)
|
0
|
110
|
53
|
111
|
62
|
112 echo "<html><center><h1>$title</h1></center>" > $output
|
|
113
|
|
114 #display the matched/unmatched for clearity
|
|
115
|
|
116 matched_count="`cat $outdir/merged.txt | tail -n +2 | wc -l`"
|
|
117 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
|
|
118 total_count=$((matched_count + unmatched_count))
|
|
119 perc_count=$((unmatched_count / total_count * 100))
|
|
120 perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"`
|
|
121 perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"`
|
|
122
|
|
123 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
|
|
124 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
|
|
125 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
|
|
126
|
55
|
127 echo "---------------- main tables ----------------"
|
53
|
128 for func in ${funcs[@]}
|
4
|
129 do
|
55
|
130
|
|
131 echo "---------------- $func table ----------------"
|
|
132
|
53
|
133 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
|
|
134
|
|
135 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
|
58
|
136 echo "<tr><th>info</th>" >> $output
|
53
|
137 for gene in ${genes[@]}
|
|
138 do
|
|
139 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
140 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
141 done
|
|
142 tmp=`cat $outdir/all_${func}_n.txt`
|
|
143 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
144
|
53
|
145 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
|
146 do
|
|
147 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
148 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
149 else
|
|
150 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
|
151 fi
|
|
152 done < $outdir/result.txt
|
|
153
|
|
154 done
|
|
155
|
55
|
156 echo "---------------- download links ----------------"
|
|
157
|
0
|
158 echo "</table>" >> $output
|
53
|
159 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
160 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
161 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
162 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
163 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
164 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
2
|
165
|
55
|
166 echo "---------------- images ----------------"
|
|
167
|
4
|
168 echo "<img src='all.png'/><br />" >> $output
|
26
|
169 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
170 if [ -a $outdir/ca.png ]
|
|
171 then
|
|
172 echo "<img src='ca.png'/><br />" >> $output
|
26
|
173 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
174 fi
|
|
175 if [ -a $outdir/cg.png ]
|
|
176 then
|
|
177 echo "<img src='cg.png'/><br />" >> $output
|
26
|
178 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
179 fi
|
22
|
180 if [ -a $outdir/scatter.png ]
|
|
181 then
|
|
182 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
183 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
184 fi
|
49
|
185 if [ -a $outdir/frequency_ranges.png ]
|
|
186 then
|
|
187 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
188 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
189 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
190 fi
|
26
|
191 if [ -a $outdir/aa_histogram.png ]
|
|
192 then
|
|
193 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
194 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
195 fi
|
2
|
196
|
0
|
197 for gene in ${genes[@]}
|
|
198 do
|
|
199 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
200 while IFS=, read from a c g t
|
|
201 do
|
|
202 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
203 done < $outdir/transitions_${gene}_sum.txt
|
0
|
204 echo "</table>" >> $output
|
|
205 done
|
|
206
|
|
207 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
208 while IFS=, read from a c g t
|
|
209 do
|
|
210 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
211 done < $outdir/transitions_all_sum.txt
|
0
|
212 echo "</table>" >> $output
|
|
213
|
|
214 echo "</html>" >> $output
|
2
|
215
|
47
|
216
|
|
217 #optional output for naive
|
|
218
|
55
|
219 echo "---------------- aa_histogram.r ----------------"
|
|
220
|
47
|
221 if [[ "$naive_output" != "None" ]]
|
|
222 then
|
55
|
223 echo "---------------- imgt_loader.r ----------------"
|
50
|
224 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
|
225 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
|
55
|
226 echo "---------------- naive_output.r ----------------"
|
47
|
227 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
|
|
228 fi
|
|
229
|
|
230
|
|
231
|
|
232
|
|
233
|
|
234
|
|
235
|
|
236
|
|
237
|
|
238
|
|
239
|
2
|
240 #rm $outdir/HS12RSS.txt
|
|
241 #rm $outdir/HS23RSS.txt
|