0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
69
|
12 naive_output_ca=$9
|
|
13 naive_output_cg=${10}
|
|
14 naive_output_cm=${11}
|
|
15 filter_unique=${12}
|
|
16 class_filter=${13}
|
0
|
17 mkdir $outdir
|
|
18
|
55
|
19 echo "---------------- read parameters ----------------"
|
63
|
20 echo "---------------- read parameters ----------------<br />" > $output
|
55
|
21
|
|
22 echo "unpacking IMGT file"
|
|
23
|
89
|
24
|
|
25
|
35
|
26 type="`file $input`"
|
|
27 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
28 echo "Zip archive"
|
|
29 echo "unzip $input -d $PWD/files/"
|
|
30 unzip $input -d $PWD/files/
|
|
31 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
32 echo "ZX archive"
|
|
33 echo "tar -xJf $input -C $PWD/files/"
|
|
34 mkdir -p $PWD/files/$title
|
|
35 tar -xJf $input -C $PWD/files/$title
|
|
36 fi
|
|
37
|
64
|
38 cat `find $PWD/files/ -name "1_*"` > $PWD/summary.txt
|
|
39 cat `find $PWD/files/ -name "3_*"` > $PWD/sequences.txt
|
|
40 cat `find $PWD/files/ -name "5_*"` > $PWD/aa.txt
|
|
41 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt
|
|
42 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt
|
|
43 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt
|
|
44 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt
|
|
45
|
|
46 #cat $PWD/files/*/1_* > $PWD/summary.txt
|
|
47 #cat $PWD/files/*/3_* > $PWD/sequences.txt
|
|
48 #cat $PWD/files/*/5_* > $PWD/aa.txt
|
|
49 #cat $PWD/files/*/6_* > $PWD/junction.txt
|
|
50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
51 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
52 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
53
|
26
|
54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
55
|
|
56 echo "${BLASTN_DIR}"
|
|
57
|
89
|
58 echo "---------------- identification ($method) ----------------"
|
|
59 echo "---------------- identification ($method) ----------------<br />" >> $output
|
55
|
60
|
19
|
61 if [[ "${method}" == "custom" ]] ; then
|
|
62 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
63 else
|
|
64 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
65 ID_index=$((ID_index+1))
|
|
66 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
67 sequence_index=$((sequence_index+1))
|
|
68
|
|
69 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
70
|
|
71 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
72 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
73 fi
|
|
74
|
55
|
75 echo "---------------- merge_and_filter.r ----------------"
|
63
|
76 echo "---------------- merge_and_filter.r ----------------<br />" >> $output
|
19
|
77
|
90
|
78 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1
|
0
|
79
|
55
|
80 echo "---------------- mutation_analysis.r ----------------"
|
63
|
81 echo "---------------- mutation_analysis.r ----------------<br />" >> $output
|
55
|
82
|
82
|
83 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
|
4
|
84 echo "R mutation analysis"
|
82
|
85 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1
|
53
|
86
|
55
|
87
|
|
88 echo "---------------- mutation_analysis.py ----------------"
|
63
|
89 echo "---------------- mutation_analysis.py ----------------<br />" >> $output
|
55
|
90
|
82
|
91 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
55
|
92
|
|
93 echo "---------------- aa_histogram.r ----------------"
|
63
|
94 echo "---------------- aa_histogram.r ----------------<br />" >> $output
|
55
|
95
|
26
|
96 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
97
|
0
|
98 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
99
|
53
|
100 funcs=(sum mean median)
|
0
|
101
|
82
|
102 echo "---------------- sequence_overview.r ----------------"
|
|
103
|
|
104 mkdir $outdir/sequence_overview
|
|
105
|
90
|
106 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
|
|
107 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
|
82
|
108
|
|
109 echo "<table border='1'>" > $outdir/base_overview.html
|
|
110
|
92
|
111 while IFS=$'\t' read ID class seq A C G T
|
82
|
112 do
|
85
|
113 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html
|
82
|
114 done < $outdir/sequence_overview/ntoverview.txt
|
|
115
|
53
|
116
|
62
|
117 echo "<html><center><h1>$title</h1></center>" > $output
|
|
118
|
|
119 #display the matched/unmatched for clearity
|
|
120
|
|
121 matched_count="`cat $outdir/merged.txt | tail -n +2 | wc -l`"
|
|
122 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
|
|
123 total_count=$((matched_count + unmatched_count))
|
|
124 perc_count=$((unmatched_count / total_count * 100))
|
|
125 perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"`
|
|
126 perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"`
|
|
127
|
|
128 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
|
|
129 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
|
|
130 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
|
|
131
|
55
|
132 echo "---------------- main tables ----------------"
|
53
|
133 for func in ${funcs[@]}
|
4
|
134 do
|
55
|
135
|
|
136 echo "---------------- $func table ----------------"
|
|
137
|
53
|
138 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
|
|
139
|
|
140 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
|
58
|
141 echo "<tr><th>info</th>" >> $output
|
53
|
142 for gene in ${genes[@]}
|
|
143 do
|
|
144 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
145 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
146 done
|
78
|
147
|
|
148 tmp=`cat $outdir/unmatched_${func}_n.txt`
|
79
|
149 echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output
|
53
|
150 tmp=`cat $outdir/all_${func}_n.txt`
|
89
|
151 echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
152
|
78
|
153 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
|
53
|
154 do
|
|
155 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
156 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
157 else
|
78
|
158 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${unx}/${uny} (${unz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
53
|
159 fi
|
|
160 done < $outdir/result.txt
|
|
161
|
|
162 done
|
|
163
|
55
|
164 echo "---------------- download links ----------------"
|
|
165
|
0
|
166 echo "</table>" >> $output
|
53
|
167 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
168 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
169 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
170 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
171 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
172 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
77
|
173 echo "<a href='sequence_overview/index.html'>Sequence Overview</a><br />" >> $output
|
81
|
174 echo "<a href='base_overview.html'>Base overview</a><br />" >> $output
|
2
|
175
|
55
|
176 echo "---------------- images ----------------"
|
|
177
|
4
|
178 echo "<img src='all.png'/><br />" >> $output
|
26
|
179 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
180 if [ -a $outdir/ca.png ]
|
|
181 then
|
|
182 echo "<img src='ca.png'/><br />" >> $output
|
26
|
183 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
184 fi
|
|
185 if [ -a $outdir/cg.png ]
|
|
186 then
|
|
187 echo "<img src='cg.png'/><br />" >> $output
|
26
|
188 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
189 fi
|
22
|
190 if [ -a $outdir/scatter.png ]
|
|
191 then
|
|
192 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
193 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
194 fi
|
49
|
195 if [ -a $outdir/frequency_ranges.png ]
|
|
196 then
|
|
197 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
198 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
199 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
200 fi
|
26
|
201 if [ -a $outdir/aa_histogram.png ]
|
|
202 then
|
|
203 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
204 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
205 fi
|
2
|
206
|
0
|
207 for gene in ${genes[@]}
|
|
208 do
|
|
209 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
210 while IFS=, read from a c g t
|
|
211 do
|
|
212 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
213 done < $outdir/transitions_${gene}_sum.txt
|
0
|
214 echo "</table>" >> $output
|
|
215 done
|
|
216
|
|
217 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
218 while IFS=, read from a c g t
|
|
219 do
|
|
220 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
221 done < $outdir/transitions_all_sum.txt
|
0
|
222 echo "</table>" >> $output
|
|
223
|
|
224 echo "</html>" >> $output
|
2
|
225
|
47
|
226
|
|
227 #optional output for naive
|
|
228
|
82
|
229 echo "---------------- naive_output.r ----------------"
|
55
|
230
|
47
|
231 if [[ "$naive_output" != "None" ]]
|
|
232 then
|
55
|
233 echo "---------------- imgt_loader.r ----------------"
|
50
|
234 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
80
|
235 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
|
55
|
236 echo "---------------- naive_output.r ----------------"
|
81
|
237 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
|
47
|
238 fi
|
|
239
|
81
|
240 echo "</table>" >> $outdir/base_overview.html
|
|
241
|
|
242 echo "---------------- Done! ----------------"
|
47
|
243
|
|
244
|
|
245
|
|
246
|
|
247
|
|
248
|
|
249
|
2
|
250 #rm $outdir/HS12RSS.txt
|
|
251 #rm $outdir/HS23RSS.txt
|