0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
69
|
12 naive_output_ca=$9
|
|
13 naive_output_cg=${10}
|
|
14 naive_output_cm=${11}
|
|
15 filter_unique=${12}
|
|
16 class_filter=${13}
|
0
|
17 mkdir $outdir
|
|
18
|
55
|
19 echo "---------------- read parameters ----------------"
|
63
|
20 echo "---------------- read parameters ----------------<br />" > $output
|
55
|
21
|
|
22 echo "unpacking IMGT file"
|
|
23
|
89
|
24
|
|
25
|
35
|
26 type="`file $input`"
|
|
27 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
28 echo "Zip archive"
|
|
29 echo "unzip $input -d $PWD/files/"
|
|
30 unzip $input -d $PWD/files/
|
|
31 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
32 echo "ZX archive"
|
|
33 echo "tar -xJf $input -C $PWD/files/"
|
|
34 mkdir -p $PWD/files/$title
|
|
35 tar -xJf $input -C $PWD/files/$title
|
|
36 fi
|
|
37
|
64
|
38 cat `find $PWD/files/ -name "1_*"` > $PWD/summary.txt
|
|
39 cat `find $PWD/files/ -name "3_*"` > $PWD/sequences.txt
|
|
40 cat `find $PWD/files/ -name "5_*"` > $PWD/aa.txt
|
|
41 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt
|
|
42 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt
|
|
43 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt
|
|
44 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt
|
|
45
|
|
46 #cat $PWD/files/*/1_* > $PWD/summary.txt
|
|
47 #cat $PWD/files/*/3_* > $PWD/sequences.txt
|
|
48 #cat $PWD/files/*/5_* > $PWD/aa.txt
|
|
49 #cat $PWD/files/*/6_* > $PWD/junction.txt
|
|
50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
51 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
52 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
53
|
26
|
54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
55
|
|
56 echo "${BLASTN_DIR}"
|
|
57
|
|
58 echo "identification ($method)"
|
89
|
59 echo "---------------- identification ($method) ----------------"
|
|
60 echo "---------------- identification ($method) ----------------<br />" >> $output
|
55
|
61
|
19
|
62 if [[ "${method}" == "custom" ]] ; then
|
|
63 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
64 else
|
|
65 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
66 ID_index=$((ID_index+1))
|
|
67 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
68 sequence_index=$((sequence_index+1))
|
|
69
|
|
70 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
71
|
|
72 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
73 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
74 fi
|
|
75
|
55
|
76 echo "---------------- merge_and_filter.r ----------------"
|
63
|
77 echo "---------------- merge_and_filter.r ----------------<br />" >> $output
|
19
|
78
|
84
|
79 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1
|
0
|
80
|
55
|
81 echo "---------------- mutation_analysis.r ----------------"
|
63
|
82 echo "---------------- mutation_analysis.r ----------------<br />" >> $output
|
55
|
83
|
82
|
84 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
|
4
|
85 echo "R mutation analysis"
|
82
|
86 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1
|
53
|
87
|
55
|
88
|
|
89 echo "---------------- mutation_analysis.py ----------------"
|
63
|
90 echo "---------------- mutation_analysis.py ----------------<br />" >> $output
|
55
|
91
|
82
|
92 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
55
|
93
|
|
94 echo "---------------- aa_histogram.r ----------------"
|
63
|
95 echo "---------------- aa_histogram.r ----------------<br />" >> $output
|
55
|
96
|
26
|
97 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
98
|
0
|
99 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
100
|
53
|
101 funcs=(sum mean median)
|
0
|
102
|
82
|
103 echo "---------------- sequence_overview.r ----------------"
|
|
104
|
|
105 mkdir $outdir/sequence_overview
|
|
106
|
|
107 Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
|
89
|
108 #Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
|
82
|
109
|
|
110 echo "<table border='1'>" > $outdir/base_overview.html
|
|
111
|
|
112 while read ID class seq A C G T
|
|
113 do
|
85
|
114 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html
|
82
|
115 done < $outdir/sequence_overview/ntoverview.txt
|
|
116
|
53
|
117
|
62
|
118 echo "<html><center><h1>$title</h1></center>" > $output
|
|
119
|
|
120 #display the matched/unmatched for clearity
|
|
121
|
|
122 matched_count="`cat $outdir/merged.txt | tail -n +2 | wc -l`"
|
|
123 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
|
|
124 total_count=$((matched_count + unmatched_count))
|
|
125 perc_count=$((unmatched_count / total_count * 100))
|
|
126 perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"`
|
|
127 perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"`
|
|
128
|
|
129 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
|
|
130 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
|
|
131 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
|
|
132
|
55
|
133 echo "---------------- main tables ----------------"
|
53
|
134 for func in ${funcs[@]}
|
4
|
135 do
|
55
|
136
|
|
137 echo "---------------- $func table ----------------"
|
|
138
|
53
|
139 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
|
|
140
|
|
141 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
|
58
|
142 echo "<tr><th>info</th>" >> $output
|
53
|
143 for gene in ${genes[@]}
|
|
144 do
|
|
145 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
146 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
147 done
|
78
|
148
|
|
149 tmp=`cat $outdir/unmatched_${func}_n.txt`
|
79
|
150 echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output
|
53
|
151 tmp=`cat $outdir/all_${func}_n.txt`
|
89
|
152 echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
153
|
78
|
154 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
|
53
|
155 do
|
|
156 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
157 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
158 else
|
78
|
159 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${unx}/${uny} (${unz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
53
|
160 fi
|
|
161 done < $outdir/result.txt
|
|
162
|
|
163 done
|
|
164
|
55
|
165 echo "---------------- download links ----------------"
|
|
166
|
0
|
167 echo "</table>" >> $output
|
53
|
168 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
169 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
170 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
171 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
172 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
173 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
77
|
174 echo "<a href='sequence_overview/index.html'>Sequence Overview</a><br />" >> $output
|
81
|
175 echo "<a href='base_overview.html'>Base overview</a><br />" >> $output
|
2
|
176
|
55
|
177 echo "---------------- images ----------------"
|
|
178
|
4
|
179 echo "<img src='all.png'/><br />" >> $output
|
26
|
180 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
181 if [ -a $outdir/ca.png ]
|
|
182 then
|
|
183 echo "<img src='ca.png'/><br />" >> $output
|
26
|
184 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
185 fi
|
|
186 if [ -a $outdir/cg.png ]
|
|
187 then
|
|
188 echo "<img src='cg.png'/><br />" >> $output
|
26
|
189 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
190 fi
|
22
|
191 if [ -a $outdir/scatter.png ]
|
|
192 then
|
|
193 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
194 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
195 fi
|
49
|
196 if [ -a $outdir/frequency_ranges.png ]
|
|
197 then
|
|
198 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
199 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
200 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
201 fi
|
26
|
202 if [ -a $outdir/aa_histogram.png ]
|
|
203 then
|
|
204 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
205 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
206 fi
|
2
|
207
|
0
|
208 for gene in ${genes[@]}
|
|
209 do
|
|
210 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
211 while IFS=, read from a c g t
|
|
212 do
|
|
213 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
214 done < $outdir/transitions_${gene}_sum.txt
|
0
|
215 echo "</table>" >> $output
|
|
216 done
|
|
217
|
|
218 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
219 while IFS=, read from a c g t
|
|
220 do
|
|
221 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
222 done < $outdir/transitions_all_sum.txt
|
0
|
223 echo "</table>" >> $output
|
|
224
|
|
225 echo "</html>" >> $output
|
2
|
226
|
47
|
227
|
|
228 #optional output for naive
|
|
229
|
82
|
230 echo "---------------- naive_output.r ----------------"
|
55
|
231
|
47
|
232 if [[ "$naive_output" != "None" ]]
|
|
233 then
|
55
|
234 echo "---------------- imgt_loader.r ----------------"
|
50
|
235 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
80
|
236 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
|
55
|
237 echo "---------------- naive_output.r ----------------"
|
81
|
238 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
|
47
|
239 fi
|
|
240
|
81
|
241 echo "</table>" >> $outdir/base_overview.html
|
|
242
|
|
243 echo "---------------- Done! ----------------"
|
47
|
244
|
|
245
|
|
246
|
|
247
|
|
248
|
|
249
|
|
250
|
2
|
251 #rm $outdir/HS12RSS.txt
|
|
252 #rm $outdir/HS23RSS.txt
|