0
|
#!/bin/bash
# Wrapper that unpacks an IMGT archive, runs the analysis scripts that live
# next to this file and assembles an HTML report in an output directory.
#
# Positional arguments:
#    1  input                IMGT archive (zip, or xz-compressed tar)
#    2  method               "custom" selects gene_identification.py, anything
#                            else runs blastn against subclass_definition.db
#    3  log                  progress log while running; near the end it is
#                            moved to <outdir>/log.html and replaced by a small
#                            landing page that links to index.html
#    4  outdir               directory that receives every generated file
#    5  title                report heading (also used as extraction sub-folder
#                            for xz input)
#    6  include_fr1          forwarded to the mutation analysis scripts
#    7  functionality        forwarded to merge_and_filter.r
#    8  unique               forwarded to merge_and_filter.r
#    9  naive_output_ca      destination for the ca-only IMGT archive
#   10  naive_output_cg      destination for the cg-only IMGT archive
#   11  naive_output_cm      destination for the cm-only IMGT archive
#   12  filter_unique        forwarded to merge_and_filter.r
#   13  class_filter         forwarded to merge_and_filter.r
#   14  empty_region_filter  forwarded to merge_and_filter.r

# Strict mode is deliberately left off: later steps are best-effort.
#set -e

# Directory containing this script; helper scripts are resolved against it.
# "--" keeps a script path that starts with "-" from being read as an option.
dir="$(cd -- "$(dirname -- "$0")" && pwd)"

input=$1
method=$2
log=$3
outdir=$4
output="$outdir/index.html" # main report page
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}
|
0
|
# collect_imgt_files DIR GLOB OUTFILE
# Concatenate every file below DIR whose name matches GLOB into OUTFILE.
# find runs cat itself, so paths containing whitespace are handled, and when
# nothing matches OUTFILE is simply created empty (a bare `cat` with no
# arguments would sit waiting on stdin instead).
collect_imgt_files() {
  find "$1" -name "$2" -exec cat {} + > "$3"
}

# Create the report directory and drop the bundled style assets into it.
mkdir -p "$outdir"

tar -xzf "$dir/style.tar.gz" -C "$outdir"

echo "---------------- read parameters ----------------"
# First write to the log truncates whatever was there before.
echo "---------------- read parameters ----------------<br />" > "$log"

echo "unpacking IMGT file"

# Decide how to unpack from the description reported by file(1).
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  echo "tar -xJf $input -C $PWD/files/$title"
  # The title is free text and may contain spaces, hence the quoting.
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
fi

# Merge the numbered IMGT tables found anywhere in the extracted tree into one
# working file per table type.
collect_imgt_files "$PWD/files/" "1_*" "$PWD/summary.txt"
collect_imgt_files "$PWD/files/" "3_*" "$PWD/sequences.txt"
collect_imgt_files "$PWD/files/" "5_*" "$PWD/aa.txt"
collect_imgt_files "$PWD/files/" "6_*" "$PWD/junction.txt"
collect_imgt_files "$PWD/files/" "7_*" "$PWD/mutationanalysis.txt"
collect_imgt_files "$PWD/files/" "8_*" "$PWD/mutationstats.txt"
collect_imgt_files "$PWD/files/" "10_*" "$PWD/hotspots.txt"
|
|
47
|
119
|
# Use BLASTN_DIR from the environment when it holds a plausible path (five or
# more characters); otherwise fall back to the development machine location.
if [[ ${#BLASTN_DIR} -ge 5 ]]; then
  echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
else
  BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
  echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
fi

echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  echo "---------------- summary_to_fasta.py ----------------"
  echo "---------------- summary_to_fasta.py ----------------<br />" >> "$log"

  python "$dir/summary_to_fasta.py" --input "$PWD/summary.txt" --fasta "$PWD/sequences.fasta"

  # blastn -outfmt 6 prints no header, so write the column names first and
  # append the hits below them.
  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi
|
|
69
|
55
|
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# Arguments: the five IMGT working tables, the identification result, three
# output files, then the method and the filter settings. Every argument is
# quoted so an empty setting still occupies its position instead of shifting
# the ones after it.
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" \
  "$PWD/sequences.txt" \
  "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" \
  "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" \
  "$outdir/merged.txt" \
  "$outdir/before_unique_filter.txt" \
  "$outdir/unmatched.txt" \
  "$method" \
  "$functionality" \
  "$unique" \
  "${filter_unique}" \
  "${class_filter}" \
  "${empty_region_filter}" 2>&1
|
0
|
74
|
98
|
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

mkdir "$outdir/new_IMGT"

# Canonical IMGT table names; the numeric prefix before the first underscore
# selects which extracted files are merged into each of them.
imgt_files=(
  1_Summary.txt
  2_IMGT-gapped-nt-sequences.txt
  3_Nt-sequences.txt
  4_IMGT-gapped-AA-sequences.txt
  5_AA-sequences.txt
  6_Junction.txt
  7_V-REGION-mutation-and-AA-change-table.txt
  8_V-REGION-nt-mutation-statistics.txt
  9_V-REGION-AA-change-statistics.txt
  10_V-REGION-mutation-hotspots.txt
)
for imgt_file in "${imgt_files[@]}"; do
  # find runs cat itself: safe for paths with spaces, and no hang on stdin when
  # nothing matches.
  find "$PWD/files/" -name "${imgt_file%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/$imgt_file"
done

# One copy of the full table set per class/subclass.
imgt_classes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
for imgt_class in "${imgt_classes[@]}"; do
  mkdir "$outdir/new_IMGT_${imgt_class}"
  cp "$outdir/new_IMGT/"* "$outdir/new_IMGT_${imgt_class}"
done

# new_imgt.r is run on each directory with merged.txt and a class selector;
# "-" is passed for the directory that is not tied to a single class.
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
for imgt_class in "${imgt_classes[@]}"; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "$imgt_class" 2>&1
done

# Pack each directory into a sibling .txz. tar is run from inside the directory
# so member names carry no path. The subshell keeps the caller's working
# directory untouched (a relative $outdir therefore keeps working) and tar is
# skipped when the cd fails.
for imgt_dir in new_IMGT "${imgt_classes[@]/#/new_IMGT_}"; do
  (
    cd "$outdir/$imgt_dir/" || exit
    tar -cJf "../$imgt_dir.txz" *
  )
done
|
|
165
|
55
|
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma separated class list handed to the analysis scripts.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1

echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1

# When a histogram with an empty class suffix exists, rename it (and its data
# file) to the plain aa_histogram.* names the report links to.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi
|
4
|
187
|
0
|
# Classes/subclasses that get their own column and transition table.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Only the "sum" tables are rendered. The original assigned (sum mean median)
# and immediately overwrote it with (sum), so the wider list never took effect.
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Start the per-sequence base count page; the closing </table> is appended
# near the end of the script.
echo "<table border='1'>" > "$outdir/base_overview.html"

# ntoverview.txt is tab separated: ID, class, sequence, then the A/C/G/T
# columns. The page shows the sequence before the class.
while IFS=$'\t' read -r ID class seq A C G T; do
  echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
done < "$outdir/sequence_overview/ntoverview.txt" >> "$outdir/base_overview.html"
|
|
206
|
62
|
# percent_of PART WHOLE
# Print PART as a percentage of WHOLE, truncated to two decimals ("33.33").
# Multiplying before dividing keeps the decimals that "part / whole * 100" at
# scale 2 throws away, and an empty data set yields 0.00 instead of a
# division-by-zero error.
percent_of() {
  local part=$1 whole=$2 scaled
  if (( whole == 0 )); then
    printf '0.00\n'
    return 0
  fi
  scaled=$(( part * 10000 / whole ))
  printf '%d.%02d\n' "$(( scaled / 100 ))" "$(( scaled % 100 ))"
}

# Start the main report page: heading, viewport and the script/style assets
# that are expected to sit next to index.html.
{
  echo "<html><center><h1>$title</h1></center>"
  echo "<meta name='viewport' content='width=device-width, initial-scale=1'>"
  echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
  echo "<script type='text/javascript' src='tabber.js'></script>"
  echo "<script type='text/javascript' src='script.js'></script>"
  echo "<link rel='stylesheet' type='text/css' href='style.css'>"
  echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>"
} > "$output"

# Matched rows: lines of merged.txt that do not mention "unmatched", minus the
# first remaining line (header). Unmatched rows: unmatched.txt minus its header.
matched_count="$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)"
unmatched_count="$(tail -n +2 "$outdir/unmatched.txt" | wc -l)"
total_count=$((matched_count + unmatched_count))
perc_count=$(percent_of "$unmatched_count" "$total_count")

{
  echo "<center><h2>Total: ${total_count}</h2></center>"
  echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>"
  echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>"
} >> "$output"
|
|
225
|
55
|
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  # The overview table is the mutation table followed by the hotspot table.
  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "---------------- pattern_plots.r ----------------"
  echo "---------------- pattern_plots.r ----------------<br />" >> "$log"

  Rscript "$dir/pattern_plots.r" "$outdir/data_${func}.txt" "$outdir/plot1" "$outdir/plot2" "$outdir/plot3" 2>&1

  {
    echo "<table class='pure-table pure-table-striped'>"
    echo "<thead><tr><th>info</th>"
    # One header cell per class; N comes from the matching *_n.txt file.
    for gene in "${genes[@]}"; do
      gene_n=$(cat "$outdir/${gene}_${func}_n.txt")
      echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $gene_n)</a></th>"
    done

    all_n=$(cat "$outdir/all_${func}_n.txt")
    echo "<th><a href='matched_all_${func}.txt'>all (N = $all_n)</a></th>"
    # The unmatched column shows the count taken from unmatched.txt. The header
    # row is now closed with </tr> (the original emitted a second <tr>).
    echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>"

    # Each data line is: name, then x,y,z triples for the nine classes, then
    # the unmatched triple, then the "all" triple. Triple i lives at fields
    # 1+3i .. 3+3i.
    while IFS=, read -r -a cells; do
      name=${cells[0]}
      if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then
        # Ratio rows: no percent sign. NOTE(review): the original also leaves
        # out the unmatched cell here, so these rows are one cell short of the
        # header - kept as is, confirm whether that is intended.
        pct=""
        order=(0 1 2 3 4 5 6 7 8 10)
      else
        pct="%"
        order=(0 1 2 3 4 5 6 7 8 10 9)
      fi
      row="<tr><td>$name</td>"
      for i in "${order[@]}"; do
        row+="<td>${cells[1 + 3 * i]}/${cells[2 + 3 * i]} (${cells[3 + 3 * i]}${pct})</td>"
      done
      row+="</tr>"
      echo "$row"
    done < "$outdir/data_${func}.txt"
    echo "</table>"
    #echo "<a href='data_${func}.txt'>Download data</a>"
  } >> "$output"
done

echo "<img src='plot1.png' /><br />" >> "$output"
echo "<img src='plot2.png' /><br />" >> "$output"
echo "<img src='plot3.png' /><br />" >> "$output"

echo "</div>" >> "$output" #SHM overview tab end
|
|
275
|
|
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

# SHM Frequency tab: each plot is shown only when its image file exists.
{
  echo "<div class='tabbertab' title='SHM Frequency'>"

  if [[ -e "$outdir/scatter.png" ]]; then
    echo "<img src='scatter.png'/><br />"
    echo "<a href='scatter.txt'>download data</a><br />"
  fi
  if [[ -e "$outdir/frequency_ranges.png" ]]; then
    echo "<img src='frequency_ranges.png'/><br />"
    echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />"
    echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />"
  fi

  echo "</div>" #SHM frequency tab end
} >> "$output"
|
|
294
|
|
# Transition tables tab: one row per class plus a final "All" row. Each row
# holds a heading, the heatmap and stacked images, and the transition counts
# read from transitions_<class>_sum.txt (comma separated).
{
  echo "<div class='tabbertab' title='Transition tables'>"

  echo "<table border='0'>"

  for gene in "${genes[@]}" all; do
    # File names use the lowercase key; only the heading of the combined row
    # is capitalised.
    if [[ "$gene" == "all" ]]; then
      heading="All"
    else
      heading="$gene"
    fi

    echo "<tr>"
    echo "<td><h1>${heading}</h1></td>"
    echo "<td><img src='transitions_heatmap_${gene}.png' /></td>"
    echo "<td><img src='transitions_stacked_${gene}.png' /></td>"
    echo "<td><table class='pure-table transition-table pure-table-bordered'>"
    while IFS=, read -r from a c g t; do
      echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
    done < "$outdir/transitions_${gene}_sum.txt"
    echo "</table></td>"

    echo "</tr>"
  done

  echo "</table>"

  echo "</div>" #transition tables tab end
} >> "$output"
|
|
331
|
|
# Antigen Selection tab: histograms (only when the combined one exists)
# followed by the three embedded baseline PDFs.
{
  echo "<div class='tabbertab' title='Antigen Selection'>"

  if [[ -e "$outdir/aa_histogram.png" ]]; then
    echo "<img src='aa_histogram.png'/><br />"
    echo "<a href='aa_histogram.txt'>download data</a><br />"
    echo "<img src='aa_histogram_ca.png'/><br />"
    echo "<a href='aa_histogram_ca.txt'>download data</a><br />"
    echo "<img src='aa_histogram_cg.png'/><br />"
    echo "<a href='aa_histogram_cg.txt'>download data</a><br />"
    echo "<img src='aa_histogram_cm.png'/><br />"
    echo "<a href='aa_histogram_cm.txt'>download data</a><br />"
  fi

  echo "<embed src='baseline_ca.pdf' width='700px' height='1000px'>"
  echo "<embed src='baseline_cg.pdf' width='700px' height='1000px'>"
  echo "<embed src='baseline_cm.pdf' width='700px' height='1000px'>"

  echo "</div>" #antigen selection tab end
} >> "$output"

# CSR tab: the ca and cg plots, each shown only when its image exists.
{
  echo "<div class='tabbertab' title='CSR'>" #CSR tab

  if [[ -e "$outdir/ca.png" ]]; then
    echo "<img src='ca.png'/><br />"
    echo "<a href='ca.txt'>download data</a><br />"
  fi
  if [[ -e "$outdir/cg.png" ]]; then
    echo "<img src='cg.png'/><br />"
    echo "<a href='cg.txt'>download data</a><br />"
  fi

  echo "</div>" #CSR tab end
} >> "$output"
|
|
366
|
126
|
echo "---------------- change-o MakeDB ----------------"

mkdir "$outdir/change_o"

# The change-o helper scripts are run from inside the change_o directory;
# remember where we came from so we can return afterwards.
tmp="$PWD"

cd "$outdir/change_o" || echo "cannot enter $outdir/change_o" >&2

bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT.txz" false false false "$outdir/change_o/change-o-db.txt"
bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/change_o/change-o-defined_clones-summary.txt"

# Join merged.txt onto the clone table; the result overwrites the clone table.
Rscript "$dir/merge.r" "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/merged.txt" "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" "$outdir/change_o/change-o-db-defined_clones.txt" 2>&1

# Log the command that was just executed (the original echoed a different
# argument list than the one it actually ran).
echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"

# Per-class clone definition. A summary file with at most one line (header
# only) means there are no sequences of that class, in which case placeholder
# files are written instead of running the tools.
for cls in ca cg cm; do
  if [[ $(wc -l < "$outdir/new_IMGT_${cls}/1_Summary.txt") -gt 1 ]]; then
    bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT_${cls}.txz" false false false "$outdir/change_o/change-o-db-${cls}.txt"
    bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db-${cls}.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones-${cls}.txt" "$outdir/change_o/change-o-defined_clones-summary-${cls}.txt"
  else
    echo "No ${cls} sequences" > "$outdir/change_o/change-o-db-defined_clones-${cls}.txt"
    echo "No ${cls} sequences" > "$outdir/change_o/change-o-defined_clones-summary-${cls}.txt"
  fi
done

# Actually go back. The original assigned PWD="$tmp", which only rewrites the
# variable and leaves the process inside change_o.
cd "$tmp" || echo "cannot return to $tmp" >&2

echo "<div class='tabbertab' title='Clonality'>" >> "$output" #clonality tab
|
|
409
|
|
# clonality_table INFILE OUTFILE
# Append an HTML table with the columns "Clone size", "Nr of clones" and
# "Nr of sequences" to OUTFILE.
#   INFILE  - whitespace separated summary; its first line is treated as a
#             header and skipped, every following line becomes one table row
#             built from its first three fields (the third takes the rest of
#             the line)
#   OUTFILE - file the markup is appended to
# A missing INFILE produces an error message from the shell and an empty
# table; the table is still opened and closed.
function clonality_table {
  local infile=$1
  local outfile=$2
  local size clones seqs

  {
    echo "<table class='pure-table pure-table-striped'>"
    echo "<thead><tr><th>Clone size</th><th>Nr of clones</th><th>Nr of sequences</th></tr></thead>"

    {
      # Consume the header line.
      read -r size clones seqs
      # "|| [[ -n ... ]]" keeps a final row that lacks a trailing newline.
      while read -r size clones seqs || [[ -n "$size" ]]; do
        echo "<tr><td>$size</td><td>$clones</td><td>$seqs</td></tr>"
      done
    } < "$infile"

    echo "</table>"
  } >> "$outfile"
}
|
|
# Nested tab set inside the Clonality tab: one sub-tab per clone summary.
echo "<div class='tabber'>" >> "$output"

# Each entry is "<tab title>:<summary file suffix>"; the combined summary has
# no suffix.
for tab in "All:" "Ca:-ca" "Cg:-cg" "Cm:-cm"; do
  echo "<div class='tabbertab' title='${tab%%:*}'>" >> "$output"
  clonality_table "$outdir/change_o/change-o-defined_clones-summary${tab#*:}.txt" "$output"
  echo "</div>" >> "$output"
done

echo "</div>" >> "$output" #clonality tabber end

echo "</div>" >> "$output" #clonality tab end
|
|
451
|
110
|
# download_row DESCRIPTION HREF
# Print one table row whose link downloads HREF (HREF is also used as the
# suggested file name).
download_row() {
  printf "<tr><td>%s</td><td><a href='%s' download='%s' >Download</a></td></tr>\n" "$1" "$2" "$2"
}

# view_row DESCRIPTION HREF
# Print one table row whose link opens HREF in the browser.
view_row() {
  printf "<tr><td>%s</td><td><a href='%s'>View</a></td></tr>\n" "$1" "$2"
}

# Downloads tab, followed by the closing tags of the tab container and the
# document.
{
  echo "<div class='tabbertab' title='Downloads'>"

  echo "<table class='pure-table pure-table-striped'>"
  echo "<thead><tr><th>info</th><th>link</th></tr></thead>"
  download_row "The complete dataset" "merged.txt"
  download_row "The filtered dataset" "filtered.txt"
  download_row "The SHM Overview table as a dataset" "data_sum.txt"
  download_row "The data used to generate the first SHM Overview plot" "plot1.txt"
  download_row "The data used to generate the second SHM Overview plot" "plot2.txt"
  download_row "The data used to generate the third SHM Overview plot" "plot3.txt"
  download_row "The alignment info on the unmatched sequences" "unmatched.txt"

  download_row "Motif data per sequence ID" "motif_per_seq.txt"
  download_row "Mutation data per sequence ID" "mutation_by_id.txt"
  download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
  download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
  view_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"

  view_row "Base count for every sequence" "base_overview.html"

  download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
  download_row "Baseline data" "baseline.txt"
  for cls in ca cg cm; do
    download_row "Baseline ${cls} PDF" "baseline_${cls}.pdf"
    download_row "Baseline ${cls} data" "baseline_${cls}.txt"
  done

  download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
  for cls in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
    download_row "An IMGT archive with just the matched and filtered ${cls} sequences" "new_IMGT_${cls}.txz"
  done

  download_row "The Change-O DB file with defined clones and subclass annotation" "change_o/change-o-db-defined_clones.txt"
  download_row "The Change-O DB defined clones summary file" "change_o/change-o-defined_clones-summary.txt"
  for cls in ca cg cm; do
    download_row "The Change-O DB file with defined clones of ${cls}" "change_o/change-o-db-defined_clones-${cls}.txt"
    download_row "The Change-O DB defined clones summary file of ${cls}" "change_o/change-o-defined_clones-summary-${cls}.txt"
  done

  echo "</table>"

  echo "</div>" #downloads tab end

  echo "</div>" #tabs end

  echo "</html>"
} >> "$output"
|
2
|
508
|
95
|
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"
tmp="$PWD"

mkdir "$outdir/baseline"

# Run the baseline wrapper once for the combined archive (empty class) and
# once per class. Each run gets its own working directory under baseline/.
# When the matching 1_Summary.txt has at most one line (header only) a
# placeholder text file is written instead.
for cls in "" ca cg cm; do
  suffix=${cls:+_$cls}    # "" or "_ca" / "_cg" / "_cm"
  label=${cls:-ca_cg_cm}  # working directory name, also passed to the wrapper

  mkdir "$outdir/baseline/$label"
  if [[ $(wc -l < "$outdir/new_IMGT${suffix}/1_Summary.txt") -gt 1 ]]; then
    (
      # Subshell: the directory change stays local, and the wrapper is skipped
      # rather than run from the wrong place when the cd fails.
      cd "$outdir/baseline/$label" || exit
      bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" "$outdir/new_IMGT${suffix}.txz" "$label" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline${suffix}.pdf" "Sequence.ID" "$outdir/baseline${suffix}.txt"
    )
  else
    echo "No ${cls:+$cls }sequences" > "$outdir/baseline${suffix}.txt"
  fi
done

# Kept from the original flow: continue from the directory recorded in $tmp.
cd "$tmp" || echo "cannot return to $tmp" >&2
|
|
549
|
82
|
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# copy_naive_output ARCHIVE DESTINATION
# Copy ARCHIVE to DESTINATION unless DESTINATION is empty or the literal
# "None", in which case nothing is done.
copy_naive_output() {
  local archive=$1 destination=$2
  if [[ -z "$destination" || "$destination" == "None" ]]; then
    return 0
  fi
  cp "$archive" "$destination"
}

# The original guarded all three copies with "$naive_output", a variable that
# is never set, so the guard was always true. Each destination is now checked
# on its own.
copy_naive_output "$outdir/new_IMGT_ca.txz" "${naive_output_ca}"
copy_naive_output "$outdir/new_IMGT_cg.txz" "${naive_output_cg}"
copy_naive_output "$outdir/new_IMGT_cm.txz" "${naive_output_cm}"
|
|
559
|
81
|
# Close the table of the per-sequence base count page.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log next to the report, then reuse the log path for a
# landing page that links to index.html and lists the filtering steps.
mv "$log" "$outdir/log.html"

{
  echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
  echo "<table border = 1>"
  echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
  # filtering_steps.txt is tab separated: step, sequence count, percentage.
  # IFS is set for the read only, so the shell-wide IFS is never touched (the
  # original saved it from the wrong variable and "restored" that).
  while IFS=$'\t' read -r step seq perc; do
    echo "<tr>"
    echo "<td>$step</td>"
    echo "<td>$seq</td>"
    echo "<td>${perc}%</td>"
    echo "</tr>"
  done < "$outdir/filtering_steps.txt"
  # A closing tag takes no attributes: </table>, not </table border>.
  echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"
|
47
|
584
|
110
|
585
|
|
586
|
|
587
|
|
588
|
|
589
|
|
590
|
|
591
|
|
592
|
|
593
|
|
594
|
|
595
|
|
596
|
|
597
|
|
598
|
|
599
|
|
600
|
|
601
|
|
602
|
|
603
|
|
604
|