#!/bin/bash
#
# Pipeline driver: unpacks an IMGT archive, identifies genes, runs the R and
# Python analysis scripts that live next to this file, and builds an HTML
# report in the output directory.
#
# Positional arguments:
#   $1  input               IMGT archive (zip or xz-compressed tar)
#   $2  method              "custom" uses gene_identification.py, anything
#                           else uses blastn
#   $3  log                 log file; becomes the main html page at the end
#   $4  outdir              directory that receives every result file
#   $5  title               heading of the report
#   $6  include_fr1         forwarded to mutation_analysis.r / .py
#   $7  functionality       forwarded to merge_and_filter.r
#   $8  unique              forwarded to merge_and_filter.r
#   $9  naive_output_ca     destination for a copy of new_IMGT_ca.txz
#   $10 naive_output_cg     destination for a copy of new_IMGT_cg.txz
#   $11 naive_output_cm     destination for a copy of new_IMGT_cm.txz
#   $12 filter_unique       forwarded to merge_and_filter.r
#   $13 class_filter        forwarded to merge_and_filter.r
#   $14 empty_region_filter forwarded to merge_and_filter.r

#set -e

# Directory holding this script and its helper scripts/resources.
dir="$(cd "$(dirname "$0")" && pwd)"

input="$1"
method="$2"
log="$3" #becomes the main html page at the end
outdir="$4"
output="$outdir/index.html" #copied to $log location at the end
title="$5"
include_fr1="$6"
functionality="$7"
unique="$8"
naive_output_ca="$9"
naive_output_cg="${10}"
naive_output_cm="${11}"
filter_unique="${12}"
class_filter="${13}"
empty_region_filter="${14}"

# -p: do not fail when the output directory already exists.
mkdir -p "$outdir"

# Unpack the stylesheet/javascript bundle the report pages link to.
tar -xzf "$dir/style.tar.gz" -C "$outdir"

echo "---------------- read parameters ----------------"
echo "---------------- read parameters ----------------<br />" > "$log"

# Unpack the IMGT archive into $PWD/files/ and concatenate the numbered IMGT
# report files into flat working copies in $PWD.
echo "unpacking IMGT file"

input_type="$(file "$input")"
if [[ "$input_type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$input_type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  echo "tar -xJf $input -C $PWD/files/$title"
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
fi

# Each entry is "<IMGT file number>:<working file name>".
# find -exec is used instead of cat `find ...`: with no match the old form
# ran a bare `cat`, which reads stdin, and it also split paths on whitespace.
for mapping in 1:summary 3:sequences 5:aa 6:junction 7:mutationanalysis 8:mutationstats 10:hotspots; do
  find "$PWD/files/" -type f -name "${mapping%%:*}_*" -exec cat {} + > "$PWD/${mapping#*:}.txt"
done

# Use BLASTN_DIR from the environment when it looks set (at least five
# characters long); otherwise fall back to the development default.
if (( ${#BLASTN_DIR} >= 5 )); then
  echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
else
  BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
  echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
fi

# Gene identification: writes $outdir/identified_genes.txt either with the
# custom Python script or with blastn against subclass_definition.db.
echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  echo "---------------- summary_to_fasta.py ----------------"
  echo "---------------- summary_to_fasta.py ----------------<br />" >> "$log"

  python "$dir/summary_to_fasta.py" --input "$PWD/summary.txt" --fasta "$PWD/sequences.fasta"

  # blastn -outfmt 6 prints no header, so write the column names first.
  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi

echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# Every argument is quoted so an empty option keeps its position instead of
# shifting the arguments that follow it.
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" "$PWD/sequences.txt" "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" "$outdir/merged.txt" \
  "$outdir/before_unique_filter.txt" "$outdir/unmatched.txt" \
  "$method" "$functionality" "$unique" \
  "${filter_unique}" "${class_filter}" "${empty_region_filter}" 2>&1

# Build $outdir/new_IMGT from the unpacked IMGT files, clone it once per
# class/subclass, run new_imgt.r on every copy and pack each directory into
# a .txz archive next to it.
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

imgt_files=(
  1_Summary.txt
  2_IMGT-gapped-nt-sequences.txt
  3_Nt-sequences.txt
  4_IMGT-gapped-AA-sequences.txt
  5_AA-sequences.txt
  6_Junction.txt
  7_V-REGION-mutation-and-AA-change-table.txt
  8_V-REGION-nt-mutation-statistics.txt
  9_V-REGION-AA-change-statistics.txt
  10_V-REGION-mutation-hotspots.txt
)
imgt_subsets=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

mkdir "$outdir/new_IMGT"

# Source files are matched on their numeric prefix ("1_*", "2_*", ...).
# find -exec avoids a bare `cat` reading stdin when nothing matches.
for imgt_file in "${imgt_files[@]}"; do
  find "$PWD/files/" -type f -name "${imgt_file%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/$imgt_file"
done

# All copies are made before new_imgt.r touches any directory.
for subset in "${imgt_subsets[@]}"; do
  mkdir "$outdir/new_IMGT_${subset}"
  cp "$outdir"/new_IMGT/* "$outdir/new_IMGT_${subset}"
done

Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
for subset in "${imgt_subsets[@]}"; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${subset}/" "$outdir/merged.txt" "$subset" 2>&1
done

# tar weirdness: archive from inside each directory so members carry no path
# prefix. The subshell keeps the working directory of the script unchanged
# and skips tar when the cd fails.
( cd "$outdir/new_IMGT/" && tar -cJf ../new_IMGT.txz * )
for subset in "${imgt_subsets[@]}"; do
  ( cd "$outdir/new_IMGT_${subset}/" && tar -cJf "../new_IMGT_${subset}.txz" * )
done

# Run the mutation analysis scripts (R and Python) and the amino-acid
# histogram script on the merged data.
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma-separated class list handed to the analysis scripts.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1

echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1

# Rename the "aa_histogram_" files (empty class suffix) to plain names.
if [ -e "$outdir/aa_histogram_.png" ]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi

# Classes/subclasses that get their own column or table in the report.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Only the "sum" tables are generated. The script used to assign
# (sum mean median) first and overwrite it on the next line.
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Start the base count table. The closing </table> is written later in the
# script, so the file is left open-ended here.
echo "<table border='1'>" > "$outdir/base_overview.html"

# ntoverview.txt is tab-separated; -r keeps backslashes literal. The table
# file is opened once for the whole loop rather than once per row.
while IFS=$'\t' read -r ID class seq A C G T; do
  echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
done < "$outdir/sequence_overview/ntoverview.txt" >> "$outdir/base_overview.html"

# percent_of PART TOTAL
# Prints PART as a percentage of TOTAL with two decimals. Prints 0.00 when
# TOTAL is zero or empty instead of dividing by zero. The C locale keeps the
# decimal separator a dot.
percent_of() {
  LC_ALL=C awk -v part="$1" -v total="$2" \
    'BEGIN { if (total + 0 > 0) printf "%.2f", part / total * 100; else printf "0.00" }'
}

# Start the main report page: title plus the script and stylesheet links.
{
  echo "<html><center><h1>$title</h1></center>"
  echo "<meta name='viewport' content='width=device-width, initial-scale=1'>"
  echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
  echo "<script type='text/javascript' src='tabber.js'></script>"
  echo "<script type='text/javascript' src='script.js'></script>"
  echo "<link rel='stylesheet' type='text/css' href='style.css'>"
  echo "<link rel='stylesheet' type='text/css' href='pure-min.css'>"
} > "$output"

# Count data rows: drop lines mentioning 'unmatched', then skip the first
# remaining line (tail -n +2).
matched_count="$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)"
unmatched_count="$(tail -n +2 "$outdir/unmatched.txt" | wc -l)"
total_count=$((matched_count + unmatched_count))
# The earlier bc expression divided at scale=2 before multiplying, which
# truncated the result to whole percents and failed on total_count=0.
perc_count="$(percent_of "$unmatched_count" "$total_count")"

{
  echo "<center><h2>Total: ${total_count}</h2></center>"
  echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>"
  echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>"
} >> "$output"

# "SHM Overview" tab: one table per entry in funcs with a column per gene,
# plus "all" and "unmatched", followed by the three pattern plots.
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

# shm_cell X Y Z [SUFFIX]
# Prints one table cell of the form <td>X/Y (Z[SUFFIX])</td>, no newline.
shm_cell() {
  printf '<td>%s/%s (%s%s)</td>' "$1" "$2" "$3" "${4:-}"
}

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "---------------- pattern_plots.r ----------------"
  echo "---------------- pattern_plots.r ----------------<br />" >> "$log"

  Rscript "$dir/pattern_plots.r" "$outdir/data_${func}.txt" "$outdir/plot1" "$outdir/plot2" "$outdir/plot3" 2>&1

  {
    echo "<table class='pure-table pure-table-striped'>"
    echo "<thead><tr><th>info</th>"
    for gene in "${genes[@]}"; do
      n_seq="$(cat "$outdir/${gene}_${func}_n.txt")"
      echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $n_seq)</a></th>"
    done

    n_seq="$(cat "$outdir/all_${func}_n.txt")"
    echo "<th><a href='matched_all_${func}.txt'>all (N = $n_seq)</a></th>"
    # The unmatched column shows unmatched_count, set earlier in the script.
    # The header row is closed with </tr>; it used to emit a stray <tr>.
    echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr></thead>"

    # data_<func>.txt is comma-separated: a row name followed by x,y,z
    # triples in the order of the variable list below.
    while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz; do
      # Ratio rows carry no percent sign and no unmatched cell.
      if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ]; then #meh
        suffix=""
        with_unmatched=false
      else
        suffix="%"
        with_unmatched=true
      fi

      printf '<tr><td>%s</td>' "$name"
      shm_cell "$cax" "$cay" "$caz" "$suffix"
      shm_cell "$ca1x" "$ca1y" "$ca1z" "$suffix"
      shm_cell "$ca2x" "$ca2y" "$ca2z" "$suffix"
      shm_cell "$cgx" "$cgy" "$cgz" "$suffix"
      shm_cell "$cg1x" "$cg1y" "$cg1z" "$suffix"
      shm_cell "$cg2x" "$cg2y" "$cg2z" "$suffix"
      shm_cell "$cg3x" "$cg3y" "$cg3z" "$suffix"
      shm_cell "$cg4x" "$cg4y" "$cg4z" "$suffix"
      shm_cell "$cmx" "$cmy" "$cmz" "$suffix"
      shm_cell "$allx" "$ally" "$allz" "$suffix"
      if [ "$with_unmatched" = true ]; then
        shm_cell "$unx" "$uny" "$unz" "$suffix"
      fi
      printf '</tr>\n'
    done < "$outdir/data_${func}.txt"

    echo "</table>"
    #echo "<a href='data_${func}.txt'>Download data</a>"
  } >> "$output"
done

{
  echo "<img src='plot1.png' /><br />"
  echo "<img src='plot2.png' /><br />"
  echo "<img src='plot3.png' /><br />"

  echo "</div>" #SHM overview tab end
} >> "$output"

# "SHM Frequency" tab: each image and its download links are added only
# when the image file exists in $outdir.
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

{
  echo "<div class='tabbertab' title='SHM Frequency'>"

  if [ -e "$outdir/scatter.png" ]; then
    echo "<img src='scatter.png'/><br />"
    echo "<a href='scatter.txt'>download data</a><br />"
  fi

  if [ -e "$outdir/frequency_ranges.png" ]; then
    echo "<img src='frequency_ranges.png'/><br />"
    echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />"
    echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />"
  fi

  echo "</div>" #SHM frequency tab end
} >> "$output"

# transition_row HEADING KEY
# Prints one row of the transition overview: the heading, the heatmap and
# stacked images for KEY, and an inner table built from the comma-separated
# file $outdir/transitions_<KEY>_sum.txt.
transition_row() {
  local heading=$1 key=$2
  local from a c g t

  echo "<tr>"
  echo "<td><h1>${heading}</h1></td>"
  echo "<td><img src='transitions_heatmap_${key}.png' /></td>"
  echo "<td><img src='transitions_stacked_${key}.png' /></td>"
  echo "<td><table class='pure-table transition-table pure-table-bordered'>"
  while IFS=, read -r from a c g t; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
  done < "$outdir/transitions_${key}_sum.txt"
  echo "</table></td>"

  echo "</tr>"
}

# "Transition tables" tab: one row per gene, then a final row for all.
{
  echo "<div class='tabbertab' title='Transition tables'>"

  echo "<table border='0'>"

  for gene in "${genes[@]}"; do
    transition_row "$gene" "$gene"
  done

  transition_row "All" "all"

  echo "</table>"

  echo "</div>" #transition tables tab end
} >> "$output"

# "Antigen Selection" tab: the amino-acid histograms (only when the combined
# histogram image exists) followed by the embedded baseline PDFs.
{
  echo "<div class='tabbertab' title='Antigen Selection'>"

  if [ -e "$outdir/aa_histogram.png" ]; then
    for histogram in aa_histogram aa_histogram_ca aa_histogram_cg aa_histogram_cm; do
      echo "<img src='${histogram}.png'/><br />"
      echo "<a href='${histogram}.txt'>download data</a><br />"
    done
  fi

  for baseline_class in ca cg cm; do
    echo "<embed src='baseline_${baseline_class}.pdf' width='700px' height='1000px'>"
  done

  echo "</div>" #antigen selection tab end
} >> "$output"

# "CSR" tab: the ca and cg images with their data links, each only when the
# image file exists in $outdir.
{
  echo "<div class='tabbertab' title='CSR'>"

  for csr_class in ca cg; do
    if [ -e "$outdir/${csr_class}.png" ]; then
      echo "<img src='${csr_class}.png'/><br />"
      echo "<a href='${csr_class}.txt'>download data</a><br />"
    fi
  done

  echo "</div>" #CSR tab end
} >> "$output"

# download_row LABEL HREF
# Prints one row of the downloads table: LABEL (may contain HTML) and a
# "Download" link pointing at HREF.
download_row() {
  echo "<tr><td>$1</td><td><a href='$2'>Download</a></td></tr>"
}

# "Downloads" tab: links to every result file, then close the tab container
# and the page.
{
  echo "<div class='tabbertab' title='Downloads'>"

  echo "<table class='pure-table pure-table-striped'>"
  echo "<thead><tr><th>info</th><th>link</th></tr></thead>"
  download_row "The complete dataset" "merged.txt"
  download_row "The SHM Overview table as a dataset" "data_sum.txt"
  download_row "The data used to generate the first SHM Overview plot" "plot1.txt"
  download_row "The data used to generate the second SHM Overview plot" "plot2.txt"
  download_row "The data used to generate the third SHM Overview plot" "plot3.txt"
  download_row "The alignment info on the unmatched sequences" "unmatched.txt"

  download_row "Motif data per sequence ID" "motif_per_seq.txt"
  download_row "Mutation data per sequence ID" "mutation_by_id.txt"
  download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
  download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
  download_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"

  download_row "Base count for every sequence" "base_overview.html"

  download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
  download_row "Baseline data" "baseline.txt"
  for download_class in ca cg cm; do
    download_row "Baseline ${download_class} PDF" "baseline_${download_class}.pdf"
    download_row "Baseline ${download_class} data" "baseline_${download_class}.txt"
  done

  download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
  for download_class in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
    download_row "An IMGT archive with just the matched and filtered ${download_class} sequences" "new_IMGT_${download_class}.txz"
  done

  download_row "The Change-O DB file with defined clones and subclass annotation" "change_o/change-o-db-defined_clones.txt"
  download_row "The Change-O DB defined clones summary file" "change_o/change-o-defined_clones-summary.txt"
  for download_class in ca cg cm; do
    download_row "The Change-O DB file with defined clones of ${download_class}" "change_o/change-o-db-defined_clones-${download_class}.txt"
    download_row "The Change-O DB defined clones summary file of ${download_class}" "change_o/change-o-defined_clones-summary-${download_class}.txt"
  done

  echo "</table>"

  echo "</div>" #downloads tab end

  echo "</div>" #tabs end

  echo "</html>"
} >> "$output"

# Baseline analysis for the full set and for the ca, cg and cm subsets.
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"

mkdir "$outdir/baseline"

# run_baseline IMGT_NAME LABEL SUFFIX EMPTY_MESSAGE
# Creates $outdir/baseline/LABEL. When $outdir/IMGT_NAME/1_Summary.txt has
# more than one line, runs baseline/wrapper.sh from inside that directory on
# $outdir/IMGT_NAME.txz, writing $outdir/baseline<SUFFIX>.pdf and .txt.
# Otherwise writes EMPTY_MESSAGE to $outdir/baseline<SUFFIX>.txt.
run_baseline() {
  local imgt_name=$1 label=$2 suffix=$3 empty_message=$4

  mkdir "$outdir/baseline/$label"
  if [[ $(wc -l < "$outdir/$imgt_name/1_Summary.txt") -gt 1 ]]; then
    # Subshell: the directory change does not leak into the rest of the
    # script, and the wrapper is skipped if the cd fails.
    (
      cd "$outdir/baseline/$label" || exit 1
      bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" \
        "$outdir/${imgt_name}.txz" "$label" \
        "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" \
        "$outdir/baseline${suffix}.pdf" "Sequence.ID" "$outdir/baseline${suffix}.txt"
    )
  else
    echo "$empty_message" > "$outdir/baseline${suffix}.txt"
  fi
}

run_baseline new_IMGT ca_cg_cm "" "No sequences"
run_baseline new_IMGT_ca ca _ca "No ca sequences"
run_baseline new_IMGT_cg cg _cg "No cg sequences"
run_baseline new_IMGT_cm cm _cm "No cm sequences"

echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# copy_naive_output SRC DEST
# Copies SRC to DEST unless DEST is empty or the literal "None".
# The previous guard tested $naive_output, which this script never assigns,
# so it was always true and the copies ran even without a destination.
# NOTE(review): assumes the caller passes "None" for an output that was not
# requested — confirm against the tool definition.
copy_naive_output() {
  local src=$1 dest=$2
  if [[ -n "$dest" && "$dest" != "None" ]]; then
    cp "$src" "$dest"
  fi
}

copy_naive_output "$outdir/new_IMGT_ca.txz" "${naive_output_ca}"
copy_naive_output "$outdir/new_IMGT_cg.txz" "${naive_output_cg}"
copy_naive_output "$outdir/new_IMGT_cm.txz" "${naive_output_cm}"

# Close the base count table that was started earlier in the script.
echo "</table>" >> "$outdir/base_overview.html"

# Change-O: build the database and define clones for the full input and for
# the ca, cg and cm archives.
echo "---------------- change-o MakeDB ----------------"

mkdir "$outdir/change_o"

# Everything runs from inside $outdir/change_o. The subshell restores the
# working directory afterwards; the old `PWD="$tmp"` only overwrote the
# variable and never changed directory.
(
  cd "$outdir/change_o" || exit 1

  bash "$dir/change_o/makedb.sh" "$input" false false false "$outdir/change_o/change-o-db.txt"
  bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/change_o/change-o-defined_clones-summary.txt"

  # merge.r reads and rewrites change-o-db-defined_clones.txt in one call.
  Rscript "$dir/merge.r" "$outdir/change_o/change-o-db-defined_clones.txt" "$outdir/merged.txt" "all" "Sequence.ID,best_match" "SEQUENCE_ID" "Sequence.ID" "$outdir/change_o/change-o-db-defined_clones.txt" 2>&1

  # Log the command that was actually run. The old message showed a doubled
  # $outdir and different arguments.
  echo "Rscript $dir/merge.r $outdir/change_o/change-o-db-defined_clones.txt $outdir/merged.txt 'all' 'Sequence.ID,best_match' 'SEQUENCE_ID' 'Sequence.ID' $outdir/change_o/change-o-db-defined_clones.txt 2>&1"

  for clone_class in ca cg cm; do
    if [[ $(wc -l < "$outdir/new_IMGT_${clone_class}/1_Summary.txt") -gt 1 ]]; then
      bash "$dir/change_o/makedb.sh" "$outdir/new_IMGT_${clone_class}.txz" false false false "$outdir/change_o/change-o-db-${clone_class}.txt"
      bash "$dir/change_o/define_clones.sh" bygroup "$outdir/change_o/change-o-db-${clone_class}.txt" gene first ham none min complete 3.0 "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt" "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
    else
      # Placeholder files keep the download links in the report valid.
      echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-db-defined_clones-${clone_class}.txt"
      echo "No ${clone_class} sequences" > "$outdir/change_o/change-o-defined_clones-summary-${clone_class}.txt"
    fi
  done
)

# Keep the collected log as log.html, then reuse $log as the landing page
# that links to the report and shows the filtering steps.
mv "$log" "$outdir/log.html"

# filtering_steps_rows FILE
# Prints one HTML table row per line of the tab-separated FILE
# (step, sequence count, percentage). IFS is set for `read` only, so the
# global IFS is left untouched; the old code saved $TMP instead of $IFS and
# clobbered IFS when "restoring" it.
filtering_steps_rows() {
  local step seq perc
  while IFS=$'\t' read -r step seq perc; do
    echo "<tr>"
    echo "<td>$step</td>"
    echo "<td>$seq</td>"
    echo "<td>${perc}%</td>"
    echo "</tr>"
  done < "$1"
}

{
  echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
  echo "<table border = 1>"
  echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
  filtering_steps_rows "$outdir/filtering_steps.txt"
  echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"