#!/bin/bash
#set -e

# Builds an HTML report from an IMGT archive by driving the Python/R helper
# scripts that live in the same directory as this file.
#
# Positional arguments:
#   $1   input                IMGT archive (zip or xz-compressed tar)
#   $2   method               "custom" selects gene_identification.py, anything
#                             else selects blastn
#   $3   log                  progress log; rewritten at the end as a page that
#                             links to the report
#   $4   outdir               directory that receives every generated file
#   $5   title                report heading (also the XZ extraction subdirectory)
#   $6   include_fr1          forwarded to the mutation_analysis scripts
#   $7   functionality        forwarded to merge_and_filter.r
#   $8   unique               forwarded to merge_and_filter.r
#   $9   naive_output_ca      destination for the ca IMGT archive
#   $10  naive_output_cg      destination for the cg IMGT archive
#   $11  naive_output_cm      destination for the cm IMGT archive
#   $12  filter_unique        forwarded to merge_and_filter.r
#   $13  class_filter         forwarded to merge_and_filter.r
#   $14  empty_region_filter  forwarded to merge_and_filter.r

# Absolute directory of this script, used to locate the helper scripts.
dir="$(cd "$(dirname "$0")" && pwd)"

input=$1
method=$2
log=$3 #becomes the main html page at the end
outdir=$4
output="$outdir/index.html" #the report itself; $log ends up linking to it
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}

# -p: do not fail when the directory already exists.
mkdir -p "$outdir"

# NOTE(review): presumably this archive holds the js/css files that the report
# references by relative path - confirm against style.tar.gz.
tar -xzf "$dir/style.tar.gz" -C "$outdir"

echo "---------------- read parameters ----------------"
echo "---------------- read parameters ----------------<br />" > "$log"

echo "unpacking IMGT file"

# The description printed by `file` decides which extraction tool is used.
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  echo "tar -xJf $input -C $PWD/files/$title"
  # Quoted because the title may contain spaces.
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
else
  # Neither branch matched: nothing was extracted, so the files below end up empty.
  echo "Unrecognized archive type: $type" >&2
fi

# Concatenate every extracted IMGT file with a given numeric prefix into one
# working file. Each entry is "<file name prefix>:<working file base name>".
# `find -exec cat {} +` copes with spaces in paths and, unlike cat `find ...`,
# does not fall back to reading stdin when nothing matches.
for imgt_pair in "1_:summary" "3_:sequences" "5_:aa" "6_:junction" \
  "7_:mutationanalysis" "8_:mutationstats" "10_:hotspots"; do
  imgt_prefix="${imgt_pair%%:*}"
  imgt_target="${imgt_pair#*:}"
  find "$PWD/files/" -type f -name "${imgt_prefix}*" -exec cat {} + \
    > "$PWD/${imgt_target}.txt"
done

#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

echo "${BLASTN_DIR}"

echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  # Locate the "Sequence ID" and "Sequence" columns by counting the tabs that
  # precede them. Only the first line is inspected, so additional header lines
  # from concatenated summary files cannot inflate the count.
  summary_header="$(head -n 1 "$PWD/summary.txt")"
  ID_index=$(grep -o -P ".+Sequence ID" <<< "$summary_header" | grep -o -P "\t" | wc -l)
  ID_index=$((ID_index+1))
  sequence_index=$(grep -o -P ".+\tSequence" <<< "$summary_header" | grep -o -P "\t" | wc -l)
  sequence_index=$((sequence_index+1))

  # Write one FASTA record per data row, skipping rows whose sequence is empty.
  # Fields are split on tabs only, so IDs containing spaces stay intact.
  awk -F'\t' -v id_col="$ID_index" -v seq_col="$sequence_index" \
    'NR > 1 && $seq_col != "" { print ">" $id_col "\n" $seq_col }' \
    "$PWD/summary.txt" > "$PWD/sequences.fasta"

  # Use the blastn from BLASTN_DIR when it is set, otherwise the one on PATH.
  blastn_bin="blastn"
  if [[ -n "${BLASTN_DIR}" ]]; then
    blastn_bin="${BLASTN_DIR}/blastn"
  fi

  # -outfmt 6 prints no header line, so the column names are written first.
  echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > "$outdir/identified_genes.txt"
  "$blastn_bin" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi

echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# merge_and_filter.r takes its arguments by position; every value is quoted so
# that an empty or space-containing setting cannot shift the ones after it.
# The first six paths exist before this call; merged.txt,
# before_unique_filter.txt and unmatched.txt are read by later steps.
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" \
  "$PWD/sequences.txt" \
  "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" \
  "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" \
  "$outdir/merged.txt" \
  "$outdir/before_unique_filter.txt" \
  "$outdir/unmatched.txt" \
  "$method" \
  "$functionality" \
  "$unique" \
  "${filter_unique}" \
  "${class_filter}" \
  "${empty_region_filter}" 2>&1

echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

mkdir -p "$outdir/new_IMGT"

# Target file names of the rebuilt IMGT set. The digits before the first "_"
# select which extracted files are concatenated into each target.
imgt_files=(
  "1_Summary.txt"
  "2_IMGT-gapped-nt-sequences.txt"
  "3_Nt-sequences.txt"
  "4_IMGT-gapped-AA-sequences.txt"
  "5_AA-sequences.txt"
  "6_Junction.txt"
  "7_V-REGION-mutation-and-AA-change-table.txt"
  "8_V-REGION-nt-mutation-statistics.txt"
  "9_V-REGION-AA-change-statistics.txt"
  "10_V-REGION-mutation-hotspots.txt"
)
for imgt_file in "${imgt_files[@]}"; do
  # find -exec handles paths with spaces and never waits on stdin.
  find "$PWD/files/" -type f -name "${imgt_file%%_*}_*" -exec cat {} + \
    > "$outdir/new_IMGT/$imgt_file"
done

# One copy of the complete set per class/subclass. All copies are made before
# new_imgt.r runs, because that script is pointed at the directories themselves.
imgt_classes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
for imgt_class in "${imgt_classes[@]}"; do
  mkdir -p "$outdir/new_IMGT_${imgt_class}"
  cp "$outdir/new_IMGT/"* "$outdir/new_IMGT_${imgt_class}"
done

# "-" is passed for the unrestricted set, the class name for every other set.
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
for imgt_class in "${imgt_classes[@]}"; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "${imgt_class}" 2>&1
done

# Pack each directory into <name>.txz next to it. tar runs from inside the
# directory so the members are stored without a leading path; the subshell
# keeps the working directory of the script unchanged and skips tar when the
# cd fails.
for imgt_dir in new_IMGT "${imgt_classes[@]/#/new_IMGT_}"; do
  (
    cd "$outdir/$imgt_dir/" || exit 1
    tar -cJf "../${imgt_dir}.txz" *
  )
done

echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma-separated class list handed to the analysis scripts.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1


echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1

# If a histogram with an empty class suffix was written, give it the plain
# name that the report links to.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi

# Classes/subclasses that get their own column or row in the report.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Only the "sum" tables are rendered ("mean" and "median" used to be assigned
# here as well but were overwritten on the very next line).
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir -p "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Render ntoverview.txt (tab separated: ID, class, sequence, A, C, G, T) as an
# HTML table. The closing </table> tag is appended near the end of the script.
{
  echo "<table border='1'>"
  while IFS=$'\t' read -r ID class seq A C G T; do
    echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
  done < "$outdir/sequence_overview/ntoverview.txt"
} > "$outdir/base_overview.html"

# percentage_of PART TOTAL
# Prints PART as a percentage of TOTAL with two decimals, or "0" when TOTAL is
# not positive (avoids a division by zero when no sequences were read).
# The multiplication happens before the division so no precision is lost.
percentage_of() {
  LC_ALL=C awk -v part="$1" -v total="$2" \
    'BEGIN { if (total > 0) printf "%.2f", part * 100 / total; else printf "0" }'
}

# Start of the report page: title plus the scripts and stylesheet it loads.
{
  echo "<html><center><h1>$title</h1></center>"
  echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
  echo "<script type='text/javascript' src='tabber.js'></script>"
  echo "<script type='text/javascript' src='script.js'></script>"
  echo "<link rel='stylesheet' type='text/css' href='style.css'>"
} > "$output"

#display the matched/unmatched for clarity

# Both files start with a header line, which tail -n +2 drops before counting.
matched_count="$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)"
unmatched_count="$(tail -n +2 "$outdir/unmatched.txt" | wc -l)"
total_count=$((matched_count + unmatched_count))
perc_count="$(percentage_of "$unmatched_count" "$total_count")"

{
  echo "<center><h2>Total: ${total_count}</h2></center>"
  echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>"
  echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>"
} >> "$output"

echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

# Display order of the x/y/z value triples that every data row carries.
overview_cells=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm all)

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  # The table is the mutation rows followed by the hotspot rows.
  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  {
    echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>"

    # Header row: one column per class with its sequence count, then "all"
    # and "unmatched".
    echo "<tr><th>info</th>"
    for gene in "${genes[@]}"; do
      gene_n="$(cat "$outdir/${gene}_${func}_n.txt")"
      echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $gene_n)</a></th>"
    done
    all_n="$(cat "$outdir/all_${func}_n.txt")"
    echo "<th><a href='matched_all_${func}.txt'>all (N = $all_n)</a></th>"
    echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>"
    echo "</tr>"

    # Each CSV row is a label followed by x,y,z triples in the order
    # ca, ca1, ca2, cg, cg1, cg2, cg3, cg4, cm, unmatched, all.
    while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz; do
      if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then #meh
        # Ratio rows: no percent sign.
        # NOTE(review): these rows have no "unmatched" cell, so they are one
        # cell shorter than the header - confirm this is intended.
        unit=""
        row_cells=("${overview_cells[@]}")
      else
        unit="%"
        row_cells=("${overview_cells[@]}" un)
      fi

      row="<tr><td>$name</td>"
      for cell in "${row_cells[@]}"; do
        # Indirect expansion picks the <cell>x / <cell>y / <cell>z variables
        # filled in by read above.
        cell_x="${cell}x"
        cell_y="${cell}y"
        cell_z="${cell}z"
        row+="<td>${!cell_x}/${!cell_y} (${!cell_z}${unit})</td>"
      done
      echo "${row}</tr>"
    done < "$outdir/data_${func}.txt"

    echo "</table>"
  } >> "$output"
done

echo "</div>" >> "$output" #SHM overview tab end

echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

# "SHM Frequency" tab: each plot is linked only when its image exists.
{
  echo "<div class='tabbertab' title='SHM Frequency'>"

  if [[ -e "$outdir/scatter.png" ]]; then
    echo "<img src='scatter.png'/><br />"
    echo "<a href='scatter.txt'>download data</a><br />"
  fi
  if [[ -e "$outdir/frequency_ranges.png" ]]; then
    echo "<img src='frequency_ranges.png'/><br />"
    echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />"
    echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />"
  fi

  echo "</div>" #SHM frequency tab end
} >> "$output"

# "Transition tables" tab: one row per class plus a final "All" row, each with
# a heatmap, a stacked plot and the underlying counts as a nested table.
{
  echo "<div class='tabbertab' title='Transition tables'>"
  echo "<table border='0'>"

  for transition_set in "${genes[@]}" all; do
    # The combined row is headed "All" while its files use the lowercase name.
    if [[ "$transition_set" == "all" ]]; then
      transition_heading="All"
    else
      transition_heading="$transition_set"
    fi

    echo "<tr>"
    echo "<td><h1>${transition_heading}</h1></td>"
    echo "<td><img src='transitions_heatmap_${transition_set}.png' /></td>"
    echo "<td><img src='transitions_stacked_${transition_set}.png' /></td>"
    echo "<td><table border='1'>"
    # CSV rows: originating base followed by the counts towards a, c, g and t.
    while IFS=, read -r from a c g t; do
      echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
    done < "$outdir/transitions_${transition_set}_sum.txt"
    echo "</table></td>"

    echo "</tr>"
  done

  echo "</table>"

  echo "</div>" #transition tables tab end
} >> "$output"

# "Antigen Selection" tab: the amino-acid histograms (all four are linked as
# soon as the combined one exists) followed by the embedded baseline PDFs.
{
  echo "<div class='tabbertab' title='Antigen Selection'>"

  if [[ -e "$outdir/aa_histogram.png" ]]; then
    for histogram in aa_histogram aa_histogram_ca aa_histogram_cg aa_histogram_cm; do
      echo "<img src='${histogram}.png'/><br />"
      echo "<a href='${histogram}.txt'>download data</a><br />"
    done
  fi

  for baseline_class in ca cg cm; do
    echo "<embed src='baseline_${baseline_class}.pdf' width='700px' height='1000px'>"
  done

  echo "</div>" #antigen selection tab end
} >> "$output"

# "CSR" tab: the ca and cg plots, each shown only when its image exists.
{
  echo "<div class='tabbertab' title='CSR'>"

  for csr_class in ca cg; do
    if [[ -e "$outdir/${csr_class}.png" ]]; then
      echo "<img src='${csr_class}.png'/><br />"
      echo "<a href='${csr_class}.txt'>download data</a><br />"
    fi
  done

  echo "</div>" #CSR tab end
} >> "$output"

# "Downloads" tab: links to every data file, then the tags that close the tab
# container and the page.
{
  echo "<div class='tabbertab' title='Downloads'>"

  echo "<table border='1' width='700px'>"
  # Static rows; the quoted delimiter keeps the text literal.
  cat <<'DOWNLOAD_ROWS'
<tr><td>The complete dataset</td><td><a href='merged.txt'>Download</a></td></tr>
<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt'>Download</a></td></tr>
<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt'>Download</a></td></tr>
<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt'>Download</a></td></tr>
<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt'>Download</a></td></tr>
<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt'>Download</a></td></tr>
<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>Download</a></td></tr>
<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>Download</a></td></tr>
<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf'>Download</a></td></tr>
<tr><td>Baseline data</td><td><a href='baseline.txt'>Download</a></td></tr>
<tr><td>Baseline ca PDF</td><td><a href='baseline_ca.pdf'>Download</a></td></tr>
<tr><td>Baseline ca data</td><td><a href='baseline_ca.txt'>Download</a></td></tr>
<tr><td>Baseline cg PDF</td><td><a href='baseline_cg.pdf'>Download</a></td></tr>
<tr><td>Baseline cg data</td><td><a href='baseline_cg.txt'>Download</a></td></tr>
<tr><td>Baseline cm PDF</td><td><a href='baseline_cm.pdf'>Download</a></td></tr>
<tr><td>Baseline cm data</td><td><a href='baseline_cm.txt'>Download</a></td></tr>
DOWNLOAD_ROWS

  # One archive for the complete filtered set, then one per class/subclass.
  echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz'>Download</a></td></tr>"
  for archive_class in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
    echo "<tr><td>An IMGT archive with just the matched and filtered ${archive_class} sequences</td><td><a href='new_IMGT_${archive_class}.txz'>Download</a></td></tr>"
  done
  echo "</table>"

  echo "</div>" #downloads tab end

  echo "</div>" #tabs end

  echo "</html>"
} >> "$output"

echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"

mkdir -p "$outdir/baseline"

# Run the baseline wrapper once for the complete set and once each for ca, cg
# and cm. Each entry is "<name>:<suffix>": <name> is both the working
# directory under baseline/ and the label handed to the wrapper; <suffix> is
# appended to new_IMGT / baseline to form the input and output file names.
for baseline_job in "ca_cg_cm:" "ca:_ca" "cg:_cg" "cm:_cm"; do
  baseline_name="${baseline_job%%:*}"
  baseline_suffix="${baseline_job#*:}"

  mkdir -p "$outdir/baseline/$baseline_name"

  # More than one line means there is at least one row besides the header.
  if [[ $(wc -l < "$outdir/new_IMGT${baseline_suffix}/1_Summary.txt") -gt 1 ]]; then
    # Subshell: the wrapper runs from its own directory without changing the
    # working directory of this script, and is skipped if the cd fails.
    (
      cd "$outdir/baseline/$baseline_name" || exit 1
      bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" "$outdir/new_IMGT${baseline_suffix}.txz" "$baseline_name" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline${baseline_suffix}.pdf" "Sequence.ID" "$outdir/baseline${baseline_suffix}.txt"
    )
  elif [[ -z "$baseline_suffix" ]]; then
    echo "No sequences" > "$outdir/baseline.txt"
  else
    echo "No ${baseline_name} sequences" > "$outdir/baseline${baseline_suffix}.txt"
  fi
done

echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# copy_naive_output ARCHIVE DESTINATION
# Copies ARCHIVE to DESTINATION unless DESTINATION is empty or the literal
# "None", which is treated as "output not requested".
copy_naive_output() {
  local archive=$1
  local destination=$2
  if [[ -z "$destination" || "$destination" == "None" ]]; then
    return 0
  fi
  cp "$archive" "$destination"
}

# Each destination is checked on its own. The previous check read the
# variable naive_output, which is never assigned, so it was always true.
copy_naive_output "$outdir/new_IMGT_ca.txz" "${naive_output_ca}"
copy_naive_output "$outdir/new_IMGT_cg.txz" "${naive_output_cg}"
copy_naive_output "$outdir/new_IMGT_cm.txz" "${naive_output_cm}"

# Close the table that was opened when base_overview.html was created.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log next to the report; $log is rewritten below.
mv "$log" "$outdir/log.html"

# $log becomes a landing page: a link to the report plus a table built from
# filtering_steps.txt (tab separated: step, sequence count, percentage).
# IFS is set for the read only, so it needs no saving and restoring.
{
  echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
  echo "<table border = 1>"
  echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
  while IFS=$'\t' read -r step seq perc; do
    echo "<tr>"
    echo "<td>$step</td>"
    echo "<td>$seq</td>"
    echo "<td>${perc}%</td>"
    echo "</tr>"
  done < "$outdir/filtering_steps.txt"
  echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"