0
|
#!/bin/bash
#
# SHM/CSR analysis wrapper: unpacks an IMGT archive, runs the R/Python
# analysis scripts that live next to this file and builds an HTML report.
#
# Positional arguments:
#   $1  input                IMGT archive (zip, or xz-compressed tar)
#   $2  method               "custom" selects gene_identification.py,
#                            any other value selects blastn
#   $3  log                  progress log; replaced by the landing page at the end
#   $4  outdir               directory receiving every generated file
#   $5  title                report heading (also used as extraction sub-directory)
#   $6  include_fr1          forwarded to mutation_analysis.r / mutation_analysis.py
#   $7  functionality        forwarded to merge_and_filter.r
#   $8  unique               forwarded to merge_and_filter.r
#   $9  naive_output_ca      destination for the ca IMGT archive
#   $10 naive_output_cg      destination for the cg IMGT archive
#   $11 naive_output_cm      destination for the cm IMGT archive
#   $12 filter_unique        forwarded to merge_and_filter.r
#   $13 class_filter         forwarded to merge_and_filter.r
#   $14 empty_region_filter  forwarded to merge_and_filter.r

#set -e

# Directory containing this script and its helper scripts / resources.
dir="$(cd "$(dirname "$0")" && pwd)"

input=$1
method=$2
log=$3 #becomes the main html page at the end
outdir=$4
output="$outdir/index.html" #copied to $log location at the end
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}

# -p: do not complain when the caller already created the directory.
mkdir -p "$outdir"

# Static assets (scripts / stylesheet) referenced by the generated pages.
tar -xzf "$dir/style.tar.gz" -C "$outdir"

echo "---------------- read parameters ----------------"
# First write to the log: truncate whatever was there before.
echo "---------------- read parameters ----------------<br />" > "$log"
|
55
|
25
|
|
echo "unpacking IMGT file"

# The archive format is detected from the description printed by file(1).
input_type="$(file "$input")"
if [[ "$input_type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$input_type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  echo "tar -xJf $input -C $PWD/files/$title"
  # Quoted so a title containing spaces yields a single directory.
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
else
  # Previously ignored silently; report it but keep going as before.
  echo "Unrecognized input format: $input_type"
fi

# collect_imgt_table PREFIX DEST
# Concatenates every extracted file whose name starts with "PREFIX_" into
# DEST. Using find -exec avoids running a bare `cat` (which would read
# stdin) when nothing matches, and copes with spaces in paths.
collect_imgt_table() {
  find "$PWD/files/" -type f -name "${1}_*" -exec cat {} + > "$2"
}

collect_imgt_table 1 "$PWD/summary.txt"
collect_imgt_table 3 "$PWD/sequences.txt"
collect_imgt_table 5 "$PWD/aa.txt"
collect_imgt_table 6 "$PWD/junction.txt"
collect_imgt_table 7 "$PWD/mutationanalysis.txt"
collect_imgt_table 8 "$PWD/mutationstats.txt"
collect_imgt_table 10 "$PWD/hotspots.txt"
|
3
|
55
|
26
|
#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

# BLASTN_DIR is expected to come from the environment; echoed for the job log.
echo "${BLASTN_DIR}"

# tsv_to_fasta
# Reads "<id><TAB><sequence>" rows on stdin and writes them as FASTA records
# on stdout. Rows whose sequence field is empty are dropped.
tsv_to_fasta() {
  awk -F'\t' '$2 != "" { print ">" $1 "\n" $2 }'
}

echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  # Column positions are derived by counting the tabs that precede the
  # "Sequence ID" / "Sequence" header text in the summary table.
  ID_index=$(grep -o -P ".+Sequence ID" "$PWD/summary.txt" | grep -o -P "\t" | wc -l)
  ID_index=$((ID_index+1))
  sequence_index=$(grep -o -P ".+\tSequence" "$PWD/summary.txt" | grep -o -P "\t" | wc -l)
  sequence_index=$((sequence_index+1))

  # Write the query file directly. The original grep step read from a
  # not-yet-existing sequences.fasta instead of writing to it, so blastn
  # never received any sequences.
  tail -n+2 "$PWD/summary.txt" \
    | cut -f "${ID_index},${sequence_index}" \
    | tsv_to_fasta > "$PWD/sequences.fasta"

  echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi
|
|
80
|
55
|
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# Arguments in the positional order merge_and_filter.r is invoked with.
# Every value is quoted so that an empty option still occupies its slot
# instead of shifting all later arguments one position to the left.
merge_args=(
  "$PWD/summary.txt"
  "$PWD/sequences.txt"
  "$PWD/mutationanalysis.txt"
  "$PWD/mutationstats.txt"
  "$PWD/hotspots.txt"
  "$outdir/identified_genes.txt"
  "$outdir/merged.txt"
  "$outdir/before_unique_filter.txt"
  "$outdir/unmatched.txt"
  "$method"
  "$functionality"
  "$unique"
  "${filter_unique}"
  "${class_filter}"
  "${empty_region_filter}"
)

Rscript "$dir/merge_and_filter.r" "${merge_args[@]}" 2>&1
|
0
|
85
|
98
|
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

mkdir -p "$outdir/new_IMGT"

# Canonical IMGT table names; the part before the first "_" is the numeric
# prefix used to locate the extracted source files.
imgt_tables=(
  "1_Summary.txt"
  "2_IMGT-gapped-nt-sequences.txt"
  "3_Nt-sequences.txt"
  "4_IMGT-gapped-AA-sequences.txt"
  "5_AA-sequences.txt"
  "6_Junction.txt"
  "7_V-REGION-mutation-and-AA-change-table.txt"
  "8_V-REGION-nt-mutation-statistics.txt"
  "9_V-REGION-AA-change-statistics.txt"
  "10_V-REGION-mutation-hotspots.txt"
)

for table in "${imgt_tables[@]}"; do
  find "$PWD/files/" -type f -name "${table%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/$table"
done

# One copy of the full table set per (sub)class; new_imgt.r is then run on
# each copy with the class name as its third argument.
imgt_classes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

for cls in "${imgt_classes[@]}"; do
  mkdir -p "$outdir/new_IMGT_${cls}"
  cp "$outdir"/new_IMGT/* "$outdir/new_IMGT_${cls}"
done

Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1

for cls in "${imgt_classes[@]}"; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${cls}/" "$outdir/merged.txt" "$cls" 2>&1
done

# Archive every directory, including the subclass ones the Downloads tab
# links to (previously only new_IMGT, ca, cg and cm were archived).
# tar is run from inside the directory so members are stored without a
# path prefix; the subshell keeps the caller's working directory intact and
# skips tar if the cd fails.
for imgt_dir in new_IMGT "${imgt_classes[@]/#/new_IMGT_}"; do
  (cd "$outdir/$imgt_dir/" && tar -cJf "../${imgt_dir}.txz" *)
done
|
|
158
|
55
|
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma separated class list handed to the analysis scripts (also used
# further down by the sequence overview step).
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1

echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1

# If a histogram with an empty class suffix ("aa_histogram_.*") exists,
# rename it to the plain name the report page links to.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi
|
4
|
180
|
0
|
# Classes that get their own column / row in the report tables.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Only the "sum" tables are rendered. (An earlier assignment of
# "sum mean median" was immediately overwritten and has been removed.)
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir -p "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Start of the per-sequence base count page. The closing </table> tag is
# appended near the very end of the script.
echo "<table border='1'>" > "$outdir/base_overview.html"

# ntoverview.txt is read as tab separated: ID, class, sequence, A, C, G, T.
# -r keeps backslashes in the data intact.
while IFS=$'\t' read -r ID class seq A C G T; do
  echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> "$outdir/base_overview.html"
done < "$outdir/sequence_overview/ntoverview.txt"
|
|
200
|
62
|
# percentage PART TOTAL
# Prints PART/TOTAL as a percentage truncated to two decimals (e.g. 33.33).
# Prints "0" when TOTAL is zero so an empty data set cannot trigger a
# division by zero.
percentage() {
  local part=$1 total=$2 scaled
  if (( total == 0 )); then
    printf '0\n'
    return 0
  fi
  # Multiply before dividing so the precision is not lost to integer
  # truncation; scaled holds the percentage times 100.
  scaled=$(( part * 10000 / total ))
  printf '%d.%02d\n' "$(( scaled / 100 ))" "$(( scaled % 100 ))"
}

# Page head: the first write truncates any previous index.html.
{
  echo "<html><center><h1>$title</h1></center>"
  echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
  echo "<script type='text/javascript' src='tabber.js'></script>"
  echo "<script type='text/javascript' src='script.js'></script>"
  echo "<link rel='stylesheet' type='text/css' href='style.css'>"
} > "$output"

#display the matched/unmatched for clarity

# Lines of merged.txt that do not mention "unmatched", minus the first
# remaining (header) line.
matched_count="$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)"
# Lines of unmatched.txt minus its header line.
unmatched_count="$(tail -n +2 "$outdir/unmatched.txt" | wc -l)"
total_count=$((matched_count + unmatched_count))
perc_count="$(percentage "$unmatched_count" "$total_count")"

{
  echo "<center><h2>Total: ${total_count}</h2></center>"
  echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>"
  echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>"
} >> "$output"
|
|
219
|
55
|
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  # The rendered table is the mutation table followed by the hotspot table.
  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> "$output"

  # Header row: one column per class, then "all", then "unmatched".
  echo "<tr><th>info</th>" >> "$output"
  for gene in "${genes[@]}"; do
    tmp=$(< "$outdir/${gene}_${func}_n.txt")
    echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> "$output"
  done

  tmp=$(< "$outdir/all_${func}_n.txt")
  echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> "$output"
  # The unmatched column shows the count computed earlier in the script;
  # the header row is now closed explicitly.
  echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th></tr>" >> "$output"

  # Each data line is "name" followed by x,y,z triples in the order
  # ca, ca1, ca2, cg, cg1, cg2, cg3, cg4, cm, unmatched, all.
  while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz; do
    # The two S/R ratio rows are plain ratios: no percent sign and, as
    # before, no unmatched cell.
    if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then
      is_ratio=1
      pct=""
    else
      is_ratio=0
      pct="%"
    fi

    row="<tr><td>$name</td>"
    row+="<td>${cax}/${cay} (${caz}${pct})</td>"
    row+="<td>${ca1x}/${ca1y} (${ca1z}${pct})</td>"
    row+="<td>${ca2x}/${ca2y} (${ca2z}${pct})</td>"
    row+="<td>${cgx}/${cgy} (${cgz}${pct})</td>"
    row+="<td>${cg1x}/${cg1y} (${cg1z}${pct})</td>"
    row+="<td>${cg2x}/${cg2y} (${cg2z}${pct})</td>"
    row+="<td>${cg3x}/${cg3y} (${cg3z}${pct})</td>"
    row+="<td>${cg4x}/${cg4y} (${cg4z}${pct})</td>"
    row+="<td>${cmx}/${cmy} (${cmz}${pct})</td>"
    row+="<td>${allx}/${ally} (${allz}${pct})</td>"
    if (( ! is_ratio )); then
      row+="<td>${unx}/${uny} (${unz}${pct})</td>"
    fi
    row+="</tr>"

    printf '%s\n' "$row" >> "$output"
  done < "$outdir/data_${func}.txt"
  echo "</table>" >> "$output"
done

echo "</div>" >> "$output" #SHM overview tab end
|
|
260
|
|
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

echo "<div class='tabbertab' title='SHM Frequency'>" >> "$output"

# Each plot is only referenced when it exists in $outdir.
if [[ -e "$outdir/scatter.png" ]]; then
  printf '%s\n' \
    "<img src='scatter.png'/><br />" \
    "<a href='scatter.txt'>download data</a><br />" >> "$output"
fi
if [[ -e "$outdir/frequency_ranges.png" ]]; then
  printf '%s\n' \
    "<img src='frequency_ranges.png'/><br />" \
    "<a href='frequency_ranges_classes.txt'>download class data</a><br />" \
    "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> "$output"
fi

echo "</div>" >> "$output" #SHM frequency tab end
|
|
279
|
|
# emit_transition_row LABEL KEY
# Prints one row of the transition overview on stdout: heading LABEL, the
# heatmap and stacked images for KEY, and an inner table built from the
# comma separated file $outdir/transitions_KEY_sum.txt.
emit_transition_row() {
  local label=$1 key=$2
  local from a c g t
  echo "<tr>"
  echo "<td><h1>${label}</h1></td>"
  echo "<td><img src='transitions_heatmap_${key}.png' /></td>"
  echo "<td><img src='transitions_stacked_${key}.png' /></td>"
  echo "<td><table border='1'>"
  while IFS=, read -r from a c g t; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
  done < "$outdir/transitions_${key}_sum.txt"
  echo "</table></td>"
  echo "</tr>"
}

echo "<div class='tabbertab' title='Transition tables'>" >> "$output"

echo "<table border='0'>" >> "$output"

# One row per class, followed by the combined "All" row.
for gene in "${genes[@]}"; do
  emit_transition_row "$gene" "$gene" >> "$output"
done

emit_transition_row "All" "all" >> "$output"

echo "</table>" >> "$output"

echo "</div>" >> "$output" #transition tables tab end
|
|
316
|
|
echo "<div class='tabbertab' title='Antigen Selection'>" >> "$output"

# The histograms (overall, then ca / cg / cm) are listed only when the
# overall histogram exists.
if [[ -e "$outdir/aa_histogram.png" ]]; then
  for suffix in "" _ca _cg _cm; do
    printf '%s\n' \
      "<img src='aa_histogram${suffix}.png'/><br />" \
      "<a href='aa_histogram${suffix}.txt'>download data</a><br />" >> "$output"
  done
fi

# The baseline PDFs are always embedded.
for cls in ca cg cm; do
  echo "<embed src='baseline_${cls}.pdf' width='700px' height='1000px'>" >> "$output"
done

echo "</div>" >> "$output" #antigen selection tab end
|
|
336
|
|
echo "<div class='tabbertab' title='CSR'>" >> "$output"

# A class plot and its data link are added only when the image exists.
for cls in ca cg; do
  if [[ -e "$outdir/${cls}.png" ]]; then
    printf '%s\n' \
      "<img src='${cls}.png'/><br />" \
      "<a href='${cls}.txt'>download data</a><br />" >> "$output"
  fi
done

echo "</div>" >> "$output" #CSR tab end
|
|
351
|
|
# download_row DESCRIPTION HREF
# Prints one row of the downloads table on stdout. DESCRIPTION is inserted
# verbatim, so it may itself contain HTML.
download_row() {
  echo "<tr><td>$1</td><td><a href='$2'>Download</a></td></tr>"
}

echo "<div class='tabbertab' title='Downloads'>" >> "$output"

{
  echo "<table border='1' width='700px'>"
  download_row "The complete dataset" "merged.txt"
  download_row "The alignment info on the unmatched sequences" "unmatched.txt"
  download_row "Motif data per sequence ID" "motif_per_seq.txt"
  download_row "Mutation data per sequence ID" "mutation_by_id.txt"
  download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
  download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
  download_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"
  download_row "Base count for every sequence" "base_overview.html"
  # The anchor used to be written as "<href a=...>", which is not a link.
  download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
  download_row "Baseline data" "baseline.txt"
  for cls in ca cg cm; do
    download_row "Baseline ${cls} PDF" "baseline_${cls}.pdf"
    download_row "Baseline ${cls} data" "baseline_${cls}.txt"
  done
  download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
  for cls in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
    download_row "An IMGT archive with just the matched and filtered ${cls} sequences" "new_IMGT_${cls}.txz"
  done
  echo "</table>"
} >> "$output"

echo "</div>" >> "$output" #downloads tab end

echo "</div>" >> "$output" #tabs end

echo "</html>" >> "$output"
|
2
|
392
|
95
|
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"

mkdir -p "$outdir/baseline"

# One baseline run for the combined data set (empty class) and one each for
# ca, cg and cm. File names carry a "_<class>" suffix except for the
# combined run, whose working directory / run name is "ca_cg_cm".
for cls in "" ca cg cm; do
  suffix=${cls:+_$cls}
  run_name=${cls:-ca_cg_cm}

  mkdir -p "$outdir/baseline/$run_name"

  # Only run when the summary holds more than its header line.
  if [[ $(wc -l < "$outdir/new_IMGT${suffix}/1_Summary.txt") -gt 1 ]]; then
    # Subshell: the wrapper runs inside its own working directory without
    # changing ours, and is skipped if that directory cannot be entered.
    (
      cd "$outdir/baseline/$run_name" &&
        bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" \
          "$outdir/new_IMGT${suffix}.txz" "$run_name" \
          "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" \
          "$outdir/baseline${suffix}.pdf" "Sequence.ID" "$outdir/baseline${suffix}.txt"
    )
  else
    echo "No ${cls:+$cls }sequences" > "$outdir/baseline${suffix}.txt"
  fi
done
|
|
433
|
82
|
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# Copy the per-class IMGT archives to the requested destinations.
# The original guard tested $naive_output, which is never set, so it always
# passed. Each destination is now checked individually and skipped when it
# is empty or the literal "None".
for cls in ca cg cm; do
  dest_var="naive_output_${cls}"
  dest=${!dest_var}
  if [[ -n "$dest" && "$dest" != "None" ]]; then
    cp "$outdir/new_IMGT_${cls}.txz" "$dest"
  fi
done
|
|
452
|
81
|
# Close the table of the per-sequence base count page started earlier.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log next to the results; $log itself is rewritten
# below as the landing page.
mv "$log" "$outdir/log.html"

{
  echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
  echo "<table border = 1>"
  echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
  # IFS is set for the read only. The original saved "$TMP" instead of
  # "$IFS" and then "restored" IFS from it, leaving IFS with an unrelated
  # value for the rest of the script.
  while IFS=$'\t' read -r step seq perc; do
    echo "<tr>"
    echo "<td>$step</td>"
    echo "<td>$seq</td>"
    echo "<td>${perc}%</td>"
    echo "</tr>"
  done < "$outdir/filtering_steps.txt"
  # Closing tag fixed: it was written as "</table border>".
  echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"
|
47
|
477
|
110
|
478
|
|
479
|
|
480
|
|
481
|
|
482
|
|
483
|
|
484
|
|
485
|
|
486
|
|
487
|
|
488
|
|
489
|
|
490
|
|
491
|
|
492
|
|
493
|
|
494
|
|
495
|
|
496
|
|
497
|