#!/bin/bash
#
# Unpacks an IMGT archive, runs the identification / filtering / analysis
# helper scripts that live next to this file, and assembles an HTML report
# with downloadable data files in an output directory.
#
# Positional arguments:
#   $1   input                IMGT archive (zip, or xz-compressed tar)
#   $2   method               "custom" uses gene_identification.py, anything
#                             else goes through summary_to_fasta.py + blastn
#   $3   log                  progress log while running; replaced at the end
#                             by a small landing page linking to index.html
#   $4   outdir               directory that receives the report and data files
#   $5   title                heading of the report (also the extraction
#                             sub-directory for xz archives)
#   $6   include_fr1          forwarded to mutation_analysis.r / .py
#   $7   functionality        forwarded to merge_and_filter.r
#   $8   unique               forwarded to merge_and_filter.r
#   $9   naive_output_ca      destination for a copy of new_IMGT_ca.txz
#   $10  naive_output_cg      destination for a copy of new_IMGT_cg.txz
#   $11  naive_output_cm      destination for a copy of new_IMGT_cm.txz
#   $12  filter_unique        forwarded to merge_and_filter.r
#   $13  class_filter         forwarded to merge_and_filter.r
#   $14  empty_region_filter  forwarded to merge_and_filter.r

#set -e

# Absolute directory of this script; helper scripts and bundled resources are
# resolved relative to it.
dir="$(cd "$(dirname "$0")" && pwd)"

input=$1
method=$2
log=$3 # becomes the landing page (link to index.html) at the end
outdir=$4
output="$outdir/index.html" # the actual report page
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}
# Create the report directory (no error if it already exists) and unpack the
# bundled style.tar.gz into it.
mkdir -p "$outdir"

tar -xzf "$dir/style.tar.gz" -C "$outdir"

echo "---------------- read parameters ----------------"
# This first write truncates $log; every later step appends to it.
echo "---------------- read parameters ----------------<br />" > "$log"
echo "unpacking IMGT file"

# Write the contents of every non-directory below $1 whose name matches the
# glob $2 to stdout. find invokes cat itself, so paths containing spaces are
# passed intact and an empty match produces empty output (a bare `cat` with no
# operands would instead sit waiting on stdin).
concat_imgt_files() {
  find "$1" ! -type d -name "$2" -exec cat {} +
}

# Pick the extraction tool from what file(1) reports about the upload.
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  echo "tar -xJf $input -C $PWD/files/$title"
  # xz tarballs are unpacked into a sub-directory named after the title.
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
else
  # Nothing is extracted in this case; the tables below will come out empty.
  echo "Unrecognised archive type: $type"
fi

# Merge the numbered IMGT tables (found anywhere below files/) into one flat
# file per table in the working directory.
concat_imgt_files "$PWD/files/" "1_*" > "$PWD/summary.txt"
concat_imgt_files "$PWD/files/" "3_*" > "$PWD/sequences.txt"
concat_imgt_files "$PWD/files/" "5_*" > "$PWD/aa.txt"
concat_imgt_files "$PWD/files/" "6_*" > "$PWD/junction.txt"
concat_imgt_files "$PWD/files/" "7_*" > "$PWD/mutationanalysis.txt"
concat_imgt_files "$PWD/files/" "8_*" > "$PWD/mutationstats.txt"
concat_imgt_files "$PWD/files/" "10_*" > "$PWD/hotspots.txt"
# Locate the BLAST+ binaries. A BLASTN_DIR that is already set and at least
# five characters long is kept as-is; otherwise fall back to the hard-coded
# development install path.
if (( ${#BLASTN_DIR} >= 5 )); then
  echo "On server, using BLASTN_DIR env: ${BLASTN_DIR}"
else
  BLASTN_DIR="/home/galaxy/Downloads/ncbi-blast-2.4.0+/bin"
  echo "Dev Galaxy set BLASTN_DIR to: ${BLASTN_DIR}"
fi
echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

# Both branches produce $outdir/identified_genes.txt.
if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  echo "---------------- summary_to_fasta.py ----------------"
  echo "---------------- summary_to_fasta.py ----------------<br />" >> "$log"

  # Turn the summary table into a FASTA query file (this script replaced an
  # earlier inline grep/cut/awk pipeline).
  python "$dir/summary_to_fasta.py" --input "$PWD/summary.txt" --fasta "$PWD/sequences.fasta"

  # Write the column names first; the tabular blastn output is appended below.
  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# Arguments are positional for merge_and_filter.r. Each one is quoted so that
# an empty or space-containing value keeps its slot instead of shifting the
# arguments that follow it.
merge_args=(
  "$PWD/summary.txt"
  "$PWD/sequences.txt"
  "$PWD/mutationanalysis.txt"
  "$PWD/mutationstats.txt"
  "$PWD/hotspots.txt"
  "$outdir/identified_genes.txt"
  "$outdir/merged.txt"
  "$outdir/before_unique_filter.txt"
  "$outdir/unmatched.txt"
  "$method"
  "$functionality"
  "$unique"
  "${filter_unique}"
  "${class_filter}"
  "${empty_region_filter}"
)
Rscript "$dir/merge_and_filter.r" "${merge_args[@]}" 2>&1
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

mkdir -p "$outdir/new_IMGT"

# Canonical file name for each of the ten IMGT tables.
imgt_tables=(
  "1_Summary.txt"
  "2_IMGT-gapped-nt-sequences.txt"
  "3_Nt-sequences.txt"
  "4_IMGT-gapped-AA-sequences.txt"
  "5_AA-sequences.txt"
  "6_Junction.txt"
  "7_V-REGION-mutation-and-AA-change-table.txt"
  "8_V-REGION-nt-mutation-statistics.txt"
  "9_V-REGION-AA-change-statistics.txt"
  "10_V-REGION-mutation-hotspots.txt"
)

# For every table, concatenate all extracted files that share its numeric
# prefix ("${table%%_*}", e.g. "10" for "10_V-REGION-...") into the canonical
# file. find runs cat directly: paths with spaces stay intact and an empty
# match gives an empty file rather than a cat blocked on stdin.
for table in "${imgt_tables[@]}"; do
  find "$PWD/files/" ! -type d -name "${table%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/$table"
done
# Give every class/subclass its own copy of the combined IMGT tables; each
# copy is handed to new_imgt.r together with its class name further down.
for cls in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
  mkdir -p "$outdir/new_IMGT_${cls}"
  cp "$outdir"/new_IMGT/* "$outdir/new_IMGT_${cls}"
done
# Run new_imgt.r on the combined directory (class argument "-") and then on
# every per-class directory with the class name as third argument.
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1

for cls in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${cls}/" "$outdir/merged.txt" "$cls" 2>&1
done
# Pack every new_IMGT* directory into a sibling <name>.txz. tar is run from
# inside the directory so members are stored without a leading path. The
# subshell keeps the script's own working directory untouched, and tar is
# skipped when the cd fails so it can never pack the wrong directory.
for imgt_dir in new_IMGT new_IMGT_ca new_IMGT_ca1 new_IMGT_ca2 \
  new_IMGT_cg new_IMGT_cg1 new_IMGT_cg2 new_IMGT_cg3 new_IMGT_cg4 \
  new_IMGT_cm; do
  (
    cd "$outdir/$imgt_dir/" && tar -cJf "../$imgt_dir.txz" *
  )
done
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma-separated class list handed to both mutation analyses here and to
# sequence_overview.r later in the script.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1

echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"
echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1

# If the run left files with an empty class suffix ("aa_histogram_.*"),
# rename them to the plain names the report page links to.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi
# Classes and subclasses that get their own column / row group in the report.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Aggregations rendered as tables. Only "sum" is active; an earlier
# (sum mean median) list was always overwritten by this one.
funcs=(sum)
echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir -p "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Render ntoverview.txt (tab separated: ID, class, sequence, A, C, G, T) as
# HTML table rows. The table is deliberately left open here; its closing tag
# is appended near the end of the script.
{
  echo "<table border='1'>"
  while IFS=$'\t' read -r ID class seq A C G T; do
    echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
  done < "$outdir/sequence_overview/ntoverview.txt"
} > "$outdir/base_overview.html"
# Start the report page: title, scripts and stylesheet. This write truncates
# any previous index.html.
{
  echo "<html><center><h1>$title</h1></center>"
  echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
  echo "<script type='text/javascript' src='tabber.js'></script>"
  echo "<script type='text/javascript' src='script.js'></script>"
  echo "<link rel='stylesheet' type='text/css' href='style.css'>"
} > "$output"

#display the matched/unmatched for clarity

# Rows in merged.txt that do not mention "unmatched", minus the header line.
matched_count=$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)
# Data rows in unmatched.txt (header skipped).
unmatched_count=$(tail -n +2 "$outdir/unmatched.txt" | wc -l)
# Normalise through arithmetic so any padding in the wc output is dropped.
matched_count=$((matched_count))
unmatched_count=$((unmatched_count))
total_count=$((matched_count + unmatched_count))

# Percentage of unmatched sequences. bc with scale=2 truncates the quotient to
# two decimals before it is multiplied by 100. With no sequences at all the
# division is skipped to avoid a divide-by-zero error.
if ((total_count > 0)); then
  perc_count=$(bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1")
else
  perc_count=0
fi

{
  echo "<center><h2>Total: ${total_count}</h2></center>"
  echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>"
  echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>"
} >> "$output"
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

# One table per aggregation function in funcs.
for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  # The table body is the mutation rows followed by the hotspot rows.
  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> "$output"

  # Header row: one column per class with its sequence count, then "all" and
  # "unmatched".
  echo "<tr><th>info</th>" >> "$output"
  for gene in "${genes[@]}"; do
    n_seqs=$(< "$outdir/${gene}_${func}_n.txt")
    echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $n_seqs)</a></th>" >> "$output"
  done

  n_seqs=$(< "$outdir/all_${func}_n.txt")
  echo "<th><a href='matched_all_${func}.txt'>all (N = $n_seqs)</a></th>" >> "$output"
  # The unmatched count comes from unmatched.txt (computed earlier), not from
  # a per-function *_n.txt file.
  echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> "$output"
  echo "</tr>" >> "$output"

  # Each data row is comma separated: a name followed by an x,y,z triple per
  # class, then the unmatched triple, then the "all" triple.
  while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz; do
    if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then #meh
      # Ratio rows: z is printed without a percent sign, and no unmatched
      # cell is emitted for them.
      echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> "$output"
    else
      echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> "$output"
    fi
  done < "$outdir/data_${func}.txt"
  echo "</table>" >> "$output"
done

echo "</div>" >> "$output" #SHM overview tab end
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

# "SHM Frequency" tab: each plot is only referenced when its image exists.
{
  echo "<div class='tabbertab' title='SHM Frequency'>"

  if [[ -e "$outdir/scatter.png" ]]; then
    echo "<img src='scatter.png'/><br />"
    echo "<a href='scatter.txt'>download data</a><br />"
  fi
  if [[ -e "$outdir/frequency_ranges.png" ]]; then
    echo "<img src='frequency_ranges.png'/><br />"
    echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />"
    echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />"
  fi

  echo "</div>" #SHM frequency tab end
} >> "$output"
# "Transition tables" tab: one row per class plus a final "All" row, each with
# a heatmap, a stacked plot and the transitions_<key>_sum.txt values as a
# nested table.
{
  echo "<div class='tabbertab' title='Transition tables'>"
  echo "<table border='0'>"

  for key in "${genes[@]}" all; do
    # The combined row is keyed "all" in file names but headed "All".
    if [[ "$key" == "all" ]]; then
      heading="All"
    else
      heading="$key"
    fi

    echo "<tr>"
    echo "<td><h1>${heading}</h1></td>"
    echo "<td><img src='transitions_heatmap_${key}.png' /></td>"
    echo "<td><img src='transitions_stacked_${key}.png' /></td>"
    echo "<td><table border='1'>"
    while IFS=, read -r from a c g t; do
      echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
    done < "$outdir/transitions_${key}_sum.txt"
    echo "</table></td>"
    echo "</tr>"
  done

  echo "</table>"
  echo "</div>" #transition tables tab end
} >> "$output"
# "Antigen Selection" tab: the amino-acid histograms (overall, then ca/cg/cm)
# are shown only when the overall image exists; the three baseline PDFs are
# always embedded.
{
  echo "<div class='tabbertab' title='Antigen Selection'>"

  if [[ -e "$outdir/aa_histogram.png" ]]; then
    for suffix in "" _ca _cg _cm; do
      echo "<img src='aa_histogram${suffix}.png'/><br />"
      echo "<a href='aa_histogram${suffix}.txt'>download data</a><br />"
    done
  fi

  for cls in ca cg cm; do
    echo "<embed src='baseline_${cls}.pdf' width='700px' height='1000px'>"
  done

  echo "</div>" #antigen selection tab end
} >> "$output"
# "CSR" tab: the ca and cg plots, each only when its image exists.
{
  echo "<div class='tabbertab' title='CSR'>"

  for cls in ca cg; do
    if [[ -e "$outdir/${cls}.png" ]]; then
      echo "<img src='${cls}.png'/><br />"
      echo "<a href='${cls}.txt'>download data</a><br />"
    fi
  done

  echo "</div>" #CSR tab end
} >> "$output"
# Print one row of the downloads table: $1 is the description (may contain
# HTML), $2 the link target relative to index.html.
download_row() {
  printf "<tr><td>%s</td><td><a href='%s'>Download</a></td></tr>\n" "$1" "$2"
}

# "Downloads" tab, followed by the closing tags of the tab container and the
# page. (Links to IgAT zip files used to be listed here but are disabled.)
{
  echo "<div class='tabbertab' title='Downloads'>"

  echo "<table border='1' width='700px'>"
  download_row "The complete dataset" "merged.txt"
  download_row "The alignment info on the unmatched sequences" "unmatched.txt"
  download_row "Motif data per sequence ID" "motif_per_seq.txt"
  download_row "Mutation data per sequence ID" "mutation_by_id.txt"
  download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
  download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
  download_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"
  download_row "Base count for every sequence" "base_overview.html"
  download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
  download_row "Baseline data" "baseline.txt"
  for cls in ca cg cm; do
    download_row "Baseline ${cls} PDF" "baseline_${cls}.pdf"
    download_row "Baseline ${cls} data" "baseline_${cls}.txt"
  done
  download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
  for cls in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
    download_row "An IMGT archive with just the matched and filtered ${cls} sequences" "new_IMGT_${cls}.txz"
  done
  echo "</table>"

  echo "</div>" #downloads tab end

  echo "</div>" #tabs end

  echo "</html>"
} >> "$output"
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"

# Run the baseline wrapper for one sequence set, or write a placeholder when
# the set is empty.
#   $1  label      working sub-directory under $outdir/baseline, also passed
#                  to the wrapper as the set name
#   $2  suffix     "" for the combined set, "_ca" / "_cg" / "_cm" otherwise;
#                  selects new_IMGT<suffix>{/,.txz} and baseline<suffix>.{pdf,txt}
#   $3  empty_msg  text written to baseline<suffix>.txt when 1_Summary.txt has
#                  at most one line (or cannot be read)
# The wrapper runs in a subshell from inside its working directory, so the
# caller's working directory never changes and the wrapper is skipped if that
# directory cannot be entered.
run_baseline() {
  local label=$1 suffix=$2 empty_msg=$3

  mkdir -p "$outdir/baseline/$label"
  if [[ $(wc -l < "$outdir/new_IMGT${suffix}/1_Summary.txt") -gt 1 ]]; then
    (
      cd "$outdir/baseline/$label" || exit
      bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" "$outdir/new_IMGT${suffix}.txz" "$label" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline${suffix}.pdf" "Sequence.ID" "$outdir/baseline${suffix}.txt"
    )
  else
    echo "$empty_msg" > "$outdir/baseline${suffix}.txt"
  fi
}

run_baseline "ca_cg_cm" "" "No sequences"
run_baseline "ca" "_ca" "No ca sequences"
run_baseline "cg" "_cg" "No cg sequences"
run_baseline "cm" "_cm" "No cm sequences"
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# Copy archive $1 to destination $2, unless the destination is empty or the
# literal "None" (i.e. that output was not requested). The previous check
# tested a variable that is never set, so it always copied — even to a file
# literally named "None".
export_naive_archive() {
  local archive=$1 dest=$2

  if [[ -n "$dest" && "$dest" != "None" ]]; then
    cp "$archive" "$dest"
  fi
}

# The per-class IMGT archives double as the "naive" outputs; the older
# imgt_loader / naive_output.r steps are no longer run.
export_naive_archive "$outdir/new_IMGT_ca.txz" "${naive_output_ca}"
export_naive_archive "$outdir/new_IMGT_cg.txz" "${naive_output_cg}"
export_naive_archive "$outdir/new_IMGT_cm.txz" "${naive_output_cm}"
# Close the table that was left open in base_overview.html when the
# nucleotide overview rows were written.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the step-by-step progress log as log.html inside the report directory;
# $log itself is rewritten below as the landing page.
mv "$log" "$outdir/log.html"

# Landing page: a link to the report plus the filtering statistics from
# filtering_steps.txt (tab separated: step, sequence count, percentage).
# IFS is set for the read command only, so the shell's own IFS is untouched.
{
  echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
  echo "<table border = 1>"
  echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
  while IFS=$'\t' read -r step seq perc; do
    printf '<tr>\n<td>%s</td>\n<td>%s</td>\n<td>%s%%</td>\n</tr>\n' "$step" "$seq" "$perc"
  done < "$outdir/filtering_steps.txt"
  echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"