#!/bin/bash
#
# Wrapper for the SHM/CSR analysis: unpacks an IMGT archive, identifies the
# class of each sequence, runs the R/Python analysis scripts and writes an
# HTML report into the output directory.
#
# Positional arguments:
#   $1  input                IMGT archive to analyse
#   $2  method               identification method ("custom", otherwise blastn)
#   $3  log                  log file; becomes the main html page at the end
#   $4  outdir               directory that receives every result file
#   $5  title                report title
#   $6  include_fr1          forwarded to the mutation analysis scripts
#   $7  functionality        forwarded to merge_and_filter.r
#   $8  unique               forwarded to merge_and_filter.r
#   $9  naive_output_ca      destination for the ca IMGT archive
#   $10 naive_output_cg      destination for the cg IMGT archive
#   $11 naive_output_cm      destination for the cm IMGT archive
#   $12 filter_unique        forwarded to merge_and_filter.r
#   $13 class_filter         forwarded to merge_and_filter.r
#   $14 empty_region_filter  forwarded to merge_and_filter.r

#set -e

# Directory holding this script and its helper scripts / resources.
dir="$(cd "$(dirname "$0")" && pwd)"

input=$1
method=$2
log=$3 #becomes the main html page at the end
outdir=$4
output="$outdir/index.html" #copied to $log location at the end
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}

# -p: do not fail when the output directory already exists.
mkdir -p "$outdir"

# Unpack the bundled style archive next to the report.
tar -xzf "$dir/style.tar.gz" -C "$outdir"

echo "---------------- read parameters ----------------"
echo "---------------- read parameters ----------------<br />" > "$log"

echo "unpacking IMGT file"

# The description printed by `file` decides which unpacker is used.
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  # Log the command that is really executed (it extracts into files/$title).
  echo "tar -xJf $input -C $PWD/files/$title"
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
fi

# Concatenate every extracted IMGT table whose file name starts with the
# numeric prefix $1 into the flat file $2.
# `find -exec cat {} +` copes with whitespace in paths and, unlike
# `cat $(find ...)`, never falls back to reading stdin when nothing matches
# (the destination is then simply left empty).
collect_imgt_table() {
  local prefix=$1 dest=$2
  find "$PWD/files/" -type f -name "${prefix}_*" -exec cat {} + > "$dest"
}

collect_imgt_table 1 "$PWD/summary.txt"
collect_imgt_table 3 "$PWD/sequences.txt"
collect_imgt_table 5 "$PWD/aa.txt"
collect_imgt_table 6 "$PWD/junction.txt"
collect_imgt_table 7 "$PWD/mutationanalysis.txt"
collect_imgt_table 8 "$PWD/mutationstats.txt"
collect_imgt_table 10 "$PWD/hotspots.txt"

# Earlier glob-based variant, kept for reference:
#cat $PWD/files/*/1_* > $PWD/summary.txt
#cat $PWD/files/*/3_* > $PWD/sequences.txt
#cat $PWD/files/*/5_* > $PWD/aa.txt
#cat $PWD/files/*/6_* > $PWD/junction.txt
#cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
#cat $PWD/files/*/8_* > $PWD/mutationstats.txt
#cat $PWD/files/*/10_* > $PWD/hotspots.txt

#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

echo "${BLASTN_DIR}"

echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

# Convert "<id><TAB><sequence>" rows on stdin into FASTA records on stdout,
# dropping rows whose sequence is empty.
rows_to_fasta() {
  awk -F '\t' '$2 != "" { print ">" $1 "\n" $2 }'
}

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  # Column numbers are derived from the header line only: count the tabs in
  # front of the "Sequence ID" column and of the last column starting with
  # "Sequence". Looking at the first line alone keeps the count right when
  # summary.txt is a concatenation of several tables.
  ID_index=$(head -n 1 "$PWD/summary.txt" | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
  ID_index=$((ID_index + 1))
  sequence_index=$(head -n 1 "$PWD/summary.txt" | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
  sequence_index=$((sequence_index + 1))

  # The previous grep-based filter named sequences.fasta as an *input* file,
  # so the FASTA query was never written; build it directly here.
  tail -n +2 "$PWD/summary.txt" \
    | cut -f "${ID_index},${sequence_index}" \
    | rows_to_fasta > "$PWD/sequences.fasta"

  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' > "$outdir/identified_genes.txt"
  # Use $BLASTN_DIR/blastn when BLASTN_DIR is set, otherwise blastn from PATH.
  "${BLASTN_DIR:+${BLASTN_DIR}/}blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi

echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# merge_and_filter.r is handed its inputs, outputs and options as positional
# arguments. Each one is quoted so that an empty option still occupies its
# slot instead of silently shifting the arguments that follow it.
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" \
  "$PWD/sequences.txt" \
  "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" \
  "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" \
  "$outdir/merged.txt" \
  "$outdir/before_unique_filter.txt" \
  "$outdir/unmatched.txt" \
  "$method" \
  "$functionality" \
  "$unique" \
  "${filter_unique}" \
  "${class_filter}" \
  "${empty_region_filter}" 2>&1

echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

mkdir -p "$outdir/new_IMGT"

# Rebuild one file per IMGT table from everything that was extracted. The
# numeric prefix in front of the first "_" selects the source files.
# `find -exec cat {} +` is whitespace-safe and never reads stdin when no file
# matches.
imgt_table_names=(
  "1_Summary.txt"
  "2_IMGT-gapped-nt-sequences.txt"
  "3_Nt-sequences.txt"
  "4_IMGT-gapped-AA-sequences.txt"
  "5_AA-sequences.txt"
  "6_Junction.txt"
  "7_V-REGION-mutation-and-AA-change-table.txt"
  "8_V-REGION-nt-mutation-statistics.txt"
  "9_V-REGION-AA-change-statistics.txt"
  "10_V-REGION-mutation-hotspots.txt"
)
for imgt_table in "${imgt_table_names[@]}"; do
  find "$PWD/files/" -type f -name "${imgt_table%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/${imgt_table}"
done

# Each class gets its own copy of the full table set.
for imgt_class in ca cg cm; do
  mkdir -p "$outdir/new_IMGT_${imgt_class}"
  cp "$outdir"/new_IMGT/* "$outdir/new_IMGT_${imgt_class}"
done

# new_imgt.r receives the directory, merged.txt and a class selector
# ("-" for the unrestricted set).
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
for imgt_class in ca cg cm; do
  Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "${imgt_class}" 2>&1
done

# Archive from inside each directory so the members carry no leading path.
# The subshell keeps the caller's working directory untouched, and tar is
# skipped when the directory cannot be entered.
for imgt_archive in new_IMGT new_IMGT_ca new_IMGT_cg new_IMGT_cm; do
  (
    cd "$outdir/${imgt_archive}/" || exit 1
    tar -cJf "../${imgt_archive}.txz" *
  )
done

echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma separated list of the classes handed to the analysis scripts.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
# Arguments are quoted so an empty include_fr1 is still passed as an argument.
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1


echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1
# When a histogram with an empty class suffix was written, give it the plain
# aa_histogram.* name.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi

# Classes that get their own column / table in the report.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Only the "sum" summary is rendered. The list used to be declared as
# (sum mean median) and then immediately overwritten with (sum).
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir -p "$outdir/sequence_overview"

#Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Render tab separated "ID class seq A C G T" records from stdin as HTML table
# rows (the sequence is shown before the class). awk splits on every single
# tab, so an empty field stays in its column; a shell `read` with a tab IFS
# would collapse it and shift the remaining values.
nt_overview_rows() {
  awk -F '\t' '{
    printf "<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>\n", $1, $3, $2, $4, $5, $6, $7
  }'
}

# The closing </table> is appended later in the script.
echo "<table border='1'>" > "$outdir/base_overview.html"
nt_overview_rows < "$outdir/sequence_overview/ntoverview.txt" >> "$outdir/base_overview.html"

# Start the report page: title, scripts and stylesheet.
echo "<html><center><h1>$title</h1></center>" > "$output"
{
  echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
  echo "<script type='text/javascript' src='tabber.js'></script>"
  echo "<script type='text/javascript' src='script.js'></script>"
  echo "<link rel='stylesheet' type='text/css' href='style.css'>"
} >> "$output"

#display the matched/unmatched for clarity

# Print $1 as a percentage of $2 with two decimals, or "0" when $2 is zero.
# Multiplying before dividing keeps the decimals; the former bc expression
# truncated the ratio to two digits first and so always produced "N.00".
shm_percentage() {
  LC_ALL=C awk -v part="$1" -v total="$2" 'BEGIN {
    if (total > 0) printf "%.2f\n", part * 100 / total
    else print "0"
  }'
}

# Data rows of merged.txt that do not mention "unmatched" (header skipped).
matched_count=$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)
# Data rows of unmatched.txt (header skipped).
unmatched_count=$(tail -n +2 "$outdir/unmatched.txt" | wc -l)
total_count=$((matched_count + unmatched_count))
perc_count=$(shm_percentage "$unmatched_count" "$total_count")

echo "<center><h2>Total: ${total_count}</h2></center>" >> "$output"
echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> "$output"
echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> "$output"

echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

# Render one comma separated record of data_<func>.txt as an HTML table row.
# Field 0 is the row name, followed by x,y,z triplets: the nine gene classes
# (fields 1-27), unmatched (28-30) and all (31-33). Cells are written as
# "x/y (z%)" in the order gene classes, all, unmatched. The two "S/R (ratio)"
# rows are written without the percent sign and without the unmatched cell.
shm_row() {
  local -a f
  local suffix="%" with_unmatched=1 html start
  IFS=, read -r -a f <<< "$1"
  if [[ "${f[0]}" == "FR S/R (ratio)" || "${f[0]}" == "CDR S/R (ratio)" ]]; then #meh
    suffix=""
    with_unmatched=0
  fi
  html="<tr><td>${f[0]}</td>"
  for start in 1 4 7 10 13 16 19 22 25 31; do
    html+="<td>${f[start]}/${f[start + 1]} (${f[start + 2]}${suffix})</td>"
  done
  if ((with_unmatched)); then
    html+="<td>${f[28]}/${f[29]} (${f[30]}%)</td>"
  fi
  echo "${html}</tr>"
}

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> "$output"
  echo "<tr><th>info</th>" >> "$output"
  for gene in "${genes[@]}"; do
    tmp=$(cat "$outdir/${gene}_${func}_n.txt")
    echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> "$output"
  done

  tmp=$(cat "$outdir/all_${func}_n.txt")
  echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> "$output"
  # The unmatched column shows the count computed from unmatched.txt.
  echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> "$output"
  echo "</tr>" >> "$output"

  while IFS= read -r shm_record; do
    shm_row "$shm_record" >> "$output"
  done < "$outdir/data_${func}.txt"
  echo "</table>" >> "$output"
  #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
done

echo "</div>" >> "$output" #SHM overview tab end

echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

echo "<div class='tabbertab' title='SHM Frequency'>" >> "$output"

# Each plot is linked only when its image exists in the output directory.
# Paths are quoted so a directory name with whitespace cannot break the test
# or the redirection.
if [[ -e "$outdir/scatter.png" ]]; then
  echo "<img src='scatter.png'/><br />" >> "$output"
  echo "<a href='scatter.txt'>download data</a><br />" >> "$output"
fi
if [[ -e "$outdir/frequency_ranges.png" ]]; then
  echo "<img src='frequency_ranges.png'/><br />" >> "$output"
  echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> "$output"
  echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> "$output"
fi

echo "</div>" >> "$output" #SHM frequency tab end

echo "<div class='tabbertab' title='Transition tables'>" >> "$output"

echo "<table border='0'>" >> "$output"

# Print the report row for one class: heading, heatmap image, stacked image
# and the values of $outdir/transitions_<key>_sum.txt as a nested table.
#   $1  key used in the file names
#   $2  heading shown in the first cell
transition_row() {
  local key=$1 heading=$2
  local from a c g t
  echo "<tr>"
  echo "<td><h1>${heading}</h1></td>"
  echo "<td><img src='transitions_heatmap_${key}.png' /></td>"
  echo "<td><img src='transitions_stacked_${key}.png' /></td>"
  echo "<td><table border='1'>"
  while IFS=, read -r from a c g t; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
  done < "$outdir/transitions_${key}_sum.txt"
  echo "</table></td>"

  echo "</tr>"
}

for gene in "${genes[@]}"; do
  transition_row "$gene" "$gene" >> "$output"
done

# The combined data uses the key "all" but is headed "All".
transition_row "all" "All" >> "$output"

echo "</table>" >> "$output"

echo "</div>" >> "$output" #transition tables tab end

echo "<div class='tabbertab' title='Antigen Selection'>" >> "$output"

# The histograms (combined first, then ca, cg, cm) are linked only when the
# combined image exists. Paths are quoted so whitespace in the output
# directory cannot break the test or the redirection.
if [[ -e "$outdir/aa_histogram.png" ]]; then
  for histogram in aa_histogram aa_histogram_ca aa_histogram_cg aa_histogram_cm; do
    echo "<img src='${histogram}.png'/><br />" >> "$output"
    echo "<a href='${histogram}.txt'>download data</a><br />" >> "$output"
  done
fi

# The baseline PDFs are embedded unconditionally.
echo "<embed src='baseline_ca.pdf' width='700px' height='1000px'>" >> "$output"
echo "<embed src='baseline_cg.pdf' width='700px' height='1000px'>" >> "$output"
echo "<embed src='baseline_cm.pdf' width='700px' height='1000px'>" >> "$output"

echo "</div>" >> "$output" #antigen selection tab end

echo "<div class='tabbertab' title='CSR'>" >> "$output"

if [[ -e "$outdir/ca.png" ]]; then
  echo "<img src='ca.png'/><br />" >> "$output"
  echo "<a href='ca.txt'>download data</a><br />" >> "$output"
fi
if [[ -e "$outdir/cg.png" ]]; then
  echo "<img src='cg.png'/><br />" >> "$output"
  echo "<a href='cg.txt'>download data</a><br />" >> "$output"
fi

echo "</div>" >> "$output" #CSR tab end

echo "<div class='tabbertab' title='Downloads'>" >> "$output"

# Print one row of the downloads table.
#   $1  description (may contain HTML)
#   $2  link target, relative to the report
download_row() {
  printf "<tr><td>%s</td><td><a href='%s'>Download</a></td></tr>\n" "$1" "$2"
}

{
  echo "<table border='1' width='700px'>"
  download_row "The complete dataset" "merged.txt"
  download_row "The alignment info on the unmatched sequences" "unmatched.txt"
  download_row "Motif data per sequence ID" "motif_per_seq.txt"
  download_row "Mutation data per sequence ID" "mutation_by_id.txt"
  download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
  download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
  download_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"
  download_row "Base count for every sequence" "base_overview.html"
  # The link to the baseline site used to be written as <href a='...'>, which
  # is not a valid anchor; it is now a proper <a href='...'>.
  download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
  download_row "Baseline data" "baseline.txt"
  download_row "Baseline ca PDF" "baseline_ca.pdf"
  download_row "Baseline ca data" "baseline_ca.txt"
  download_row "Baseline cg PDF" "baseline_cg.pdf"
  download_row "Baseline cg data" "baseline_cg.txt"
  download_row "Baseline cm PDF" "baseline_cm.pdf"
  download_row "Baseline cm data" "baseline_cm.txt"
  #echo "<tr><td></td><td><a href='IgAT.zip'>IgAT zip</a></td></tr>" >> $output
  #echo "<tr><td></td><td><a href='IgAT_ca.zip'>IgAT ca zip</a></td></tr>" >> $output
  #echo "<tr><td></td><td><a href='IgAT_cg.zip'>IgAT cg zip</a></td></tr>" >> $output
  #echo "<tr><td></td><td><a href='IgAT_cm.zip'>IgAT cm zip</a></td></tr>" >> $output
  download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
  download_row "An IMGT archive with just the matched and filtered ca sequences" "new_IMGT_ca.txz"
  download_row "An IMGT archive with just the matched and filtered cg sequences" "new_IMGT_cg.txz"
  download_row "An IMGT archive with just the matched and filtered cm sequences" "new_IMGT_cm.txz"
  echo "</table>"
} >> "$output"

echo "</div>" >> "$output" #downloads tab end

echo "</div>" >> "$output" #tabs end

echo "</html>" >> "$output"

echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"

mkdir -p "$outdir/baseline"

# Run the baseline wrapper once for the complete set and once per class.
# An empty class name stands for the complete set, which uses the un-suffixed
# file names and the work directory "ca_cg_cm".
for baseline_class in "" ca cg cm; do
  baseline_suffix=${baseline_class:+_${baseline_class}}
  baseline_name=${baseline_class:-ca_cg_cm}

  mkdir -p "$outdir/baseline/${baseline_name}"
  # More than one line in the summary means there is data below the header.
  if [[ $(wc -l < "$outdir/new_IMGT${baseline_suffix}/1_Summary.txt") -gt 1 ]]; then
    # wrapper.sh is started from inside its own work directory. The subshell
    # restores the previous working directory afterwards and skips the run
    # when the directory cannot be entered.
    (
      cd "$outdir/baseline/${baseline_name}" || exit 1
      bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" "$outdir/new_IMGT${baseline_suffix}.txz" "${baseline_name}" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline${baseline_suffix}.pdf" "Sequence.ID" "$outdir/baseline${baseline_suffix}.txt"
    )
  else
    # "No sequences" for the complete set, "No <class> sequences" otherwise.
    echo "No ${baseline_class:+${baseline_class} }sequences" > "$outdir/baseline${baseline_suffix}.txt"
  fi
done

echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# Succeeds when $1 names a destination: non-empty and not the literal "None".
wants_output() {
  [[ -n "$1" && "$1" != "None" ]]
}

# The former imgt_loader / naive_output.r steps are disabled; the per-class
# IMGT archives are handed over instead. The old guard tested $naive_output,
# a variable this script never sets, so the copies always ran. Each
# destination is now checked on its own.
for naive_class in ca cg cm; do
  naive_dest_var="naive_output_${naive_class}"
  naive_dest=${!naive_dest_var}
  if wants_output "$naive_dest"; then
    cp "$outdir/new_IMGT_${naive_class}.txz" "$naive_dest"
  fi
done

# Close the table that was opened in base_overview.html earlier in the script.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the textual log inside the output directory; $log itself is rewritten
# below as the landing page.
mv "$log" "$outdir/log.html"

# Render tab separated "step, sequences, percentage" records from stdin as
# table rows. IFS is set for the `read` call only, so the shell-wide IFS is
# left untouched (the former save/restore went through an unrelated variable
# and did not restore it).
filtering_rows() {
  local step seq perc
  while IFS=$'\t' read -r step seq perc; do
    echo "<tr>"
    echo "<td>$step</td>"
    echo "<td>$seq</td>"
    echo "<td>${perc}%</td>"
    echo "</tr>"
  done
}

echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />" > "$log"
echo "<table border = 1>" >> "$log"
echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>" >> "$log"
filtering_rows < "$outdir/filtering_steps.txt" >> "$log"
# A closing tag takes no attributes: "</table border>" became "</table>".
echo "</table></center></html>" >> "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"