0
|
#!/bin/bash
#
# Unpacks an IMGT archive, runs the identification / merge / mutation-analysis
# helper scripts that live next to this file, and renders an HTML report.
#
# Positional arguments:
#   $1  input            IMGT archive (zip, or xz-compressed tar)
#   $2  method           "custom" selects gene_identification.py, anything
#                        else selects blastn
#   $3  log              progress log; becomes the main html page at the end
#   $4  outdir           directory that receives every generated file
#   $5  title            report title (also the xz extraction sub-directory)
#   $6  include_fr1      forwarded to mutation_analysis.r / mutation_analysis.py
#   $7  functionality    forwarded to merge_and_filter.r
#   $8  unique           forwarded to merge_and_filter.r
#   $9  naive_output_ca  forwarded to naive_output.r
#   $10 naive_output_cg  forwarded to naive_output.r
#   $11 naive_output_cm  forwarded to naive_output.r
#   $12 filter_unique    forwarded to merge_and_filter.r
#   $13 class_filter     forwarded to merge_and_filter.r
#
#set -e

# Directory holding this script and its helper scripts.
dir="$(cd "$(dirname "$0")" && pwd)"

input=$1
method=$2
log=$3 #becomes the main html page at the end
outdir=$4
output="$outdir/index.html" #copied to $log location at the end
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}

mkdir "$outdir"

# Static assets (scripts, stylesheet) referenced by the generated index.html.
tar -xzf "$dir/style.tar.gz" -C "$outdir"

echo "---------------- read parameters ----------------"
echo "---------------- read parameters ----------------<br />" > "$log"
|
55
|
24
|
|
echo "unpacking IMGT file"

# Concatenate every extracted IMGT file whose name starts with "<prefix>_"
# into <dest>. find invokes cat itself, so an empty match just produces an
# empty file instead of leaving a bare `cat` waiting on stdin.
#   $1 - numeric IMGT file prefix (e.g. 1 matches 1_Summary...)
#   $2 - destination file
collect_imgt_files() {
  find "$PWD/files/" -type f -name "${1}_*" -exec cat {} + > "$2"
}

type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "ZX archive"
  echo "tar -xJf $input -C $PWD/files/"
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
else
  # Previously ignored silently; keep going, but say why the files are empty.
  echo "unrecognised IMGT archive type: $type" >&2
fi

collect_imgt_files 1 "$PWD/summary.txt"
collect_imgt_files 3 "$PWD/sequences.txt"
collect_imgt_files 5 "$PWD/aa.txt"
collect_imgt_files 6 "$PWD/junction.txt"
collect_imgt_files 7 "$PWD/mutationanalysis.txt"
collect_imgt_files 8 "$PWD/mutationstats.txt"
collect_imgt_files 10 "$PWD/hotspots.txt"
|
|
46
|
|
47 #cat $PWD/files/*/1_* > $PWD/summary.txt
|
|
48 #cat $PWD/files/*/3_* > $PWD/sequences.txt
|
|
49 #cat $PWD/files/*/5_* > $PWD/aa.txt
|
|
50 #cat $PWD/files/*/6_* > $PWD/junction.txt
|
|
51 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
52 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
53 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
54
|
26
|
55 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
56
|
|
echo "${BLASTN_DIR}"

echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

# Turn "id<TAB>sequence" rows on stdin into FASTA records on stdout,
# skipping rows whose sequence column is empty.
summary_rows_to_fasta() {
  awk -F '\t' '$2 != "" { print ">" $1 "\n" $2 }'
}

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  # Column number = tabs preceding the column name in the header, plus one.
  # Only the first line is inspected so a header repeated further down the
  # concatenated summary cannot inflate the count.
  ID_index=$(head -n 1 "$PWD/summary.txt" | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
  ID_index=$((ID_index + 1))
  sequence_index=$(head -n 1 "$PWD/summary.txt" | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
  sequence_index=$((sequence_index + 1))

  # The former grep-based filter had no output redirect, so sequences.fasta
  # was never written and blastn had nothing to query.
  tail -n +2 "$PWD/summary.txt" \
    | cut -f "${ID_index},${sequence_index}" \
    | summary_rows_to_fasta > "$PWD/sequences.fasta"

  echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi
|
|
79
|
55
|
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# Every argument is quoted so that a path containing whitespace stays a single
# argument and an empty option cannot shift the positions of those after it.
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" \
  "$PWD/sequences.txt" \
  "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" \
  "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" \
  "$outdir/merged.txt" \
  "$outdir/before_unique_filter.txt" \
  "$outdir/unmatched.txt" \
  "$method" "$functionality" "$unique" "${filter_unique}" "${class_filter}" 2>&1
|
0
|
84
|
98
|
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

mkdir "$outdir/new_IMGT"

# Output file names; the number before the first "_" is the prefix used to
# find the matching extracted source files.
imgt_files=(
  1_Summary.txt
  2_IMGT-gapped-nt-sequences.txt
  3_Nt-sequences.txt
  4_IMGT-gapped-AA-sequences.txt
  5_AA-sequences.txt
  6_Junction.txt
  7_V-REGION-mutation-and-AA-change-table.txt
  8_V-REGION-nt-mutation-statistics.txt
  9_V-REGION-AA-change-statistics.txt
  10_V-REGION-mutation-hotspots.txt
)
for imgt_file in "${imgt_files[@]}"; do
  # find runs cat itself: no match gives an empty file rather than a bare
  # `cat` that waits on stdin.
  find "$PWD/files/" -type f -name "${imgt_file%%_*}_*" -exec cat {} + \
    > "$outdir/new_IMGT/$imgt_file"
done

# The per-class copies are taken before igat.r is run on new_IMGT, exactly as
# in the original statement order.
for imgt_class in ca cg cm; do
  mkdir "$outdir/new_IMGT_${imgt_class}"
  cp "$outdir"/new_IMGT/* "$outdir/new_IMGT_${imgt_class}"
done

Rscript "$dir/tmp/igat.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
for imgt_class in ca cg cm; do
  Rscript "$dir/tmp/igat.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "$imgt_class" 2>&1
done

# Archive each directory from inside it so members carry no directory prefix.
# A subshell keeps the cd local, and a failed cd skips the archive instead of
# packing whatever happens to be in the current directory.
for imgt_suffix in "" _ca _cg _cm; do
  (
    cd "$outdir/new_IMGT${imgt_suffix}/" || exit 1
    # Plain `*` (not ./*) keeps archive member names identical to before.
    # shellcheck disable=SC2035
    tar -cJf "../new_IMGT${imgt_suffix}.txz" *
    # The workbook is added after the txz is written, so it is zip-only.
    cp "$dir/tmp/IgAT.xlsm" IgAT.xlsm
    # shellcheck disable=SC2035
    zip -r "../IgAT${imgt_suffix}.zip" *
  )
done
|
|
138
|
55
|
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma-separated class list handed to the R and Python analysis scripts.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1

echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1

# Rename the "aa_histogram_" files (empty suffix after the underscore) to the
# names the report links to.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
  mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
  mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi
|
4
|
160
|
0
|
# Classes that get their own column / table in the report.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Only the "sum" summary is rendered. An earlier assignment also listed
# "mean" and "median" but was overwritten on the very next line.
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir "$outdir/sequence_overview"

#Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1

# Start base_overview.html with one row per line of ntoverview.txt. The
# <table> is deliberately left open here; the closing tag is appended near
# the end of the script.
{
  echo "<table border='1'>"
  # -r keeps backslashes in the data literal.
  while IFS=$'\t' read -r ID class seq A C G T; do
    echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
  done < "$outdir/sequence_overview/ntoverview.txt"
} > "$outdir/base_overview.html"
|
|
180
|
62
|
# Print <part> as a percentage of <total> with two decimals.
# Prints 0.00 when <total> is zero, so an empty result set cannot cause a
# division by zero. Multiplying before dividing keeps the decimals that the
# former `part / total * 100` at scale=2 truncated away.
#   $1 - part
#   $2 - total
percent_of() {
  awk -v part="$1" -v total="$2" \
    'BEGIN { printf "%.2f", (total > 0) ? part * 100 / total : 0 }'
}

# Start index.html: title plus the scripts/stylesheet from style.tar.gz.
{
  echo "<html><center><h1>$title</h1></center>"
  echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
  echo "<script type='text/javascript' src='tabber.js'></script>"
  echo "<script type='text/javascript' src='script.js'></script>"
  echo "<link rel='stylesheet' type='text/css' href='style.css'>"
} > "$output"

#display the matched/unmatched for clarity

# Both counts drop one line via `tail -n +2` (the header row).
matched_count=$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)
unmatched_count=$(tail -n +2 "$outdir/unmatched.txt" | wc -l)
total_count=$((matched_count + unmatched_count))
perc_count=$(percent_of "$unmatched_count" "$total_count")

{
  echo "<center><h2>Total: ${total_count}</h2></center>"
  echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>"
  echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>"
} >> "$output"
|
|
199
|
55
|
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

# Render one comma-separated data line as an HTML table row.
# The line is a row name followed by 11 (x, y, z) triples in column order:
# ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm unmatched all. Each triple becomes one
# "x/y (z%)" cell; the two "S/R (ratio)" rows omit the percent sign.
# Ratio rows used to skip the unmatched cell, which moved the "all" value
# under the "unmatched" header; every row now has the same 12 cells.
#   $1 - one line of data_<func>.txt
shm_table_row() {
  local line=$1 suffix='%' row i
  local -a cells
  IFS=, read -r -a cells <<< "$line"
  if [[ "${cells[0]}" == "FR S/R (ratio)" || "${cells[0]}" == "CDR S/R (ratio)" ]]; then
    suffix=''
  fi
  row="<tr><td>${cells[0]}</td>"
  for ((i = 1; i <= 31; i += 3)); do
    row+="<td>${cells[i]}/${cells[i + 1]} (${cells[i + 2]}${suffix})</td>"
  done
  printf '%s\n' "${row}</tr>"
}

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

for func in "${funcs[@]}"; do

  echo "---------------- $func table ----------------"
  echo "---------------- $func table ----------------<br />" >> "$log"

  cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

  {
    echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>"
    echo "<tr><th>info</th>"
    for gene in "${genes[@]}"; do
      n_seqs=$(< "$outdir/${gene}_${func}_n.txt")
      echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $n_seqs)</a></th>"
    done
    # The unmatched header shows the count taken from unmatched.txt.
    echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>"
    n_seqs=$(< "$outdir/all_${func}_n.txt")
    echo "<th><a href='matched_all_${func}.txt'>all (N = $n_seqs)</a></th>"
    echo "</tr>"

    # `|| [[ -n ... ]]` also renders a final line that lacks a newline.
    while IFS= read -r data_line || [[ -n "$data_line" ]]; do
      shm_table_row "$data_line"
    done < "$outdir/data_${func}.txt"
    echo "</table>"
    #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
  } >> "$output"
done

echo "</div>" >> "$output" #SHM overview tab end
|
|
240
|
|
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

# Print one transition table as HTML.
#   $1 - caption prefix (class name, or "All")
#   $2 - comma-separated file: row label, then the a, c, g and t values
emit_transition_table() {
  local from a c g t
  echo "<table border='1'><caption>$1 transition table</caption>"
  while IFS=, read -r from a c g t; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
  done < "$2"
  echo "</table>"
}

# All remaining tabs are appended to index.html in one go. Images are only
# referenced when the corresponding file exists in $outdir.
{
  echo "<div class='tabbertab' title='SHM Frequency'>"

  if [[ -e "$outdir/scatter.png" ]]; then
    echo "<img src='scatter.png'/><br />"
    echo "<a href='scatter.txt'>download data</a><br />"
  fi
  if [[ -e "$outdir/frequency_ranges.png" ]]; then
    echo "<img src='frequency_ranges.png'/><br />"
    echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />"
    echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />"
  fi

  echo "</div>" #SHM frequency tab end

  echo "<div class='tabbertab' title='Transition tables'>"

  for gene in "${genes[@]}"; do
    emit_transition_table "$gene" "$outdir/transitions_${gene}_sum.txt"
  done
  emit_transition_table "All" "$outdir/transitions_all_sum.txt"

  echo "</div>" #transition tables tab end

  echo "<div class='tabbertab' title='Antigen Selection'>"

  # The overall histogram gates the per-class ones as well.
  if [[ -e "$outdir/aa_histogram.png" ]]; then
    for histogram_suffix in "" _ca _cg _cm; do
      echo "<img src='aa_histogram${histogram_suffix}.png'/><br />"
      echo "<a href='aa_histogram${histogram_suffix}.txt'>download data</a><br />"
    done
  fi

  for baseline_class in ca cg cm; do
    echo "<embed src='baseline_${baseline_class}.pdf' width='700px' height='1000px'>"
  done

  echo "</div>" #antigen selection tab end

  echo "<div class='tabbertab' title='CSR'>"

  for csr_class in ca cg; do
    if [[ -e "$outdir/${csr_class}.png" ]]; then
      echo "<img src='${csr_class}.png'/><br />"
      echo "<a href='${csr_class}.txt'>download data</a><br />"
    fi
  done

  echo "</div>" #CSR tab end

  # Downloads tab, then the closing tags for the tab container and the page.
  cat <<'EOF'
<div class='tabbertab' title='Downloads'>
<a href='unmatched.txt'>unmatched</a><br />
<a href='motif_per_seq.txt'>motif per sequence</a><br />
<a href='merged.txt'>all data</a><br />
<a href='mutation_by_id.txt'>mutations by id</a><br />
<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />
<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />
<a href='sequence_overview/index.html'>Sequence Overview</a><br />
<a href='base_overview.html'>Base overview</a><br />
<a href='baseline.pdf'>Baseline PDF</a><br />
<a href='baseline.txt'>Baseline Table</a><br />
<a href='baseline_ca.pdf'>Baseline ca PDF</a><br />
<a href='baseline_ca.txt'>Baseline ca Table</a><br />
<a href='baseline_cg.pdf'>Baseline cg PDF</a><br />
<a href='baseline_cg.txt'>Baseline cg Table</a><br />
<a href='baseline_cm.pdf'>Baseline cm PDF</a><br />
<a href='baseline_cm.txt'>Baseline cm Table</a><br />
<a href='IgAT.zip'>IgAT zip</a><br />
<a href='IgAT_ca.zip'>IgAT ca zip</a><br />
<a href='IgAT_cg.zip'>IgAT cg zip</a><br />
<a href='IgAT_cm.zip'>IgAT cm zip</a><br />
<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />
<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />
<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />
<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />
</div>
</div>
</html>
EOF
} >> "$output"
|
2
|
348
|
95
|
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"

mkdir "$outdir/baseline"

# Run the baseline wrapper for one class selection, or write a placeholder
# table when the filtered summary has no more than one line.
#   $1 - label: working sub-directory name, also passed to wrapper.sh
#   $2 - suffix of the new_IMGT directory/archive and the baseline outputs
#        ("" for the combined set, "_ca", "_cg" or "_cm")
#   $3 - text written to the baseline table when nothing is run
run_baseline() {
  local label=$1 suffix=$2 empty_msg=$3
  local line_count

  mkdir "$outdir/baseline/${label}"
  line_count=$(wc -l < "$outdir/new_IMGT${suffix}/1_Summary.txt")
  if [[ "${line_count:-0}" -gt 1 ]]; then
    # wrapper.sh is started from inside its own directory. The subshell keeps
    # that cd from affecting later steps, and a failed cd skips the run.
    (
      cd "$outdir/baseline/${label}" || exit 1
      bash "$dir/tmp/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" \
        "$outdir/new_IMGT${suffix}.txz" "$label" \
        "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" \
        "$outdir/baseline${suffix}.pdf" "Sequence.ID" "$outdir/baseline${suffix}.txt"
    )
  else
    echo "$empty_msg" > "$outdir/baseline${suffix}.txt"
  fi
}

run_baseline ca_cg_cm "" "No sequences"
run_baseline ca _ca "No ca sequences"
run_baseline cg _cg "No cg sequences"
run_baseline cm _cm "No cm sequences"
|
|
389
|
82
|
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# The guard used to test $naive_output, which is never assigned in this
# script, so it was always true. It now checks the three naive output
# parameters that are actually passed in and skips only when all of them are
# "None".
# NOTE(review): confirm with the caller that "None" is the value used for an
# output that was not requested.
if [[ "${naive_output_ca}" != "None" || "${naive_output_cg}" != "None" || "${naive_output_cm}" != "None" ]]; then
  echo "---------------- imgt_loader.r ----------------"
  echo "---------------- imgt_loader.r ----------------<br />" >> "$log"
  #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
  Rscript --verbose "$dir/imgt_loader.r" "$PWD/summary.txt" "$PWD/aa.txt" "$PWD/junction.txt" "$outdir/loader_output.txt" 2>&1

  echo "---------------- naive_output.r ----------------"
  echo "---------------- naive_output.r ----------------<br />" >> "$log"
  Rscript "$dir/naive_output.r" "$outdir/loader_output.txt" "$outdir/merged.txt" "${naive_output_ca}" "${naive_output_cg}" "${naive_output_cm}" "$outdir/ntoverview.txt" "$outdir/ntsum.txt" 2>&1
fi
|
|
404
|
81
|
# Close the table that was opened when base_overview.html was started.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log next to the results; $log itself is rewritten below
# as the landing page.
mv "$log" "$outdir/log.html"

{
  echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
  echo "<table border = 1>"
  echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
  # IFS is set for `read` only. The former save/restore stored $TMP instead
  # of $IFS and left IFS corrupted for the rest of the script.
  while IFS=$'\t' read -r step seq perc; do
    echo "<tr>"
    echo "<td>$step</td>"
    echo "<td>$seq</td>"
    echo "<td>${perc}%</td>"
    echo "</tr>"
  done < "$outdir/filtering_steps.txt"
  # Was "</table border>", which is not a valid closing tag.
  echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"
|
47
|
429
|
110
|
430
|
|
431
|
|
432
|
|
433
|
|
434
|
|
435
|
|
436
|
|
437
|
|
438
|
|
439
|
|
440
|
|
441
|
|
442
|
|
443
|
|
444
|
|
445
|
|
446
|
|
447
|
|
448
|
|
449
|