0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
69
|
12 naive_output_ca=$9
|
|
13 naive_output_cg=${10}
|
|
14 naive_output_cm=${11}
|
|
15 filter_unique=${12}
|
|
16 class_filter=${13}
|
0
|
17 mkdir $outdir
|
|
18
|
55
|
19 echo "---------------- read parameters ----------------"
|
63
|
20 echo "---------------- read parameters ----------------<br />" > $output
|
55
|
21
|
|
22 echo "unpacking IMGT file"
|
|
23
|
89
|
24
|
|
25
|
35
|
26 type="`file $input`"
|
|
27 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
28 echo "Zip archive"
|
|
29 echo "unzip $input -d $PWD/files/"
|
|
30 unzip $input -d $PWD/files/
|
|
31 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
32 echo "ZX archive"
|
|
33 echo "tar -xJf $input -C $PWD/files/"
|
|
34 mkdir -p $PWD/files/$title
|
|
35 tar -xJf $input -C $PWD/files/$title
|
|
36 fi
|
|
37
|
64
|
38 cat `find $PWD/files/ -name "1_*"` > $PWD/summary.txt
|
|
39 cat `find $PWD/files/ -name "3_*"` > $PWD/sequences.txt
|
|
40 cat `find $PWD/files/ -name "5_*"` > $PWD/aa.txt
|
|
41 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt
|
|
42 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt
|
|
43 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt
|
|
44 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt
|
|
45
|
|
46 #cat $PWD/files/*/1_* > $PWD/summary.txt
|
|
47 #cat $PWD/files/*/3_* > $PWD/sequences.txt
|
|
48 #cat $PWD/files/*/5_* > $PWD/aa.txt
|
|
49 #cat $PWD/files/*/6_* > $PWD/junction.txt
|
|
50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
51 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
52 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
53
|
26
|
54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
55
|
|
56 echo "${BLASTN_DIR}"
|
|
57
|
89
|
58 echo "---------------- identification ($method) ----------------"
|
|
59 echo "---------------- identification ($method) ----------------<br />" >> $output
|
55
|
60
|
19
|
61 if [[ "${method}" == "custom" ]] ; then
|
|
62 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
63 else
|
|
64 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
65 ID_index=$((ID_index+1))
|
|
66 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
67 sequence_index=$((sequence_index+1))
|
|
68
|
|
69 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
70
|
|
71 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
72 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
73 fi
|
|
74
|
55
|
75 echo "---------------- merge_and_filter.r ----------------"
|
63
|
76 echo "---------------- merge_and_filter.r ----------------<br />" >> $output
|
19
|
77
|
90
|
78 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1
|
0
|
79
|
98
|
80 echo "---------------- creating new IMGT zip ----------------"
|
95
|
81 echo "---------------- creating new IMGT zip ----------------<br />" >> $output
|
|
82
|
|
83 mkdir $outdir/new_IMGT
|
|
84
|
|
85 cat `find $PWD/files/ -name "1_*"` > "$outdir/new_IMGT/1_Summary.txt"
|
|
86 cat `find $PWD/files/ -name "2_*"` > "$outdir/new_IMGT/2_IMGT-gapped-nt-sequences.txt"
|
|
87 cat `find $PWD/files/ -name "3_*"` > "$outdir/new_IMGT/3_Nt-sequences.txt"
|
|
88 cat `find $PWD/files/ -name "4_*"` > "$outdir/new_IMGT/4_IMGT-gapped-AA-sequences.txt"
|
|
89 cat `find $PWD/files/ -name "5_*"` > "$outdir/new_IMGT/5_AA-sequences.txt"
|
|
90 cat `find $PWD/files/ -name "6_*"` > "$outdir/new_IMGT/6_Junction.txt"
|
|
91 cat `find $PWD/files/ -name "7_*"` > "$outdir/new_IMGT/7_V-REGION-mutation-and-AA-change-table.txt"
|
|
92 cat `find $PWD/files/ -name "8_*"` > "$outdir/new_IMGT/8_V-REGION-nt-mutation-statistics.txt"
|
|
93 cat `find $PWD/files/ -name "9_*"` > "$outdir/new_IMGT/9_V-REGION-AA-change-statistics.txt"
|
|
94 cat `find $PWD/files/ -name "10_*"` > "$outdir/new_IMGT/10_V-REGION-mutation-hotspots.txt"
|
|
95
|
|
96 Rscript $dir/tmp/igat.r $outdir/new_IMGT/ $outdir/merged.txt 2>&1
|
|
97
|
|
98
|
|
99 tmp="$PWD"
|
|
100 cd $outdir/new_IMGT/ #tar weirdness...
|
|
101 tar -cJf ../new_IMGT.txz *
|
|
102
|
|
103 cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT/IgAT.xlsm
|
|
104
|
|
105 #tar -cJf ../IgAT.txz *
|
|
106 zip -r ../IgAT.zip *
|
|
107
|
|
108 cd $tmp
|
|
109
|
|
110
|
55
|
111 echo "---------------- mutation_analysis.r ----------------"
|
63
|
112 echo "---------------- mutation_analysis.r ----------------<br />" >> $output
|
55
|
113
|
82
|
114 classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
|
4
|
115 echo "R mutation analysis"
|
82
|
116 Rscript $dir/mutation_analysis.r $outdir/merged.txt $classes $outdir ${include_fr1} 2>&1
|
53
|
117
|
55
|
118
|
|
119 echo "---------------- mutation_analysis.py ----------------"
|
63
|
120 echo "---------------- mutation_analysis.py ----------------<br />" >> $output
|
55
|
121
|
82
|
122 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $classes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
55
|
123
|
|
124 echo "---------------- aa_histogram.r ----------------"
|
63
|
125 echo "---------------- aa_histogram.r ----------------<br />" >> $output
|
55
|
126
|
26
|
127 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
128
|
0
|
129 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
130
|
53
|
131 funcs=(sum mean median)
|
0
|
132
|
82
|
133 echo "---------------- sequence_overview.r ----------------"
|
95
|
134 echo "---------------- sequence_overview.r ----------------" >> $output
|
82
|
135
|
|
136 mkdir $outdir/sequence_overview
|
|
137
|
90
|
138 #Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
|
94
|
139 Rscript $dir/sequence_overview.r $outdir/before_unique_filter.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
|
82
|
140
|
|
141 echo "<table border='1'>" > $outdir/base_overview.html
|
|
142
|
92
|
143 while IFS=$'\t' read ID class seq A C G T
|
82
|
144 do
|
85
|
145 echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html
|
82
|
146 done < $outdir/sequence_overview/ntoverview.txt
|
|
147
|
53
|
148
|
62
|
149 echo "<html><center><h1>$title</h1></center>" > $output
|
|
150
|
|
151 #display the matched/unmatched for clearity
|
|
152
|
98
|
153 matched_count="`cat $outdir/merged.txt | grep -v 'unmatched' | tail -n +2 | wc -l`"
|
62
|
154 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
|
|
155 total_count=$((matched_count + unmatched_count))
|
|
156 perc_count=$((unmatched_count / total_count * 100))
|
|
157 perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"`
|
|
158 perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"`
|
|
159
|
|
160 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
|
|
161 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
|
|
162 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
|
|
163
|
55
|
164 echo "---------------- main tables ----------------"
|
53
|
165 for func in ${funcs[@]}
|
4
|
166 do
|
55
|
167
|
|
168 echo "---------------- $func table ----------------"
|
|
169
|
94
|
170 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/data_${func}.txt
|
53
|
171
|
98
|
172 echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> $output
|
58
|
173 echo "<tr><th>info</th>" >> $output
|
53
|
174 for gene in ${genes[@]}
|
|
175 do
|
|
176 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
177 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
178 done
|
78
|
179
|
|
180 tmp=`cat $outdir/unmatched_${func}_n.txt`
|
79
|
181 echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output
|
53
|
182 tmp=`cat $outdir/all_${func}_n.txt`
|
89
|
183 echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
184
|
78
|
185 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
|
53
|
186 do
|
|
187 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
188 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
189 else
|
78
|
190 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${unx}/${uny} (${unz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
53
|
191 fi
|
94
|
192 done < $outdir/data_${func}.txt
|
|
193 echo "</table>" >> $output
|
|
194 #echo "<a href='data_${func}.txt'>Download data</a>" >> $output
|
53
|
195 done
|
|
196
|
55
|
197 echo "---------------- download links ----------------"
|
|
198
|
94
|
199
|
53
|
200 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
201 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
202 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
203 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
204 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
205 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
77
|
206 echo "<a href='sequence_overview/index.html'>Sequence Overview</a><br />" >> $output
|
81
|
207 echo "<a href='base_overview.html'>Base overview</a><br />" >> $output
|
95
|
208 echo "<a href='baseline.pdf'>Baseline PDF</a><br />" >> $output
|
|
209 echo "<a href='baseline.txt'>Baseline Table</a><br />" >> $output
|
|
210 echo "<a href='IgAT.zip'>IgAT zip</a><br />" >> $output
|
2
|
211
|
55
|
212 echo "---------------- images ----------------"
|
|
213
|
4
|
214 echo "<img src='all.png'/><br />" >> $output
|
26
|
215 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
216 if [ -a $outdir/ca.png ]
|
|
217 then
|
|
218 echo "<img src='ca.png'/><br />" >> $output
|
26
|
219 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
220 fi
|
|
221 if [ -a $outdir/cg.png ]
|
|
222 then
|
|
223 echo "<img src='cg.png'/><br />" >> $output
|
26
|
224 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
225 fi
|
22
|
226 if [ -a $outdir/scatter.png ]
|
|
227 then
|
|
228 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
229 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
230 fi
|
49
|
231 if [ -a $outdir/frequency_ranges.png ]
|
|
232 then
|
|
233 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
234 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
235 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
236 fi
|
26
|
237 if [ -a $outdir/aa_histogram.png ]
|
|
238 then
|
|
239 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
240 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
241 fi
|
2
|
242
|
0
|
243 for gene in ${genes[@]}
|
|
244 do
|
|
245 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
246 while IFS=, read from a c g t
|
|
247 do
|
|
248 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
249 done < $outdir/transitions_${gene}_sum.txt
|
0
|
250 echo "</table>" >> $output
|
|
251 done
|
|
252
|
|
253 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
254 while IFS=, read from a c g t
|
|
255 do
|
|
256 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
257 done < $outdir/transitions_all_sum.txt
|
0
|
258 echo "</table>" >> $output
|
|
259
|
|
260 echo "</html>" >> $output
|
2
|
261
|
95
|
262 echo "---------------- baseline ----------------"
|
|
263 bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "sample name" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"
|
47
|
264
|
|
265 #optional output for naive
|
|
266
|
82
|
267 echo "---------------- naive_output.r ----------------"
|
55
|
268
|
47
|
269 if [[ "$naive_output" != "None" ]]
|
|
270 then
|
55
|
271 echo "---------------- imgt_loader.r ----------------"
|
50
|
272 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
80
|
273 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
|
95
|
274
|
55
|
275 echo "---------------- naive_output.r ----------------"
|
81
|
276 Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
|
47
|
277 fi
|
|
278
|
81
|
279 echo "</table>" >> $outdir/base_overview.html
|
|
280
|
|
281 echo "---------------- Done! ----------------"
|
47
|
282
|
|
283
|
|
284
|
|
285
|
|
286
|
|
287
|
|
288
|
2
|
289 #rm $outdir/HS12RSS.txt
|
|
290 #rm $outdir/HS23RSS.txt
|