0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
47
|
12 naive_output=$9
|
52
|
13 filter_unique=${10}
|
0
|
14 mkdir $outdir
|
|
15
|
55
|
16 echo "---------------- read parameters ----------------"
|
62
|
17 echo "---------------- read parameters ----------------" > $output
|
55
|
18
|
|
19 echo "unpacking IMGT file"
|
|
20
|
35
|
21 type="`file $input`"
|
|
22 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
23 echo "Zip archive"
|
|
24 echo "unzip $input -d $PWD/files/"
|
|
25 unzip $input -d $PWD/files/
|
|
26 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
27 echo "ZX archive"
|
|
28 echo "tar -xJf $input -C $PWD/files/"
|
|
29 mkdir -p $PWD/files/$title
|
|
30 tar -xJf $input -C $PWD/files/$title
|
|
31 fi
|
|
32
|
0
|
33 cat $PWD/files/*/1_* > $PWD/summary.txt
|
41
|
34 cat $PWD/files/*/3_* > $PWD/sequences.txt
|
47
|
35 cat $PWD/files/*/5_* > $PWD/aa.txt
|
39
|
36 cat $PWD/files/*/6_* > $PWD/junction.txt
|
0
|
37 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
38 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
39 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
40
|
26
|
41 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
42
|
|
43 echo "${BLASTN_DIR}"
|
|
44
|
|
45 echo "identification ($method)"
|
62
|
46 echo "identification ($method)" >> $output
|
19
|
47
|
55
|
48 echo "blast or custom"
|
|
49
|
19
|
50 if [[ "${method}" == "custom" ]] ; then
|
55
|
51 echo "custom"
|
19
|
52 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
53 else
|
55
|
54 echo "blast"
|
19
|
55 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
56 ID_index=$((ID_index+1))
|
|
57 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
58 sequence_index=$((sequence_index+1))
|
|
59
|
|
60 echo "$ID_index ${sequence_index}"
|
|
61
|
|
62 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
63
|
|
64 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
65 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
66 fi
|
|
67
|
55
|
68 echo "---------------- merge_and_filter.r ----------------"
|
62
|
69 echo "---------------- merge_and_filter.r ----------------" >> $output
|
19
|
70
|
52
|
71 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
|
0
|
72
|
55
|
73 echo "---------------- mutation_analysis.r ----------------"
|
62
|
74 echo "---------------- mutation_analysis.r ----------------" >> $output
|
55
|
75
|
4
|
76 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
77 echo "R mutation analysis"
|
22
|
78 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
53
|
79
|
|
80 #echo "." > $output
|
|
81 #exit 0
|
|
82
|
55
|
83
|
|
84
|
|
85 echo "---------------- mutation_analysis.py ----------------"
|
62
|
86 echo "---------------- mutation_analysis.py ----------------" >> $output
|
55
|
87
|
32
|
88 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
89 echo "R AA histogram"
|
55
|
90
|
|
91 echo "---------------- aa_histogram.r ----------------"
|
62
|
92 echo "---------------- aa_histogram.r ----------------" >> $output
|
55
|
93
|
26
|
94 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
95
|
0
|
96 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
97
|
53
|
98 funcs=(sum mean median)
|
0
|
99
|
53
|
100
|
62
|
101 echo "<html><center><h1>$title</h1></center>" > $output
|
|
102
|
|
103 #display the matched/unmatched for clearity
|
|
104
|
|
105 matched_count="`cat $outdir/merged.txt | tail -n +2 | wc -l`"
|
|
106 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
|
|
107 total_count=$((matched_count + unmatched_count))
|
|
108 perc_count=$((unmatched_count / total_count * 100))
|
|
109 perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"`
|
|
110 perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"`
|
|
111
|
|
112 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
|
|
113 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
|
|
114 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
|
|
115
|
55
|
116 echo "---------------- main tables ----------------"
|
53
|
117 for func in ${funcs[@]}
|
4
|
118 do
|
55
|
119
|
|
120 echo "---------------- $func table ----------------"
|
|
121
|
53
|
122 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
|
|
123
|
|
124 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
|
58
|
125 echo "<tr><th>info</th>" >> $output
|
53
|
126 for gene in ${genes[@]}
|
|
127 do
|
|
128 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
129 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
130 done
|
|
131 tmp=`cat $outdir/all_${func}_n.txt`
|
|
132 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
133
|
53
|
134 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
|
135 do
|
|
136 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
137 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
138 else
|
|
139 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
|
140 fi
|
|
141 done < $outdir/result.txt
|
|
142
|
|
143 done
|
|
144
|
55
|
145 echo "---------------- download links ----------------"
|
|
146
|
0
|
147 echo "</table>" >> $output
|
53
|
148 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
149 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
150 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
151 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
152 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
153 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
2
|
154
|
55
|
155 echo "---------------- images ----------------"
|
|
156
|
4
|
157 echo "<img src='all.png'/><br />" >> $output
|
26
|
158 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
159 if [ -a $outdir/ca.png ]
|
|
160 then
|
|
161 echo "<img src='ca.png'/><br />" >> $output
|
26
|
162 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
163 fi
|
|
164 if [ -a $outdir/cg.png ]
|
|
165 then
|
|
166 echo "<img src='cg.png'/><br />" >> $output
|
26
|
167 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
168 fi
|
22
|
169 if [ -a $outdir/scatter.png ]
|
|
170 then
|
|
171 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
172 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
173 fi
|
49
|
174 if [ -a $outdir/frequency_ranges.png ]
|
|
175 then
|
|
176 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
177 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
178 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
179 fi
|
26
|
180 if [ -a $outdir/aa_histogram.png ]
|
|
181 then
|
|
182 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
183 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
184 fi
|
2
|
185
|
0
|
186 for gene in ${genes[@]}
|
|
187 do
|
|
188 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
189 while IFS=, read from a c g t
|
|
190 do
|
|
191 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
192 done < $outdir/transitions_${gene}_sum.txt
|
0
|
193 echo "</table>" >> $output
|
|
194 done
|
|
195
|
|
196 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
197 while IFS=, read from a c g t
|
|
198 do
|
|
199 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
200 done < $outdir/transitions_all_sum.txt
|
0
|
201 echo "</table>" >> $output
|
|
202
|
|
203 echo "</html>" >> $output
|
2
|
204
|
47
|
205
|
|
206 #optional output for naive
|
|
207
|
55
|
208 echo "---------------- aa_histogram.r ----------------"
|
|
209
|
47
|
210 if [[ "$naive_output" != "None" ]]
|
|
211 then
|
55
|
212 echo "---------------- imgt_loader.r ----------------"
|
50
|
213 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
|
214 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
|
55
|
215 echo "---------------- naive_output.r ----------------"
|
47
|
216 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
|
|
217 fi
|
|
218
|
|
219
|
|
220
|
|
221
|
|
222
|
|
223
|
|
224
|
|
225
|
|
226
|
|
227
|
|
228
|
2
|
229 #rm $outdir/HS12RSS.txt
|
|
230 #rm $outdir/HS23RSS.txt
|