0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
47
|
12 naive_output=$9
|
52
|
13 filter_unique=${10}
|
0
|
14 mkdir $outdir
|
|
15
|
55
|
16 echo "---------------- read parameters ----------------"
|
|
17
|
|
18 echo "unpacking IMGT file"
|
|
19
|
35
|
20 type="`file $input`"
|
|
21 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
22 echo "Zip archive"
|
|
23 echo "unzip $input -d $PWD/files/"
|
|
24 unzip $input -d $PWD/files/
|
|
25 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
26 echo "ZX archive"
|
|
27 echo "tar -xJf $input -C $PWD/files/"
|
|
28 mkdir -p $PWD/files/$title
|
|
29 tar -xJf $input -C $PWD/files/$title
|
|
30 fi
|
|
31
|
0
|
32 cat $PWD/files/*/1_* > $PWD/summary.txt
|
41
|
33 cat $PWD/files/*/3_* > $PWD/sequences.txt
|
47
|
34 cat $PWD/files/*/5_* > $PWD/aa.txt
|
39
|
35 cat $PWD/files/*/6_* > $PWD/junction.txt
|
0
|
36 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
37 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
38 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
39
|
26
|
40 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
41
|
|
42 echo "${BLASTN_DIR}"
|
|
43
|
|
44 echo "identification ($method)"
|
|
45
|
55
|
46 echo "blast or custom"
|
|
47
|
19
|
48 if [[ "${method}" == "custom" ]] ; then
|
55
|
49 echo "custom"
|
19
|
50 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
51 else
|
55
|
52 echo "blast"
|
19
|
53 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
54 ID_index=$((ID_index+1))
|
|
55 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
56 sequence_index=$((sequence_index+1))
|
|
57
|
|
58 echo "$ID_index ${sequence_index}"
|
|
59
|
|
60 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
61
|
|
62 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
63 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
64 fi
|
|
65
|
55
|
66 echo "---------------- merge_and_filter.r ----------------"
|
19
|
67
|
52
|
68 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
|
0
|
69
|
55
|
70 echo "---------------- mutation_analysis.r ----------------"
|
|
71
|
4
|
72 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
73 echo "R mutation analysis"
|
22
|
74 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
53
|
75
|
|
76 #echo "." > $output
|
|
77 #exit 0
|
|
78
|
55
|
79
|
|
80
|
|
81 echo "---------------- mutation_analysis.py ----------------"
|
|
82
|
32
|
83 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
84 echo "R AA histogram"
|
55
|
85
|
|
86 echo "---------------- aa_histogram.r ----------------"
|
|
87
|
26
|
88 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
89
|
0
|
90 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
91
|
53
|
92 funcs=(sum mean median)
|
0
|
93
|
53
|
94
|
|
95 echo "<html><center><h1>$title</h1></center>" >> $output
|
55
|
96 echo "---------------- main tables ----------------"
|
53
|
97 for func in ${funcs[@]}
|
4
|
98 do
|
55
|
99
|
|
100 echo "---------------- $func table ----------------"
|
|
101
|
53
|
102 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
|
|
103
|
|
104 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
|
58
|
105 echo "<tr><th>info</th>" >> $output
|
53
|
106 for gene in ${genes[@]}
|
|
107 do
|
|
108 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
109 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
110 done
|
|
111 tmp=`cat $outdir/all_${func}_n.txt`
|
|
112 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
113
|
53
|
114 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
|
115 do
|
|
116 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
117 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
118 else
|
|
119 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
|
120 fi
|
|
121 done < $outdir/result.txt
|
|
122
|
|
123 done
|
|
124
|
55
|
125 echo "---------------- download links ----------------"
|
|
126
|
0
|
127 echo "</table>" >> $output
|
53
|
128 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
129 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
130 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
131 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
132 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
133 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
2
|
134
|
55
|
135 echo "---------------- images ----------------"
|
|
136
|
4
|
137 echo "<img src='all.png'/><br />" >> $output
|
26
|
138 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
139 if [ -a $outdir/ca.png ]
|
|
140 then
|
|
141 echo "<img src='ca.png'/><br />" >> $output
|
26
|
142 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
143 fi
|
|
144 if [ -a $outdir/cg.png ]
|
|
145 then
|
|
146 echo "<img src='cg.png'/><br />" >> $output
|
26
|
147 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
148 fi
|
22
|
149 if [ -a $outdir/scatter.png ]
|
|
150 then
|
|
151 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
152 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
153 fi
|
49
|
154 if [ -a $outdir/frequency_ranges.png ]
|
|
155 then
|
|
156 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
157 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
158 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
159 fi
|
26
|
160 if [ -a $outdir/aa_histogram.png ]
|
|
161 then
|
|
162 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
163 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
164 fi
|
2
|
165
|
0
|
166 for gene in ${genes[@]}
|
|
167 do
|
|
168 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
169 while IFS=, read from a c g t
|
|
170 do
|
|
171 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
172 done < $outdir/transitions_${gene}_sum.txt
|
0
|
173 echo "</table>" >> $output
|
|
174 done
|
|
175
|
|
176 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
177 while IFS=, read from a c g t
|
|
178 do
|
|
179 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
180 done < $outdir/transitions_all_sum.txt
|
0
|
181 echo "</table>" >> $output
|
|
182
|
|
183 echo "</html>" >> $output
|
2
|
184
|
47
|
185
|
|
186 #optional output for naive
|
|
187
|
55
|
188 echo "---------------- aa_histogram.r ----------------"
|
|
189
|
47
|
190 if [[ "$naive_output" != "None" ]]
|
|
191 then
|
55
|
192 echo "---------------- imgt_loader.r ----------------"
|
50
|
193 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
|
194 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
|
55
|
195 echo "---------------- naive_output.r ----------------"
|
47
|
196 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
|
|
197 fi
|
|
198
|
|
199
|
|
200
|
|
201
|
|
202
|
|
203
|
|
204
|
|
205
|
|
206
|
|
207
|
|
208
|
2
|
209 #rm $outdir/HS12RSS.txt
|
|
210 #rm $outdir/HS23RSS.txt
|