0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
47
|
12 naive_output=$9
|
52
|
13 filter_unique=${10}
|
0
|
14 mkdir $outdir
|
|
15
|
35
|
16 type="`file $input`"
|
|
17 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
18 echo "Zip archive"
|
|
19 echo "unzip $input -d $PWD/files/"
|
|
20 unzip $input -d $PWD/files/
|
|
21 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
22 echo "ZX archive"
|
|
23 echo "tar -xJf $input -C $PWD/files/"
|
|
24 mkdir -p $PWD/files/$title
|
|
25 tar -xJf $input -C $PWD/files/$title
|
|
26 fi
|
|
27
|
0
|
28 cat $PWD/files/*/1_* > $PWD/summary.txt
|
41
|
29 cat $PWD/files/*/3_* > $PWD/sequences.txt
|
47
|
30 cat $PWD/files/*/5_* > $PWD/aa.txt
|
39
|
31 cat $PWD/files/*/6_* > $PWD/junction.txt
|
0
|
32 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
33 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
34 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
35
|
53
|
36 cp $dir/tabber.js $outdir
|
|
37
|
26
|
38 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
39
|
|
40 echo "${BLASTN_DIR}"
|
|
41
|
|
42 echo "identification ($method)"
|
|
43
|
|
44 if [[ "${method}" == "custom" ]] ; then
|
|
45 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
46 else
|
|
47 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
48 ID_index=$((ID_index+1))
|
|
49 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
50 sequence_index=$((sequence_index+1))
|
|
51
|
|
52 echo "$ID_index ${sequence_index}"
|
|
53
|
|
54 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
55
|
|
56 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
57 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
58 fi
|
|
59
|
|
60
|
|
61
|
4
|
62 echo "merging"
|
52
|
63 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
|
0
|
64
|
4
|
65 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
66 echo "R mutation analysis"
|
22
|
67 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
53
|
68
|
|
69 #echo "." > $output
|
|
70 #exit 0
|
|
71
|
4
|
72 echo "python mutation analysis"
|
32
|
73 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
74 echo "R AA histogram"
|
|
75 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
76
|
0
|
77 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
78
|
53
|
79 funcs=(sum mean median)
|
0
|
80
|
53
|
81
|
|
82 echo "<html><center><h1>$title</h1></center>" >> $output
|
|
83
|
|
84 for func in ${funcs[@]}
|
4
|
85 do
|
53
|
86 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
|
|
87
|
|
88 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
|
|
89 echo "<tr><th'>info</th>" >> $output
|
|
90 for gene in ${genes[@]}
|
|
91 do
|
|
92 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
93 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
94 done
|
|
95 tmp=`cat $outdir/all_${func}_n.txt`
|
|
96 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
97
|
53
|
98 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
|
99 do
|
|
100 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
101 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
102 else
|
|
103 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
|
104 fi
|
|
105 done < $outdir/result.txt
|
|
106
|
|
107 done
|
|
108
|
0
|
109 echo "</table>" >> $output
|
53
|
110 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
111 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
112 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
113 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
114 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
115 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
2
|
116
|
4
|
117 echo "<img src='all.png'/><br />" >> $output
|
26
|
118 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
119 if [ -a $outdir/ca.png ]
|
|
120 then
|
|
121 echo "<img src='ca.png'/><br />" >> $output
|
26
|
122 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
123 fi
|
|
124 if [ -a $outdir/cg.png ]
|
|
125 then
|
|
126 echo "<img src='cg.png'/><br />" >> $output
|
26
|
127 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
128 fi
|
22
|
129 if [ -a $outdir/scatter.png ]
|
|
130 then
|
|
131 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
132 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
133 fi
|
49
|
134 if [ -a $outdir/frequency_ranges.png ]
|
|
135 then
|
|
136 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
137 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
138 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
139 fi
|
26
|
140 if [ -a $outdir/aa_histogram.png ]
|
|
141 then
|
|
142 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
143 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
144 fi
|
2
|
145
|
0
|
146 for gene in ${genes[@]}
|
|
147 do
|
|
148 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
149 while IFS=, read from a c g t
|
|
150 do
|
|
151 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
152 done < $outdir/transitions_${gene}_sum.txt
|
0
|
153 echo "</table>" >> $output
|
|
154 done
|
|
155
|
|
156 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
157 while IFS=, read from a c g t
|
|
158 do
|
|
159 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
160 done < $outdir/transitions_all_sum.txt
|
0
|
161 echo "</table>" >> $output
|
|
162
|
|
163 echo "</html>" >> $output
|
2
|
164
|
47
|
165
|
|
166 #optional output for naive
|
|
167
|
|
168 if [[ "$naive_output" != "None" ]]
|
|
169 then
|
|
170 echo "naive_output: $naive_output"
|
50
|
171 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
|
172 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
|
47
|
173 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
|
|
174 fi
|
|
175
|
|
176
|
|
177
|
|
178
|
|
179
|
|
180
|
|
181
|
|
182
|
|
183
|
|
184
|
|
185
|
2
|
186 #rm $outdir/HS12RSS.txt
|
|
187 #rm $outdir/HS23RSS.txt
|