0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
47
|
12 naive_output=$9
|
52
|
13 filter_unique=${10}
|
0
|
14 mkdir $outdir
|
|
15
|
35
|
16 type="`file $input`"
|
|
17 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
18 echo "Zip archive"
|
|
19 echo "unzip $input -d $PWD/files/"
|
|
20 unzip $input -d $PWD/files/
|
|
21 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
22 echo "ZX archive"
|
|
23 echo "tar -xJf $input -C $PWD/files/"
|
|
24 mkdir -p $PWD/files/$title
|
|
25 tar -xJf $input -C $PWD/files/$title
|
|
26 fi
|
|
27
|
0
|
28 cat $PWD/files/*/1_* > $PWD/summary.txt
|
41
|
29 cat $PWD/files/*/3_* > $PWD/sequences.txt
|
47
|
30 cat $PWD/files/*/5_* > $PWD/aa.txt
|
39
|
31 cat $PWD/files/*/6_* > $PWD/junction.txt
|
0
|
32 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
33 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
34 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
35
|
26
|
36 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
37
|
|
38 echo "${BLASTN_DIR}"
|
|
39
|
|
40 echo "identification ($method)"
|
|
41
|
|
42 if [[ "${method}" == "custom" ]] ; then
|
|
43 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
44 else
|
|
45 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
46 ID_index=$((ID_index+1))
|
|
47 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
48 sequence_index=$((sequence_index+1))
|
|
49
|
|
50 echo "$ID_index ${sequence_index}"
|
|
51
|
|
52 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
53
|
|
54 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
55 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
56 fi
|
|
57
|
|
58
|
|
59
|
4
|
60 echo "merging"
|
52
|
61 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique}
|
0
|
62
|
4
|
63 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
64 echo "R mutation analysis"
|
22
|
65 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
53
|
66
|
|
67 #echo "." > $output
|
|
68 #exit 0
|
|
69
|
4
|
70 echo "python mutation analysis"
|
32
|
71 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
72 echo "R AA histogram"
|
|
73 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
74
|
0
|
75 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
76
|
53
|
77 funcs=(sum mean median)
|
0
|
78
|
53
|
79
|
|
80 echo "<html><center><h1>$title</h1></center>" >> $output
|
|
81
|
|
82 for func in ${funcs[@]}
|
4
|
83 do
|
53
|
84 cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
|
|
85
|
|
86 echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
|
|
87 echo "<tr><th'>info</th>" >> $output
|
|
88 for gene in ${genes[@]}
|
|
89 do
|
|
90 tmp=`cat $outdir/${gene}_${func}_n.txt`
|
|
91 echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
92 done
|
|
93 tmp=`cat $outdir/all_${func}_n.txt`
|
|
94 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
4
|
95
|
53
|
96 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
|
97 do
|
|
98 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
99 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
100 else
|
|
101 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
|
102 fi
|
|
103 done < $outdir/result.txt
|
|
104
|
|
105 done
|
|
106
|
0
|
107 echo "</table>" >> $output
|
53
|
108 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
|
|
109 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
|
|
110 echo "<a href='merged.txt'>all data</a><br />" >> $output
|
|
111 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
|
112 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
|
113 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
2
|
114
|
4
|
115 echo "<img src='all.png'/><br />" >> $output
|
26
|
116 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
117 if [ -a $outdir/ca.png ]
|
|
118 then
|
|
119 echo "<img src='ca.png'/><br />" >> $output
|
26
|
120 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
121 fi
|
|
122 if [ -a $outdir/cg.png ]
|
|
123 then
|
|
124 echo "<img src='cg.png'/><br />" >> $output
|
26
|
125 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
126 fi
|
22
|
127 if [ -a $outdir/scatter.png ]
|
|
128 then
|
|
129 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
130 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
131 fi
|
49
|
132 if [ -a $outdir/frequency_ranges.png ]
|
|
133 then
|
|
134 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
135 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
136 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
137 fi
|
26
|
138 if [ -a $outdir/aa_histogram.png ]
|
|
139 then
|
|
140 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
141 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
142 fi
|
2
|
143
|
0
|
144 for gene in ${genes[@]}
|
|
145 do
|
|
146 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
147 while IFS=, read from a c g t
|
|
148 do
|
|
149 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
150 done < $outdir/transitions_${gene}_sum.txt
|
0
|
151 echo "</table>" >> $output
|
|
152 done
|
|
153
|
|
154 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
155 while IFS=, read from a c g t
|
|
156 do
|
|
157 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
53
|
158 done < $outdir/transitions_all_sum.txt
|
0
|
159 echo "</table>" >> $output
|
|
160
|
|
161 echo "</html>" >> $output
|
2
|
162
|
47
|
163
|
|
164 #optional output for naive
|
|
165
|
|
166 if [[ "$naive_output" != "None" ]]
|
|
167 then
|
|
168 echo "naive_output: $naive_output"
|
50
|
169 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
|
170 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
|
47
|
171 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
|
|
172 fi
|
|
173
|
|
174
|
|
175
|
|
176
|
|
177
|
|
178
|
|
179
|
|
180
|
|
181
|
|
182
|
|
183
|
2
|
184 #rm $outdir/HS12RSS.txt
|
|
185 #rm $outdir/HS23RSS.txt
|