0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
47
|
12 naive_output=$9
|
0
|
13 mkdir $outdir
|
|
14
|
35
|
15 type="`file $input`"
|
|
16 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
17 echo "Zip archive"
|
|
18 echo "unzip $input -d $PWD/files/"
|
|
19 unzip $input -d $PWD/files/
|
|
20 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
21 echo "ZX archive"
|
|
22 echo "tar -xJf $input -C $PWD/files/"
|
|
23 mkdir -p $PWD/files/$title
|
|
24 tar -xJf $input -C $PWD/files/$title
|
|
25 fi
|
|
26
|
0
|
27 cat $PWD/files/*/1_* > $PWD/summary.txt
|
41
|
28 cat $PWD/files/*/3_* > $PWD/sequences.txt
|
47
|
29 cat $PWD/files/*/5_* > $PWD/aa.txt
|
39
|
30 cat $PWD/files/*/6_* > $PWD/junction.txt
|
0
|
31 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
32 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
33 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
34
|
26
|
35 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
36
|
|
37 echo "${BLASTN_DIR}"
|
|
38
|
|
39 echo "identification ($method)"
|
|
40
|
|
41 if [[ "${method}" == "custom" ]] ; then
|
|
42 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
43 else
|
|
44 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
45 ID_index=$((ID_index+1))
|
|
46 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
47 sequence_index=$((sequence_index+1))
|
|
48
|
|
49 echo "$ID_index ${sequence_index}"
|
|
50
|
|
51 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
52
|
|
53 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
54 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
55 fi
|
|
56
|
|
57
|
|
58
|
4
|
59 echo "merging"
|
41
|
60 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
|
0
|
61
|
4
|
62 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
63 echo "R mutation analysis"
|
22
|
64 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
4
|
65 echo "python mutation analysis"
|
32
|
66 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
67 echo "R AA histogram"
|
|
68 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
69
|
|
70 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
|
|
71
|
0
|
72 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
73
|
|
74
|
1
|
75 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
|
0
|
76 echo "<tr><th>info</th>" >> $output
|
4
|
77 for gene in ${genes[@]}
|
|
78 do
|
|
79 tmp=`cat $outdir/${gene}_n.txt`
|
|
80 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
81 done
|
|
82 tmp=`cat $outdir/total_n.txt`
|
|
83 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
|
84
|
0
|
85 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
25
|
86 do
|
|
87 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
88 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
89 else
|
0
|
90 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
25
|
91 fi
|
4
|
92 done < $outdir/result.txt
|
0
|
93 echo "</table>" >> $output
|
43
|
94 echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br /><a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
|
2
|
95
|
|
96
|
4
|
97 echo "<img src='all.png'/><br />" >> $output
|
26
|
98 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
99 if [ -a $outdir/ca.png ]
|
|
100 then
|
|
101 echo "<img src='ca.png'/><br />" >> $output
|
26
|
102 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
103 fi
|
|
104 if [ -a $outdir/cg.png ]
|
|
105 then
|
|
106 echo "<img src='cg.png'/><br />" >> $output
|
26
|
107 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
108 fi
|
22
|
109 if [ -a $outdir/scatter.png ]
|
|
110 then
|
|
111 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
112 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
113 fi
|
|
114 if [ -a $outdir/aa_histogram.png ]
|
|
115 then
|
|
116 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
117 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
118 fi
|
2
|
119
|
0
|
120 for gene in ${genes[@]}
|
|
121 do
|
|
122 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
123 while IFS=, read from a c g t
|
|
124 do
|
|
125 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
4
|
126 done < $outdir/transitions_${gene}.txt
|
0
|
127 echo "</table>" >> $output
|
|
128 done
|
|
129
|
|
130 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
131 while IFS=, read from a c g t
|
|
132 do
|
|
133 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
|
134 done < $outdir/transitions.txt
|
|
135 echo "</table>" >> $output
|
|
136
|
|
137 echo "</html>" >> $output
|
2
|
138
|
47
|
139
|
|
140 #optional output for naive
|
|
141
|
|
142 if [[ "$naive_output" != "None" ]]
|
|
143 then
|
|
144 echo "naive_output: $naive_output"
|
|
145 python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
|
146 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
|
|
147 fi
|
|
148
|
|
149
|
|
150
|
|
151
|
|
152
|
|
153
|
|
154
|
|
155
|
|
156
|
|
157
|
|
158
|
2
|
159 #rm $outdir/HS12RSS.txt
|
|
160 #rm $outdir/HS23RSS.txt
|