0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
0
|
12 mkdir $outdir
|
|
13
|
35
|
14 type="`file $input`"
|
|
15 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
16 echo "Zip archive"
|
|
17 echo "unzip $input -d $PWD/files/"
|
|
18 unzip $input -d $PWD/files/
|
|
19 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
20 echo "ZX archive"
|
|
21 echo "tar -xJf $input -C $PWD/files/"
|
|
22 mkdir -p $PWD/files/$title
|
|
23 tar -xJf $input -C $PWD/files/$title
|
|
24 fi
|
|
25
|
0
|
26 cat $PWD/files/*/1_* > $PWD/summary.txt
|
|
27 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
28 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
29 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
30
|
26
|
31 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
32
|
|
33 echo "${BLASTN_DIR}"
|
|
34
|
|
35 echo "identification ($method)"
|
|
36
|
|
37 if [[ "${method}" == "custom" ]] ; then
|
|
38 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
39 else
|
|
40 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
41 ID_index=$((ID_index+1))
|
|
42 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
43 sequence_index=$((sequence_index+1))
|
|
44
|
|
45 echo "$ID_index ${sequence_index}"
|
|
46
|
|
47 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
48
|
|
49 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
50 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
51 fi
|
|
52
|
|
53
|
|
54
|
4
|
55 echo "merging"
|
34
|
56 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
|
0
|
57
|
4
|
58 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
59 echo "R mutation analysis"
|
22
|
60 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
4
|
61 echo "python mutation analysis"
|
32
|
62 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
63 echo "R AA histogram"
|
|
64 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
65
|
|
66 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
|
|
67
|
0
|
68 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
69
|
|
70
|
1
|
71 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
|
0
|
72 echo "<tr><th>info</th>" >> $output
|
4
|
73 for gene in ${genes[@]}
|
|
74 do
|
|
75 tmp=`cat $outdir/${gene}_n.txt`
|
|
76 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
77 done
|
|
78 tmp=`cat $outdir/total_n.txt`
|
|
79 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
|
80
|
0
|
81 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
25
|
82 do
|
|
83 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
84 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
85 else
|
0
|
86 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
25
|
87 fi
|
4
|
88 done < $outdir/result.txt
|
0
|
89 echo "</table>" >> $output
|
34
|
90 echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br />" >> $output
|
2
|
91
|
|
92
|
4
|
93 echo "<img src='all.png'/><br />" >> $output
|
26
|
94 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
95 if [ -a $outdir/ca.png ]
|
|
96 then
|
|
97 echo "<img src='ca.png'/><br />" >> $output
|
26
|
98 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
99 fi
|
|
100 if [ -a $outdir/cg.png ]
|
|
101 then
|
|
102 echo "<img src='cg.png'/><br />" >> $output
|
26
|
103 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
104 fi
|
22
|
105 if [ -a $outdir/scatter.png ]
|
|
106 then
|
|
107 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
108 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
109 fi
|
|
110 if [ -a $outdir/aa_histogram.png ]
|
|
111 then
|
|
112 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
113 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
114 fi
|
2
|
115
|
0
|
116 for gene in ${genes[@]}
|
|
117 do
|
|
118 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
119 while IFS=, read from a c g t
|
|
120 do
|
|
121 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
4
|
122 done < $outdir/transitions_${gene}.txt
|
0
|
123 echo "</table>" >> $output
|
|
124 done
|
|
125
|
|
126 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
127 while IFS=, read from a c g t
|
|
128 do
|
|
129 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
|
130 done < $outdir/transitions.txt
|
|
131 echo "</table>" >> $output
|
|
132
|
|
133 echo "</html>" >> $output
|
2
|
134
|
|
135 #rm $outdir/HS12RSS.txt
|
|
136 #rm $outdir/HS23RSS.txt
|