0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
0
|
12 mkdir $outdir
|
|
13
|
|
14 unzip $input -d $PWD/files/ > $PWD/unziplog.log
|
|
15 cat $PWD/files/*/1_* > $PWD/summary.txt
|
|
16 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
17 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
18 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
19
|
26
|
20 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
21
|
|
22 echo "${BLASTN_DIR}"
|
|
23
|
|
24 echo "identification ($method)"
|
|
25
|
|
26 if [[ "${method}" == "custom" ]] ; then
|
|
27 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
28 else
|
|
29 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
30 ID_index=$((ID_index+1))
|
|
31 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
32 sequence_index=$((sequence_index+1))
|
|
33
|
|
34 echo "$ID_index ${sequence_index}"
|
|
35
|
|
36 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
37
|
|
38 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
39 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
40 fi
|
|
41
|
|
42
|
|
43
|
4
|
44 echo "merging"
|
34
|
45 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
|
0
|
46
|
4
|
47 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
48 echo "R mutation analysis"
|
22
|
49 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
4
|
50 echo "python mutation analysis"
|
32
|
51 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
52 echo "R AA histogram"
|
|
53 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
54
|
|
55 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
|
|
56
|
0
|
57 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
58
|
|
59
|
1
|
60 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
|
0
|
61 echo "<tr><th>info</th>" >> $output
|
4
|
62 for gene in ${genes[@]}
|
|
63 do
|
|
64 tmp=`cat $outdir/${gene}_n.txt`
|
|
65 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
66 done
|
|
67 tmp=`cat $outdir/total_n.txt`
|
|
68 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
|
69
|
0
|
70 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
25
|
71 do
|
|
72 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
73 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
74 else
|
0
|
75 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
25
|
76 fi
|
4
|
77 done < $outdir/result.txt
|
0
|
78 echo "</table>" >> $output
|
34
|
79 echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br />" >> $output
|
2
|
80
|
|
81
|
4
|
82 echo "<img src='all.png'/><br />" >> $output
|
26
|
83 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
84 if [ -a $outdir/ca.png ]
|
|
85 then
|
|
86 echo "<img src='ca.png'/><br />" >> $output
|
26
|
87 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
88 fi
|
|
89 if [ -a $outdir/cg.png ]
|
|
90 then
|
|
91 echo "<img src='cg.png'/><br />" >> $output
|
26
|
92 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
93 fi
|
22
|
94 if [ -a $outdir/scatter.png ]
|
|
95 then
|
|
96 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
97 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
98 fi
|
|
99 if [ -a $outdir/aa_histogram.png ]
|
|
100 then
|
|
101 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
102 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
103 fi
|
2
|
104
|
0
|
105 for gene in ${genes[@]}
|
|
106 do
|
|
107 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
108 while IFS=, read from a c g t
|
|
109 do
|
|
110 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
4
|
111 done < $outdir/transitions_${gene}.txt
|
0
|
112 echo "</table>" >> $output
|
|
113 done
|
|
114
|
|
115 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
116 while IFS=, read from a c g t
|
|
117 do
|
|
118 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
|
119 done < $outdir/transitions.txt
|
|
120 echo "</table>" >> $output
|
|
121
|
|
122 echo "</html>" >> $output
|
2
|
123
|
|
124 #rm $outdir/HS12RSS.txt
|
|
125 #rm $outdir/HS23RSS.txt
|