#!/bin/bash
#
# Mutation-analysis pipeline driver: unpacks an input archive, identifies
# genes, merges/filters the tables, runs the R and Python mutation analyses
# and renders an HTML report.
#
# Usage: <this script> INPUT_ZIP METHOD OUTPUT_HTML OUTDIR [TITLE]
#   INPUT_ZIP    zip archive; extracted into $PWD/files/
#   METHOD       "custom" runs gene_identification.py; any other value runs
#                blastn against subclass_definition.db
#   OUTPUT_HTML  path of the HTML report to write
#   OUTDIR       directory receiving intermediate and result files
#   TITLE        heading shown at the top of the report (may be empty)
#
# Environment:
#   BLASTN_DIR   directory containing the blastn binary (only used when
#                METHOD is not "custom"); when unset or empty, blastn is
#                looked up on PATH.
#
# gene_identification.py, merge_and_filter.r, mutation_analysis.r,
# mutation_analysis.py and subclass_definition.db are loaded from the
# directory this script lives in.
#
# Working files (files/, unziplog.log, summary.txt, mutationanalysis.txt,
# mutationstats.txt, hotspots.txt, sequences.fasta) are created in $PWD.

# pipefail is intentionally NOT enabled: the column-index pipelines below
# rely on grep legitimately finding no match (exit status 1) when the wanted
# column is the first one in the header.
set -e

#######################################
# Print one HTML table built from a comma-separated transition file.
# Arguments:
#   $1 - caption text
#   $2 - path to a CSV whose rows are: from,a,c,g,t
# Outputs:
#   the <table> markup on stdout
#######################################
print_transition_table() {
  local caption=$1
  local csv=$2
  local from a c g t

  printf '%s\n' "<table border='1'><caption>${caption}</caption>"
  # "|| [[ -n ... ]]" keeps a final row that lacks a trailing newline.
  while IFS=, read -r from a c g t || [[ -n "$from" ]]; do
    printf '%s\n' "<tr><td>${from}</td><td>${a}</td><td>${c}</td><td>${g}</td><td>${t}</td></tr>"
  done < "$csv"
  printf '%s\n' "</table>"
}

if (( $# < 4 )); then
  printf 'Usage: %s INPUT_ZIP METHOD OUTPUT_HTML OUTDIR [TITLE]\n' "${0##*/}" >&2
  exit 1
fi

dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
output=$3
outdir=$4
title=${5:-}
workdir=$PWD

# Single source of truth for the gene list; the analysis scripts take it as
# a comma-separated string, the report loops over it as an array.
gene_list=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
genes_csv=$(IFS=,; printf '%s' "${gene_list[*]}")

# -p: a pre-existing OUTDIR must not abort the run under set -e.
mkdir -p "$outdir"

# --- unpack the archive and collect the tables of interest -----------------
unzip "$input" -d "$workdir/files/" > "$workdir/unziplog.log"
# The globs stay outside the quotes so they still expand.
cat "$workdir"/files/*/1_* > "$workdir/summary.txt"
cat "$workdir"/files/*/7_* > "$workdir/mutationanalysis.txt"
cat "$workdir"/files/*/8_* > "$workdir/mutationstats.txt"
cat "$workdir"/files/*/10_* > "$workdir/hotspots.txt"

#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

echo "${BLASTN_DIR:-}"

# --- gene identification ---------------------------------------------------
echo "identification ($method)"

if [[ "$method" == "custom" ]]; then
  python "$dir/gene_identification.py" \
    --input "$workdir/summary.txt" \
    --output "$outdir/identified_genes.txt"
else
  # 1-based column numbers, derived by counting the tabs that precede the
  # column name in summary.txt.
  id_index=$(grep -o -P ".+Sequence ID" "$workdir/summary.txt" | grep -o -P "\t" | wc -l)
  id_index=$((id_index + 1))
  sequence_index=$(grep -o -P ".+\tSequence" "$workdir/summary.txt" | grep -o -P "\t" | wc -l)
  sequence_index=$((sequence_index + 1))

  echo "$id_index ${sequence_index}"

  # Skip the header line and emit the two selected columns as FASTA records.
  tail -n +2 "$workdir/summary.txt" \
    | cut -f "${id_index},${sequence_index}" \
    | awk '{print ">" $1 "\n" $2}' > "$workdir/sequences.fasta"

  # blastn -outfmt 6 prints no header, so write one first.
  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' \
    > "$outdir/identified_genes.txt"

  # Without BLASTN_DIR fall back to PATH instead of trying to run "/blastn".
  blastn_bin="${BLASTN_DIR:+${BLASTN_DIR}/}blastn"
  "$blastn_bin" -task blastn -db "$dir/subclass_definition.db" \
    -query "$workdir/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi

# --- merge / filter --------------------------------------------------------
echo "merging"
Rscript "$dir/merge_and_filter.r" \
  "$workdir/summary.txt" \
  "$workdir/mutationanalysis.txt" \
  "$workdir/mutationstats.txt" \
  "$workdir/hotspots.txt" \
  "$outdir/identified_genes.txt" \
  "$outdir/merged.txt" \
  "$outdir/unmatched.txt" \
  "$method"

# --- mutation analysis -----------------------------------------------------
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$genes_csv" "$outdir" 2>&1
echo "python mutation analysis"
python "$dir/mutation_analysis.py" \
  --input "$outdir/merged.txt" \
  --genes "$genes_csv" \
  --output "$outdir/hotspot_analysis.txt"

cat "$outdir/mutations.txt" "$outdir/hotspot_analysis.txt" > "$outdir/result.txt"

# --- HTML report -----------------------------------------------------------
# The *_n.txt, total_n.txt, transitions*.txt and *.png files read below are
# expected to already exist in OUTDIR (presumably written by the analysis
# steps above -- confirm against the R/Python scripts).

# result.txt rows are: name, then an x,y,z triple per gene, then one for
# "all". Build the matching list of variable names for `read`.
row_vars=()
for gene in "${gene_list[@]}" all; do
  row_vars+=("${gene}x" "${gene}y" "${gene}z")
done

{
  # NOTE(review): $title is inserted without HTML escaping -- confirm the
  # caller sanitises it.
  printf '%s\n' "<html><center><h1>$title</h1></center><table border='1'>"

  # Header row: one linked column per gene plus the "all" column.
  printf '%s\n' "<tr><th>info</th>"
  for gene in "${gene_list[@]}"; do
    count=$(< "$outdir/${gene}_n.txt")
    printf '%s\n' "<th><a href='matched_${gene}.txt'>${gene} (N = $count)</a></th>"
  done
  count=$(< "$outdir/total_n.txt")
  printf '%s\n' "<th><a href='matched_all.txt'>all (N = $count)</a></th>"

  # Data rows: every triple is rendered as "x/y (z%)".
  while IFS=, read -r name "${row_vars[@]}" || [[ -n "$name" ]]; do
    row="<tr><td>$name</td>"
    for gene in "${gene_list[@]}" all; do
      x_ref="${gene}x"
      y_ref="${gene}y"
      z_ref="${gene}z"
      row+="<td>${!x_ref}/${!y_ref} (${!z_ref}%)</td>"
    done
    printf '%s\n' "${row}</tr>"
  done < "$outdir/result.txt"
  printf '%s\n' "</table>"

  printf '%s\n' "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br />"

  # all.png is always referenced; the ca/cg plots only when they exist.
  printf '%s\n' "<img src='all.png'/><br />"
  for plot in ca cg; do
    if [[ -e "$outdir/${plot}.png" ]]; then
      printf '%s\n' "<img src='${plot}.png'/><br />"
    fi
  done

  for gene in "${gene_list[@]}"; do
    print_transition_table "$gene transition table" "$outdir/transitions_${gene}.txt"
  done
  print_transition_table "All transition table" "$outdir/transitions.txt"

  printf '%s\n' "</html>"
} > "$output"

#rm $outdir/HS12RSS.txt
#rm $outdir/HS23RSS.txt