0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
0
|
12 mkdir $outdir
|
|
13
|
35
|
14 type="`file $input`"
|
|
15 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
16 echo "Zip archive"
|
|
17 echo "unzip $input -d $PWD/files/"
|
|
18 unzip $input -d $PWD/files/
|
|
19 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
20 echo "ZX archive"
|
|
21 echo "tar -xJf $input -C $PWD/files/"
|
|
22 mkdir -p $PWD/files/$title
|
|
23 tar -xJf $input -C $PWD/files/$title
|
|
24 fi
|
|
25
|
0
|
26 cat $PWD/files/*/1_* > $PWD/summary.txt
|
39
|
27 cat $PWD/files/*/6_* > $PWD/junction.txt
|
0
|
28 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
29 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
30 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
31
|
26
|
32 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
33
|
|
34 echo "${BLASTN_DIR}"
|
|
35
|
|
36 echo "identification ($method)"
|
|
37
|
|
38 if [[ "${method}" == "custom" ]] ; then
|
|
39 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
40 else
|
|
41 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
42 ID_index=$((ID_index+1))
|
|
43 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
44 sequence_index=$((sequence_index+1))
|
|
45
|
|
46 echo "$ID_index ${sequence_index}"
|
|
47
|
|
48 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
49
|
|
50 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
51 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
52 fi
|
|
53
|
|
54
|
|
55
|
4
|
56 echo "merging"
|
39
|
57 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/junction.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
|
0
|
58
|
4
|
59 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
60 echo "R mutation analysis"
|
22
|
61 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
4
|
62 echo "python mutation analysis"
|
32
|
63 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
64 echo "R AA histogram"
|
|
65 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
66
|
|
67 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
|
|
68
|
0
|
69 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
70
|
|
71
|
1
|
72 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
|
0
|
73 echo "<tr><th>info</th>" >> $output
|
4
|
74 for gene in ${genes[@]}
|
|
75 do
|
|
76 tmp=`cat $outdir/${gene}_n.txt`
|
|
77 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
78 done
|
|
79 tmp=`cat $outdir/total_n.txt`
|
|
80 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
|
81
|
0
|
82 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
25
|
83 do
|
|
84 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
85 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
86 else
|
0
|
87 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
25
|
88 fi
|
4
|
89 done < $outdir/result.txt
|
0
|
90 echo "</table>" >> $output
|
34
|
91 echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br />" >> $output
|
2
|
92
|
|
93
|
4
|
94 echo "<img src='all.png'/><br />" >> $output
|
26
|
95 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
96 if [ -a $outdir/ca.png ]
|
|
97 then
|
|
98 echo "<img src='ca.png'/><br />" >> $output
|
26
|
99 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
100 fi
|
|
101 if [ -a $outdir/cg.png ]
|
|
102 then
|
|
103 echo "<img src='cg.png'/><br />" >> $output
|
26
|
104 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
105 fi
|
22
|
106 if [ -a $outdir/scatter.png ]
|
|
107 then
|
|
108 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
109 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
110 fi
|
|
111 if [ -a $outdir/aa_histogram.png ]
|
|
112 then
|
|
113 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
114 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
115 fi
|
2
|
116
|
0
|
117 for gene in ${genes[@]}
|
|
118 do
|
|
119 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
120 while IFS=, read from a c g t
|
|
121 do
|
|
122 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
4
|
123 done < $outdir/transitions_${gene}.txt
|
0
|
124 echo "</table>" >> $output
|
|
125 done
|
|
126
|
|
127 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
128 while IFS=, read from a c g t
|
|
129 do
|
|
130 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
|
131 done < $outdir/transitions.txt
|
|
132 echo "</table>" >> $output
|
|
133
|
|
134 echo "</html>" >> $output
|
2
|
135
|
|
136 #rm $outdir/HS12RSS.txt
|
|
137 #rm $outdir/HS23RSS.txt
|