0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
0
|
12 mkdir $outdir
|
|
13
|
35
|
14 type="`file $input`"
|
|
15 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
16 echo "Zip archive"
|
|
17 echo "unzip $input -d $PWD/files/"
|
|
18 unzip $input -d $PWD/files/
|
|
19 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
20 echo "ZX archive"
|
|
21 echo "tar -xJf $input -C $PWD/files/"
|
|
22 mkdir -p $PWD/files/$title
|
|
23 tar -xJf $input -C $PWD/files/$title
|
|
24 fi
|
|
25
|
0
|
26 cat $PWD/files/*/1_* > $PWD/summary.txt
|
41
|
27 cat $PWD/files/*/3_* > $PWD/sequences.txt
|
39
|
28 cat $PWD/files/*/6_* > $PWD/junction.txt
|
0
|
29 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
30 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
31 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
32
|
26
|
33 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
34
|
|
35 echo "${BLASTN_DIR}"
|
|
36
|
|
37 echo "identification ($method)"
|
|
38
|
|
39 if [[ "${method}" == "custom" ]] ; then
|
|
40 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
41 else
|
|
42 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
43 ID_index=$((ID_index+1))
|
|
44 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
45 sequence_index=$((sequence_index+1))
|
|
46
|
|
47 echo "$ID_index ${sequence_index}"
|
|
48
|
|
49 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
50
|
|
51 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
52 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
53 fi
|
|
54
|
|
55
|
|
56
|
4
|
57 echo "merging"
|
41
|
58 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
|
0
|
59
|
4
|
60 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
61 echo "R mutation analysis"
|
22
|
62 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
4
|
63 echo "python mutation analysis"
|
32
|
64 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
65 echo "R AA histogram"
|
|
66 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
67
|
|
68 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
|
|
69
|
0
|
70 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
71
|
|
72
|
1
|
73 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
|
0
|
74 echo "<tr><th>info</th>" >> $output
|
4
|
75 for gene in ${genes[@]}
|
|
76 do
|
|
77 tmp=`cat $outdir/${gene}_n.txt`
|
|
78 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
79 done
|
|
80 tmp=`cat $outdir/total_n.txt`
|
|
81 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
|
82
|
0
|
83 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
25
|
84 do
|
|
85 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
86 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
87 else
|
0
|
88 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
25
|
89 fi
|
4
|
90 done < $outdir/result.txt
|
0
|
91 echo "</table>" >> $output
|
40
|
92 echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
|
2
|
93
|
|
94
|
4
|
95 echo "<img src='all.png'/><br />" >> $output
|
26
|
96 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
97 if [ -a $outdir/ca.png ]
|
|
98 then
|
|
99 echo "<img src='ca.png'/><br />" >> $output
|
26
|
100 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
101 fi
|
|
102 if [ -a $outdir/cg.png ]
|
|
103 then
|
|
104 echo "<img src='cg.png'/><br />" >> $output
|
26
|
105 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
106 fi
|
22
|
107 if [ -a $outdir/scatter.png ]
|
|
108 then
|
|
109 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
110 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
111 fi
|
|
112 if [ -a $outdir/aa_histogram.png ]
|
|
113 then
|
|
114 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
115 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
116 fi
|
2
|
117
|
0
|
118 for gene in ${genes[@]}
|
|
119 do
|
|
120 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
121 while IFS=, read from a c g t
|
|
122 do
|
|
123 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
4
|
124 done < $outdir/transitions_${gene}.txt
|
0
|
125 echo "</table>" >> $output
|
|
126 done
|
|
127
|
|
128 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
129 while IFS=, read from a c g t
|
|
130 do
|
|
131 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
|
132 done < $outdir/transitions.txt
|
|
133 echo "</table>" >> $output
|
|
134
|
|
135 echo "</html>" >> $output
|
2
|
136
|
|
137 #rm $outdir/HS12RSS.txt
|
|
138 #rm $outdir/HS23RSS.txt
|