#!/bin/bash
#
# wrapper.sh -- unpack an analysis archive, identify genes, run the R and
# python mutation analyses, and render the results as one HTML report.
#
# Reconstructed from the Mercurial annotate view of wrapper.sh at revision
# 25:58a62d2c0377 (author davidvanzessen, Tue, 07 Apr 2015 07:32:43 -0400,
# parent d84c9791d8c4, child 2433a1e110e1).
#
# Usage:
#   wrapper.sh <input.zip> <method> <output.html> <outdir> [title] [include_fr1]
#
#   input        zip archive; its */1_*, */7_*, */8_* and */10_* members are
#                concatenated into summary/mutationanalysis/mutationstats/
#                hotspots text files in the current directory
#   method       "custom" runs gene_identification.py; anything else runs
#                blastn against subclass_definition.db
#   output       path of the HTML report to write
#   outdir       directory receiving all generated tables (created if needed)
#   title        heading of the HTML report
#   include_fr1  passed through unchanged to mutation_analysis.r (optional)
#
# Environment:
#   BLASTN_DIR   directory containing the blastn binary
#                (default: /home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin)
#
# Working files (files/, unziplog.log, summary.txt, ...) are written to $PWD.
set -e

# Escape the characters that are significant in HTML element content.
# sed is used instead of ${var//pat/rep} because bash >= 5.2 treats '&' in
# the replacement specially.
html_escape() {
  printf '%s' "$1" | sed -e 's/&/\&amp;/g' -e 's/</\&lt;/g' -e 's/>/\&gt;/g'
}

# Print 1 + the number of tabs found in everything matching the PCRE $1 in
# file $2, i.e. the 1-based column index of the header matched by $1.
# NOTE(review): like the original, this scans every line, not just the header.
column_index() {
  local tabs
  tabs=$(grep -o -P "$1" "$2" | grep -o -P "\t" | wc -l)
  printf '%s\n' "$((tabs + 1))"
}

# Emit one HTML table (caption $1) from the comma-separated file $2, whose
# rows are read as five fields: from,a,c,g,t.
write_transition_table() {
  local caption=$1 file=$2
  local from a c g t
  printf '%s\n' "<table border='1'><caption>${caption}</caption>"
  # The || clause keeps a final row that lacks a trailing newline.
  while IFS=, read -r from a c g t || [[ -n "$from$a$c$g$t" ]]; do
    printf '%s\n' "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
  done < "$file"
  printf '%s\n' "</table>"
}

# Write the complete HTML report to stdout.
# Globals read: title, outdir, genes.
write_report() {
  local gene count name pct row plot
  local -i i groups
  local -a fields

  printf '%s\n' "<html><center><h1>$(html_escape "$title")</h1></center><table border='1'>"

  # Header row: one column per gene plus "all", each showing its N.
  printf '%s\n' "<tr><th>info</th>"
  for gene in "${genes[@]}"; do
    count=$(cat "$outdir/${gene}_n.txt")
    printf '%s\n' "<th><a href='matched_${gene}.txt'>${gene} (N = $count)</a></th>"
  done
  count=$(cat "$outdir/total_n.txt")
  printf '%s\n' "<th><a href='matched_all.txt'>all (N = $count)</a></th>"

  # Body rows: result.txt holds a name followed by one x,y,z triple per
  # column (each gene, then "all"). Fields beyond those triples are ignored.
  groups=$(( ${#genes[@]} + 1 ))
  while IFS=, read -r -a fields || (( ${#fields[@]} > 0 )); do
    name=${fields[0]}
    # The two S/R rows carry ratios, so their third value gets no '%' sign.
    if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then
      pct=""
    else
      pct="%"
    fi
    row="<tr><td>${name}</td>"
    for (( i = 0; i < groups; i++ )); do
      row+="<td>${fields[3*i+1]}/${fields[3*i+2]} (${fields[3*i+3]}${pct})</td>"
    done
    row+="</tr>"
    printf '%s\n' "$row"
  done < "$outdir/result.txt"
  printf '%s\n' "</table>"

  printf '%s\n' "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br />"

  # all.png is always referenced; the other plots only when they exist.
  printf '%s\n' "<img src='all.png'/><br />"
  for plot in ca cg scatter; do
    if [[ -e "$outdir/${plot}.png" ]]; then
      printf '%s\n' "<img src='${plot}.png'/><br />"
    fi
  done

  for gene in "${genes[@]}"; do
    write_transition_table "$gene transition table" "$outdir/transitions_${gene}.txt"
  done
  write_transition_table "All transition table" "$outdir/transitions.txt"

  printf '%s\n' "</html>"
}

if (( $# < 4 )); then
  printf 'Usage: %s <input.zip> <method> <output.html> <outdir> [title] [include_fr1]\n' "${0##*/}" >&2
  exit 2
fi

dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
output=$3
outdir=$4
title=${5:-}
include_fr1=${6:-}

# -p: do not abort (set -e) when the output directory already exists.
mkdir -p -- "$outdir"

unzip "$input" -d "$PWD/files/" > "$PWD/unziplog.log"
cat "$PWD"/files/*/1_* > "$PWD/summary.txt"
cat "$PWD"/files/*/7_* > "$PWD/mutationanalysis.txt"
cat "$PWD"/files/*/8_* > "$PWD/mutationstats.txt"
cat "$PWD"/files/*/10_* > "$PWD/hotspots.txt"

# Location of the blastn binary; overridable from the environment.
BLASTN_DIR="${BLASTN_DIR:-/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin}"

echo "${BLASTN_DIR}"

echo "identification ($method)"

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  # Locate the "Sequence ID" and (last) "Sequence" columns of the summary.
  ID_index=$(column_index ".+Sequence ID" "$PWD/summary.txt")
  sequence_index=$(column_index ".+\tSequence" "$PWD/summary.txt")

  echo "$ID_index ${sequence_index}"

  # Turn every data row (header skipped) into a two-line FASTA record.
  tail -n+2 "$PWD/summary.txt" \
    | cut -f "${ID_index},${sequence_index}" \
    | awk '{print ">" $1 "\n" $2}' > "$PWD/sequences.fasta"

  # blastn -outfmt 6 has no header line, so write one first.
  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi

echo "merging"
Rscript "$dir/merge_and_filter.r" "$PWD/summary.txt" "$PWD/mutationanalysis.txt" "$PWD/mutationstats.txt" "$PWD/hotspots.txt" "$outdir/identified_genes.txt" "$outdir/merged.txt" "$outdir/unmatched.txt" "$method"

# Single source of truth for the gene list; the analysis scripts take it
# comma-separated, the report loops over it as an array.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
genes_csv=$(IFS=,; printf '%s' "${genes[*]}")

echo "R mutation analysis"
# include_fr1 is omitted entirely when empty so the R script sees the same
# argument count as before.
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$genes_csv" "$outdir" ${include_fr1:+"$include_fr1"} 2>&1
echo "python mutation analysis"
python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$genes_csv" --output "$outdir/hotspot_analysis.txt"

cat "$outdir/mutations.txt" "$outdir/hotspot_analysis.txt" > "$outdir/result.txt"

write_report > "$output"

#rm $outdir/HS12RSS.txt
#rm $outdir/HS23RSS.txt