annotate wrapper.sh @ 19:c518cf0d4adb draft

Uploaded
author davidvanzessen
date Wed, 01 Apr 2015 05:09:59 -0400
parents 2bb24ebbbc38
children 850857bc8605
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74d2bc479bee Uploaded
davidvanzessen
parents:
diff changeset
#!/bin/bash
#
# Usage: wrapper.sh <input> <method> <output> <outdir> <title>
#
#   input   archive handed to unzip
#   method  identification method selector
#   output  path of the HTML report
#   outdir  directory created here to hold intermediate and result files
#   title   heading text for the report
#
# This first stage captures the arguments, creates the output directory,
# extracts the archive into ./files/ and concatenates the per-directory
# report files into single files in the current working directory.
set -e

if (( $# < 4 )); then
  # Without an outdir the mkdir below would fail anyway; say why instead.
  printf 'Usage: %s <input> <method> <output> <outdir> <title>\n' "${0##*/}" >&2
  exit 1
fi

# Absolute directory of this script; the helper scripts live next to it.
dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
output=$3
outdir=$4
title=$5

# Deliberately not -p: an already existing outdir still aborts the run.
mkdir "$outdir"

# Keep unzip's chatter out of stdout by logging it to a file.
unzip "$input" -d "$PWD/files/" > "$PWD/unziplog.log"

# Each glob collects the file(s) with the given numeric prefix from every
# extracted sub-directory. The variable parts are quoted, the glob is not.
cat "$PWD"/files/*/1_* > "$PWD/summary.txt"
cat "$PWD"/files/*/7_* > "$PWD/mutationanalysis.txt"
cat "$PWD"/files/*/8_* > "$PWD/mutationstats.txt"
cat "$PWD"/files/*/10_* > "$PWD/hotspots.txt"
3
a0b27058dcac Uploaded
davidvanzessen
parents: 2
diff changeset
16
19
c518cf0d4adb Uploaded
davidvanzessen
parents: 16
diff changeset
# Directory containing the blastn executable. The previous hard-coded path
# stays the default; export BLASTN_DIR to point at another installation.
BLASTN_DIR="${BLASTN_DIR:-/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin}"

echo "${BLASTN_DIR}"

echo "identification ($method)"

# Produce $outdir/identified_genes.txt, either with the bundled python
# script ("custom") or by running blastn against subclass_definition.db.
if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" \
    --input "$PWD/summary.txt" \
    --output "$outdir/identified_genes.txt"
else
  # Locate the 1-based column numbers of the ID and sequence columns by
  # counting the tabs that precede the column name. Only the header (line 1,
  # which is also the line skipped by tail below) is inspected, so data rows
  # can never contribute to the count.
  ID_index=$(head -n 1 "$PWD/summary.txt" \
    | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
  ID_index=$((ID_index+1))
  sequence_index=$(head -n 1 "$PWD/summary.txt" \
    | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
  sequence_index=$((sequence_index+1))

  echo "$ID_index ${sequence_index}"

  # Turn every data row into a two-line FASTA record. awk splits on tabs,
  # matching cut, so an ID containing spaces does not shift the sequence.
  tail -n +2 "$PWD/summary.txt" \
    | cut -f "${ID_index},${sequence_index}" \
    | awk -F'\t' '{print ">" $1 "\n" $2}' > "$PWD/sequences.fasta"

  # -outfmt 6 is headerless tabular output, so write the column names first
  # and let blastn append its rows.
  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' \
    > "$outdir/identified_genes.txt"
  "${BLASTN_DIR}/blastn" -task blastn \
    -db "$dir/subclass_definition.db" \
    -query "$PWD/sequences.fasta" \
    -outfmt 6 >> "$outdir/identified_genes.txt"
fi
c518cf0d4adb Uploaded
davidvanzessen
parents: 16
diff changeset
41
c518cf0d4adb Uploaded
davidvanzessen
parents: 16
diff changeset
42
c518cf0d4adb Uploaded
davidvanzessen
parents: 16
diff changeset
43
4
069419cccba4 Uploaded
davidvanzessen
parents: 3
diff changeset
# Merge the collected report files with the identified genes, then run the
# R and python mutation analyses on the merged table. All paths are quoted
# so directories containing spaces are passed as single arguments.
echo "merging"
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" \
  "$PWD/mutationanalysis.txt" \
  "$PWD/mutationstats.txt" \
  "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" \
  "$outdir/merged.txt" \
  "$outdir/unmatched.txt" \
  "$method"

# Comma-separated gene list, passed as one argument to both analysis scripts.
genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
echo "R mutation analysis"
# stderr is folded into stdout for the R step.
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$genes" "$outdir" 2>&1
echo "python mutation analysis"
python "$dir/mutation_analysis.py" \
  --input "$outdir/merged.txt" \
  --genes "$genes" \
  --output "$outdir/hotspot_analysis.txt"

# result.txt is mutations.txt followed by the hotspot analysis rows.
cat "$outdir/mutations.txt" "$outdir/hotspot_analysis.txt" > "$outdir/result.txt"
069419cccba4 Uploaded
davidvanzessen
parents: 3
diff changeset
54
0
74d2bc479bee Uploaded
davidvanzessen
parents:
diff changeset
# Genes that get a column in the summary table and a transition table each.
# result.txt is read as one name field followed by three values per gene
# plus three values for "all".
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

#######################################
# Print one HTML transition table to stdout.
# Arguments: $1 - caption text
#            $2 - comma-separated file with five fields per line
# Outputs:   the <table> markup on stdout
#######################################
print_transition_table() {
  local caption=$1 table_file=$2
  local from a c g t
  echo "<table border='1'><caption>${caption}</caption>"
  # -r keeps backslashes literal; the || clause keeps a final line that has
  # no trailing newline.
  while IFS=, read -r from a c g t || [[ -n "${from}${a}${c}${g}${t}" ]]; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
  done < "$table_file"
  echo "</table>"
}

# NOTE(review): $title and the file contents are written into the page
# without HTML escaping - confirm they cannot contain markup.
{
  echo "<html><center><h1>$title</h1></center><table border='1'>"

  # Header row: one linked column per gene, with the count read from
  # <gene>_n.txt, followed by the "all" column.
  echo "<tr><th>info</th>"
  for gene in "${genes[@]}"; do
    tmp=$(< "$outdir/${gene}_n.txt")
    echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>"
  done
  tmp=$(< "$outdir/total_n.txt")
  echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>"
  # The header row was previously left unclosed.
  echo "</tr>"

  # Body rows: field 0 is the row name, then x,y,z triples rendered as
  # "x/y (z%)" for every gene and finally for "all".
  n_groups=$(( ${#genes[@]} + 1 ))
  while IFS=, read -r -a fields || (( ${#fields[@]} > 0 )); do
    printf '<tr><td>%s</td>' "${fields[0]:-}"
    for (( i = 0; i < n_groups; i++ )); do
      printf '<td>%s/%s (%s%%)</td>' \
        "${fields[3*i+1]:-}" "${fields[3*i+2]:-}" "${fields[3*i+3]:-}"
    done
    printf '</tr>\n'
  done < "$outdir/result.txt"
  echo "</table>"
  echo "<a href='unmatched.txt'>unmatched</a><br />"

  # all.png is always referenced; ca.png and cg.png only when they exist.
  echo "<img src='all.png'/><br />"
  for plot in ca cg; do
    if [[ -e "$outdir/${plot}.png" ]]; then
      echo "<img src='${plot}.png'/><br />"
    fi
  done

  for gene in "${genes[@]}"; do
    print_transition_table "$gene transition table" "$outdir/transitions_${gene}.txt"
  done
  print_transition_table "All transition table" "$outdir/transitions.txt"

  echo "</html>"
} > "$output"
2
2f4298673519 Uploaded
davidvanzessen
parents: 1
diff changeset
104
2f4298673519 Uploaded
davidvanzessen
parents: 1
diff changeset
105 #rm $outdir/HS12RSS.txt
2f4298673519 Uploaded
davidvanzessen
parents: 1
diff changeset
106 #rm $outdir/HS23RSS.txt