annotate wrapper.sh @ 11:0510cf1f7cbc draft

Uploaded
author davidvanzessen
date Tue, 04 Aug 2015 09:59:26 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
#!/bin/bash
# Wrapper for the mutation-analysis pipeline: unpacks the input archive,
# identifies genes, runs the R/Python analysis scripts located next to this
# file and writes an HTML report.
#
# Positional arguments:
#   $1  input          archive to analyse (Zip or XZ-compressed tar)
#   $2  method         "custom" selects gene_identification.py, any other
#                      value selects blastn
#   $3  output         path of the HTML report
#   $4  outdir         directory receiving the generated result files
#   $5  title          report heading; also the extraction sub-directory
#                      name for XZ archives
#   $6  include_fr1    forwarded to the mutation analysis scripts
#   $7  functionality  forwarded to merge_and_filter.r
#   $8  unique         forwarded to merge_and_filter.r
set -e

# Absolute directory of this script, used to locate the helper scripts.
dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
output=$3
outdir=$4
title=$5
include_fr1=$6
functionality=$7
unique=$8

# -p: do not abort (under set -e) when the directory already exists.
mkdir -p "$outdir"

# Detect the archive type from the description printed by file(1) and unpack
# the input below $PWD/files/.
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $PWD/files/"
  unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  # Log the command that is actually run (extraction goes into a
  # sub-directory named after the title).
  echo "tar -xJf $input -C $PWD/files/$title"
  mkdir -p "$PWD/files/$title"
  tar -xJf "$input" -C "$PWD/files/$title"
else
  # Nothing was extracted, so the following steps cannot succeed; fail here
  # with a clear message instead of a confusing "No such file" later on.
  echo "Unsupported input type: $type" >&2
  exit 1
fi

# Concatenate, across every extracted sub-directory, the files whose names
# start with 1_, 7_, 8_ and 10_ into one working file each.
# $PWD is quoted so a working directory containing whitespace is handled;
# the glob part stays unquoted so it still expands.
cat "$PWD"/files/*/1_* > "$PWD/summary.txt"
cat "$PWD"/files/*/7_* > "$PWD/mutationanalysis.txt"
cat "$PWD"/files/*/8_* > "$PWD/mutationstats.txt"
cat "$PWD"/files/*/10_* > "$PWD/hotspots.txt"

#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

echo "${BLASTN_DIR}"

echo "identification ($method)"

# Gene identification: either the bundled Python script ("custom") or a
# blastn search against the bundled subclass database. Both write
# $outdir/identified_genes.txt.
if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
  # 1-based column numbers of "Sequence ID" and "Sequence": count the tabs
  # that precede the column name and add one. Only the header line is
  # inspected so a repeated header (several concatenated tables) cannot
  # inflate the count.
  ID_index=$(head -n 1 "$PWD/summary.txt" | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
  ID_index=$((ID_index+1))
  sequence_index=$(head -n 1 "$PWD/summary.txt" | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
  sequence_index=$((sequence_index+1))

  echo "$ID_index ${sequence_index}"

  # Build the FASTA query from every line after the first. Splitting on
  # tabs only keeps IDs that contain spaces intact, and addressing the
  # columns by number keeps ID and sequence in the right order regardless
  # of their position in the table.
  awk -F'\t' -v id_col="$ID_index" -v seq_col="$sequence_index" \
    'NR > 1 { print ">" $id_col "\n" $seq_col }' \
    "$PWD/summary.txt" > "$PWD/sequences.fasta"

  # Header line followed by the tabular (-outfmt 6) blastn hits.
  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' > "$outdir/identified_genes.txt"
  # Use blastn from BLASTN_DIR when that is set, otherwise from PATH.
  "${BLASTN_DIR:+${BLASTN_DIR}/}blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi



# Merge the collected tables with the gene identification result. All
# arguments are quoted so each one reaches the R script as exactly one
# positional argument, even when it is empty or contains whitespace.
echo "merging"
Rscript "$dir/merge_and_filter.r" \
  "$PWD/summary.txt" "$PWD/mutationanalysis.txt" "$PWD/mutationstats.txt" "$PWD/hotspots.txt" \
  "$outdir/identified_genes.txt" "$outdir/merged.txt" "$outdir/unmatched.txt" \
  "$method" "$functionality" "$unique"

# Comma-separated gene list handed to the analysis scripts.
genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$genes" "$outdir" "${include_fr1}" 2>&1
echo "python mutation analysis"
python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$genes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"
echo "R AA histogram"
# NOTE(review): aa_mutations.txt is presumably written by one of the
# analysis steps above - confirm.
Rscript "$dir/aa_histogram.r" "$outdir/aa_mutations.txt" "$outdir/aa_histogram.png" 2>&1

# result.txt feeds the HTML summary table.
# NOTE(review): mutations.txt is presumably written by mutation_analysis.r
# - confirm.
cat "$outdir/mutations.txt" "$outdir/hotspot_analysis.txt" > "$outdir/result.txt"

# Genes that get a column in the summary table and a transition table.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)


# Start the HTML report: heading, summary table and links to the data files.
# Everything in the group is written through one redirection that truncates
# any previous report.
# NOTE(review): $title is inserted into the HTML unescaped.
{
  echo "<html><center><h1>$title</h1></center><table border='1'>"

  # Header row: one column per gene plus "all", each showing the count read
  # from the matching *_n.txt file in $outdir.
  echo "<tr><th>info</th>"
  for gene in "${genes[@]}"; do
    tmp=$(cat "$outdir/${gene}_n.txt")
    echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>"
  done
  tmp=$(cat "$outdir/total_n.txt")
  echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>"
  # Close the header row (it was left open before).
  echo "</tr>"

  # One row per line of result.txt: a name followed by an x,y,z triple for
  # each gene and for "all", rendered as "x/y (z)".
  while IFS=, read -r name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz; do
    # The two S/R ratio rows are plain ratios; every other row carries a
    # percent sign after the third value.
    if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then
      pct=""
    else
      pct="%"
    fi
    echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}${pct})</td><td>${ca1x}/${ca1y} (${ca1z}${pct})</td><td>${ca2x}/${ca2y} (${ca2z}${pct})</td><td>${cgx}/${cgy} (${cgz}${pct})</td><td>${cg1x}/${cg1y} (${cg1z}${pct})</td><td>${cg2x}/${cg2y} (${cg2z}${pct})</td><td>${cg3x}/${cg3y} (${cg3z}${pct})</td><td>${cg4x}/${cg4y} (${cg4z}${pct})</td><td>${cmx}/${cmy} (${cmz}${pct})</td><td>${allx}/${ally} (${allz}${pct})</td></tr>"
  done < "$outdir/result.txt"

  echo "</table>"
  echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br />"
} > "$output"


# Plot section of the report. The "all" plot is always referenced; the
# remaining plots are only referenced when their PNG exists in $outdir.
echo "<img src='all.png'/><br />" >> "$output"
echo "<a href='all.txt'>download data</a><br />" >> "$output"
for plot in ca cg scatter aa_histogram; do
  # [[ -e ]] replaces the ambiguous "[ -a file ]" form (-a is also the
  # binary AND operator of "[").
  if [[ -e "$outdir/${plot}.png" ]]; then
    echo "<img src='${plot}.png'/><br />" >> "$output"
    echo "<a href='${plot}.txt'>download data</a><br />" >> "$output"
  fi
done

# Append one HTML table to the report.
# Arguments: $1 - table caption
#            $2 - comma-separated file; each line is rendered as one row
#                 with five cells (any further fields stay in the last cell)
# Globals:   output (read) - report file that is appended to
write_transition_table() {
  local caption=$1
  local table_file=$2
  local from a c g t

  echo "<table border='1'><caption>${caption}</caption>" >> "$output"
  while IFS=, read -r from a c g t; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> "$output"
  done < "$table_file"
  echo "</table>" >> "$output"
}

# One transition table per gene, followed by the table over all genes.
for gene in "${genes[@]}"; do
  write_transition_table "$gene transition table" "$outdir/transitions_${gene}.txt"
done

write_transition_table "All transition table" "$outdir/transitions.txt"

echo "</html>" >> "$output"

#rm $outdir/HS12RSS.txt
#rm $outdir/HS23RSS.txt