comparison wrapper.sh @ 11:0510cf1f7cbc draft

Uploaded
author davidvanzessen
date Tue, 04 Aug 2015 09:59:26 -0400
parents
children
comparison
equal deleted inserted replaced
10:edbf4fba5fc7 11:0510cf1f7cbc
1 #!/bin/bash
2 set -e
3 dir="$(cd "$(dirname "$0")" && pwd)"
4 input=$1
5 method=$2
6 output=$3
7 outdir=$4
8 title=$5
9 include_fr1=$6
10 functionality=$7
11 unique=$8
12 mkdir $outdir
13
14 type="`file $input`"
15 if [[ "$type" == *"Zip archive"* ]] ; then
16 echo "Zip archive"
17 echo "unzip $input -d $PWD/files/"
18 unzip $input -d $PWD/files/
19 elif [[ "$type" == *"XZ compressed data"* ]] ; then
20 echo "ZX archive"
21 echo "tar -xJf $input -C $PWD/files/"
22 mkdir -p $PWD/files/$title
23 tar -xJf $input -C $PWD/files/$title
24 fi
25
26 cat $PWD/files/*/1_* > $PWD/summary.txt
27 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
28 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
29 cat $PWD/files/*/10_* > $PWD/hotspots.txt
30
31 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
32
33 echo "${BLASTN_DIR}"
34
35 echo "identification ($method)"
36
37 if [[ "${method}" == "custom" ]] ; then
38 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
39 else
40 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
41 ID_index=$((ID_index+1))
42 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
43 sequence_index=$((sequence_index+1))
44
45 echo "$ID_index ${sequence_index}"
46
47 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
48
49 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
50 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
51 fi
52
53
54
55 echo "merging"
56 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
57
58 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
59 echo "R mutation analysis"
60 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
61 echo "python mutation analysis"
62 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
63 echo "R AA histogram"
64 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
65
66 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
67
68 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
69
70
71 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
72 echo "<tr><th>info</th>" >> $output
73 for gene in ${genes[@]}
74 do
75 tmp=`cat $outdir/${gene}_n.txt`
76 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
77 done
78 tmp=`cat $outdir/total_n.txt`
79 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
80
81 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
82 do
83 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
84 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
85 else
86 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
87 fi
88 done < $outdir/result.txt
89 echo "</table>" >> $output
90 echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br />" >> $output
91
92
93 echo "<img src='all.png'/><br />" >> $output
94 echo "<a href='all.txt'>download data</a><br />" >> $output
95 if [ -a $outdir/ca.png ]
96 then
97 echo "<img src='ca.png'/><br />" >> $output
98 echo "<a href='ca.txt'>download data</a><br />" >> $output
99 fi
100 if [ -a $outdir/cg.png ]
101 then
102 echo "<img src='cg.png'/><br />" >> $output
103 echo "<a href='cg.txt'>download data</a><br />" >> $output
104 fi
105 if [ -a $outdir/scatter.png ]
106 then
107 echo "<img src='scatter.png'/><br />" >> $output
108 echo "<a href='scatter.txt'>download data</a><br />" >> $output
109 fi
110 if [ -a $outdir/aa_histogram.png ]
111 then
112 echo "<img src='aa_histogram.png'/><br />" >> $output
113 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
114 fi
115
116 for gene in ${genes[@]}
117 do
118 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
119 while IFS=, read from a c g t
120 do
121 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
122 done < $outdir/transitions_${gene}.txt
123 echo "</table>" >> $output
124 done
125
126 echo "<table border='1'><caption>All transition table</caption>" >> $output
127 while IFS=, read from a c g t
128 do
129 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
130 done < $outdir/transitions.txt
131 echo "</table>" >> $output
132
133 echo "</html>" >> $output
134
135 #rm $outdir/HS12RSS.txt
136 #rm $outdir/HS23RSS.txt