annotate wrapper.sh @ 52:d3542f87a304 draft

Uploaded
author davidvanzessen
date Fri, 29 Jan 2016 08:11:31 -0500
parents 8ba6afa1247a
children 7290a88ea202
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
74d2bc479bee Uploaded
davidvanzessen
parents:
diff changeset
#!/bin/bash
#
# wrapper.sh - unpack an input archive, identify genes, run the R/Python
# mutation analyses that live next to this script, and render an HTML report.
#
# Usage:
#   wrapper.sh INPUT METHOD OUTPUT OUTDIR TITLE INCLUDE_FR1 FUNCTIONALITY \
#              UNIQUE NAIVE_OUTPUT FILTER_UNIQUE
#
# Arguments:
#   $1  input          Zip or XZ-compressed tar archive to analyse.
#   $2  method         "custom" runs gene_identification.py; anything else
#                      runs blastn against subclass_definition.db.
#   $3  output         path of the HTML report to write.
#   $4  outdir         directory that receives every generated data file.
#   $5  title          report heading; also the extraction sub-directory
#                      used for XZ archives.
#   $6  include_fr1    forwarded to mutation_analysis.r / mutation_analysis.py.
#   $7  functionality  forwarded to merge_and_filter.r.
#   $8  unique         forwarded to merge_and_filter.r.
#   $9  naive_output   output path for the optional "naive" export, or the
#                      literal string "None" to skip it.
#   $10 filter_unique  forwarded to merge_and_filter.r.
#
# Environment:
#   BLASTN_DIR  directory containing the blastn binary; when unset or empty,
#               blastn is looked up on PATH instead.
#
# Working files (summary.txt, sequences.txt, ...) are created in the current
# working directory.

set -e

dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
output=$3
outdir=$4
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output=$9
filter_unique=${10}

# Gene classes reported on, in report column order.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
# Same list, comma separated, as expected by the analysis scripts.
genes_csv=$(IFS=,; echo "${genes[*]}")

# Working files in the current directory.
files_dir="$PWD/files"
summary="$PWD/summary.txt"

#######################################
# Concatenate one numbered report from every extracted sample directory.
# Arguments: $1 - numeric file-name prefix (e.g. 1 matches "1_*")
#            $2 - name of the combined file created in $PWD
#######################################
collect_report() {
  cat "$files_dir"/*/"$1"_* > "$PWD/$2"
}

#######################################
# Print the <img> and "download data" link for a plot, if the plot exists.
# Globals:   outdir (read)
# Arguments: $1 - base name shared by <base>.png and <base>.txt
#######################################
emit_plot() {
  local base=$1
  if [[ -e "$outdir/${base}.png" ]]; then
    echo "<img src='${base}.png'/><br />"
    echo "<a href='${base}.txt'>download data</a><br />"
  fi
}

#######################################
# Print a comma-separated 5-column file as an HTML table.
# Arguments: $1 - table caption
#            $2 - path of the CSV file to render
#######################################
emit_transition_table() {
  local caption=$1 table_file=$2
  local from a c g t
  echo "<table border='1'><caption>${caption}</caption>"
  # The "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
  while IFS=, read -r from a c g t || [[ -n "${from}${a}${c}${g}${t}" ]]; do
    echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
  done < "$table_file"
  echo "</table>"
}

mkdir -p "$outdir"

# ---------------------------------------------------------------------------
# Unpack the input archive into $PWD/files/<sample>/
# ---------------------------------------------------------------------------
# -b omits the file name so a path containing e.g. "Zip archive" cannot
# influence the type detection.
type="$(file -b "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
  echo "Zip archive"
  echo "unzip $input -d $files_dir/"
  unzip "$input" -d "$files_dir/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
  echo "XZ archive"
  echo "tar -xJf $input -C $files_dir/$title"
  mkdir -p "$files_dir/$title"
  tar -xJf "$input" -C "$files_dir/$title"
else
  # Not fatal here: the concatenation below fails if nothing was extracted.
  echo "warning: unrecognised input type: $type" >&2
fi

# ---------------------------------------------------------------------------
# Merge the per-sample reports into one file per report type
# ---------------------------------------------------------------------------
collect_report 1 summary.txt
collect_report 3 sequences.txt
collect_report 5 aa.txt
collect_report 6 junction.txt
collect_report 7 mutationanalysis.txt
collect_report 8 mutationstats.txt
collect_report 10 hotspots.txt

# ---------------------------------------------------------------------------
# Gene identification
# ---------------------------------------------------------------------------
#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

echo "${BLASTN_DIR}"

echo "identification ($method)"

if [[ "${method}" == "custom" ]]; then
  python "$dir/gene_identification.py" --input "$summary" --output "$outdir/identified_genes.txt"
else
  # Column numbers are derived from the header by counting the tabs that
  # precede the column name. Only the first line is inspected: summary.txt is
  # a concatenation, so it holds one header per sample and counting tabs over
  # the whole file would multiply the result.
  header=$(head -n 1 "$summary")
  ID_index=$(printf '%s\n' "$header" | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
  ID_index=$((ID_index + 1))
  sequence_index=$(printf '%s\n' "$header" | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
  sequence_index=$((sequence_index + 1))

  echo "$ID_index ${sequence_index}"

  # Build a FASTA file (">id" line followed by the sequence) from the two columns.
  tail -n +2 "$summary" \
    | cut -f "${ID_index},${sequence_index}" \
    | awk '{print ">" $1 "\n" $2}' > "$PWD/sequences.fasta"

  # Use $BLASTN_DIR/blastn when BLASTN_DIR is set, plain "blastn" otherwise.
  blastn_bin="${BLASTN_DIR:+${BLASTN_DIR}/}blastn"

  printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' > "$outdir/identified_genes.txt"
  "$blastn_bin" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi

# ---------------------------------------------------------------------------
# Merge, filter and analyse
# ---------------------------------------------------------------------------
echo "merging"
Rscript "$dir/merge_and_filter.r" "$summary" "$PWD/sequences.txt" "$PWD/mutationanalysis.txt" "$PWD/mutationstats.txt" "$PWD/hotspots.txt" "$outdir/identified_genes.txt" "$outdir/merged.txt" "$outdir/unmatched.txt" "$method" "$functionality" "$unique" "${filter_unique}"

echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$genes_csv" "$outdir" "${include_fr1}" 2>&1
echo "python mutation analysis"
python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$genes_csv" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"
echo "R AA histogram"
Rscript "$dir/aa_histogram.r" "$outdir/aa_mutations.txt" "$outdir/aa_histogram.png" 2>&1
cat "$outdir/mutations.txt" "$outdir/hotspot_analysis.txt" > "$outdir/result.txt"

# ---------------------------------------------------------------------------
# HTML report
# ---------------------------------------------------------------------------
# One (x, y, z) value triple per gene plus a final triple for "all".
n_groups=$(( ${#genes[@]} + 1 ))

{
  echo "<html><center><h1>$title</h1></center><table border='1'>"

  # Header row: one column per gene with its sequence count, then "all".
  echo "<tr><th>info</th>"
  for gene in "${genes[@]}"; do
    tmp=$(cat "$outdir/${gene}_n.txt")
    echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>"
  done
  tmp=$(cat "$outdir/total_n.txt")
  echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>"
  echo "</tr>"

  # Body rows: result.txt holds "name,x,y,z,x,y,z,..." per line.
  # The "|| (( ... ))" keeps a final line that lacks a trailing newline.
  while IFS=, read -r -a fields || (( ${#fields[@]} > 0 )); do
    name=${fields[0]}
    # The two ratio rows are plain numbers; every other row is a percentage.
    if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then
      suffix=""
    else
      suffix="%"
    fi
    row="<tr><td>${name}</td>"
    for (( i = 0; i < n_groups; i++ )); do
      row+="<td>${fields[1 + 3 * i]}/${fields[2 + 3 * i]} (${fields[3 + 3 * i]}${suffix})</td>"
    done
    row+="</tr>"
    printf '%s\n' "$row"
  done < "$outdir/result.txt"
  echo "</table>"

  echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br /><a href='aa_id_mutations.txt'>AA mutations location by id</a><br /><a href='absent_aa_id.txt'>Absent AA locations by id</a><br />"

  # Plots: "all" is always linked, the others only when the image was produced.
  echo "<img src='all.png'/><br />"
  echo "<a href='all.txt'>download data</a><br />"
  for plot in ca cg scatter; do
    emit_plot "$plot"
  done
  if [[ -e "$outdir/frequency_ranges.png" ]]; then
    echo "<img src='frequency_ranges.png'/><br />"
    echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />"
    echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />"
  fi
  emit_plot aa_histogram

  # Transition tables: one per gene, then the combined one.
  for gene in "${genes[@]}"; do
    emit_transition_table "$gene transition table" "$outdir/transitions_${gene}.txt"
  done
  emit_transition_table "All transition table" "$outdir/transitions.txt"

  echo "</html>"
} > "$output"

# ---------------------------------------------------------------------------
# Optional output for naive
# ---------------------------------------------------------------------------
if [[ "$naive_output" != "None" ]]; then
  echo "naive_output: $naive_output"
  #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
  Rscript --verbose "$dir/imgt_loader.r" "$summary" "$PWD/aa.txt" "$PWD/junction.txt" "${naive_output}" 2>&1
  # naive_output.r reads the file written above and overwrites it in place.
  Rscript "$dir/naive_output.r" "$naive_output" "$outdir/merged.txt" "$naive_output" 2>&1
fi

#rm $outdir/HS12RSS.txt
#rm $outdir/HS23RSS.txt