annotate wrapper.sh @ 100:ff5be711382b draft

Uploaded
author davidvanzessen
date Fri, 17 Jun 2016 05:36:32 -0400
parents 86206431cbb0
children 3cffb8a38bb1
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
#!/bin/bash
# Wrapper that drives the analysis of an IMGT archive and writes an HTML
# report.
#
# Positional arguments:
#   $1   input            archive to analyse; inspected with `file` and
#                         unpacked with unzip or tar
#   $2   method           "custom" runs gene_identification.py, any other
#                         value runs blastn
#   $3   output           HTML report file
#   $4   outdir           directory that receives every result file
#   $5   title            report heading; also the sub-directory name used
#                         when an xz archive is extracted
#   $6   include_fr1      forwarded to mutation_analysis.r / mutation_analysis.py
#   $7   functionality    forwarded to merge_and_filter.r
#   $8   unique           forwarded to merge_and_filter.r
#   $9   naive_output_ca  \
#   $10  naive_output_cg   > stored here; not referenced in the part of the
#   $11  naive_output_cm  /  script visible during this review
#   $12  filter_unique    forwarded to merge_and_filter.r
#   $13  class_filter     forwarded to merge_and_filter.r

# Abort on the first command that fails.
set -e

# Absolute path of the directory holding this script; helper scripts are
# resolved relative to it.
dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
output=$3
outdir=$4
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}

# No -p on purpose: an already existing output directory stops the run.
mkdir -- "$outdir"
echo "---------------- read parameters ----------------"
# First write to the report file: truncates whatever was there.
echo "---------------- read parameters ----------------<br />" > "$output"

echo "unpacking IMGT file"

# The archive format is recognised from the description printed by `file`.
type="$(file "$input")"
if [[ "$type" == *"Zip archive"* ]]; then
	echo "Zip archive"
	echo "unzip $input -d $PWD/files/"
	unzip "$input" -d "$PWD/files/"
elif [[ "$type" == *"XZ compressed data"* ]]; then
	echo "XZ archive"
	echo "tar -xJf $input -C $PWD/files/$title"
	mkdir -p "$PWD/files/$title"
	tar -xJf "$input" -C "$PWD/files/$title"
else
	# Nothing would be extracted, so every later step would work on empty
	# data; stop here with a clear message instead.
	echo "unsupported input archive type: $type" >&2
	exit 1
fi

# Concatenate every extracted file whose name matches pattern $1 into $2.
# find hands the paths straight to cat, so paths containing whitespace are
# safe and an empty match yields an empty file instead of cat waiting on
# stdin.
collect_imgt_files() {
	find "$PWD/files/" -type f -name "$1" -exec cat {} + > "$2"
}

collect_imgt_files "1_*" "$PWD/summary.txt"
collect_imgt_files "3_*" "$PWD/sequences.txt"
collect_imgt_files "5_*" "$PWD/aa.txt"
collect_imgt_files "6_*" "$PWD/junction.txt"
collect_imgt_files "7_*" "$PWD/mutationanalysis.txt"
collect_imgt_files "8_*" "$PWD/mutationstats.txt"
collect_imgt_files "10_*" "$PWD/hotspots.txt"

#cat $PWD/files/*/1_* > $PWD/summary.txt
#cat $PWD/files/*/3_* > $PWD/sequences.txt
#cat $PWD/files/*/5_* > $PWD/aa.txt
#cat $PWD/files/*/6_* > $PWD/junction.txt
#cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
#cat $PWD/files/*/8_* > $PWD/mutationstats.txt
#cat $PWD/files/*/10_* > $PWD/hotspots.txt
# BLASTN_DIR is taken from the environment; the line below shows the kind of
# value it holds.
#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

echo "${BLASTN_DIR}"

echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$output"

if [[ "${method}" == "custom" ]]; then
	python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
	# Locate the "Sequence ID" and "Sequence" columns by counting the tabs
	# that precede them. Only the first (header) line is inspected, so a
	# repeated header further down the file cannot inflate the counts.
	header="$(head -n 1 "$PWD/summary.txt")"
	ID_index=$(grep -o -P ".+Sequence ID" <<< "$header" | grep -o -P "\t" | wc -l)
	ID_index=$((ID_index+1))
	sequence_index=$(grep -o -P ".+\tSequence" <<< "$header" | grep -o -P "\t" | wc -l)
	sequence_index=$((sequence_index+1))

	# Turn the two columns of every data row into a FASTA record.
	tail -n +2 "$PWD/summary.txt" \
		| cut -f "${ID_index},${sequence_index}" \
		| awk '{print ">" $1 "\n" $2}' > "$PWD/sequences.fasta"

	# Header row matching the column order of blastn -outfmt 6 as written
	# below; the blastn hits are appended underneath.
	printf 'qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore\n' > "$outdir/identified_genes.txt"
	# ${BLASTN_DIR:?...} stops with a clear message rather than trying to
	# run "/blastn" when the variable is missing.
	"${BLASTN_DIR:?BLASTN_DIR must point at the directory containing blastn}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$output"

# merge_and_filter.r takes its arguments by position: five concatenated IMGT
# tables, the identified-genes table, three output paths and five settings.
# Every argument is quoted so that an empty or space-containing value keeps
# its slot instead of shifting the ones after it.
Rscript "$dir/merge_and_filter.r" \
	"$PWD/summary.txt" \
	"$PWD/sequences.txt" \
	"$PWD/mutationanalysis.txt" \
	"$PWD/mutationstats.txt" \
	"$PWD/hotspots.txt" \
	"$outdir/identified_genes.txt" \
	"$outdir/merged.txt" \
	"$outdir/before_unique_filter.txt" \
	"$outdir/unmatched.txt" \
	"$method" \
	"$functionality" \
	"$unique" \
	"${filter_unique}" \
	"${class_filter}" 2>&1
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$output"

mkdir "$outdir/new_IMGT"

# Rebuild one file per IMGT report inside new_IMGT. The extracted files are
# selected by their numeric prefix ("1_*", "2_*", ... "10_*"), which is
# derived from the target name. find passes the matches directly to cat, so
# whitespace in paths is harmless and an empty match produces an empty file
# rather than leaving cat reading stdin.
for imgt_name in \
	"1_Summary.txt" \
	"2_IMGT-gapped-nt-sequences.txt" \
	"3_Nt-sequences.txt" \
	"4_IMGT-gapped-AA-sequences.txt" \
	"5_AA-sequences.txt" \
	"6_Junction.txt" \
	"7_V-REGION-mutation-and-AA-change-table.txt" \
	"8_V-REGION-nt-mutation-statistics.txt" \
	"9_V-REGION-AA-change-statistics.txt" \
	"10_V-REGION-mutation-hotspots.txt"
do
	find "$PWD/files/" -type f -name "${imgt_name%%_*}_*" -exec cat {} + > "$outdir/new_IMGT/${imgt_name}"
done
# Give each class (ca, cg, cm) its own copy of the rebuilt IMGT files.
for ig_class in ca cg cm; do
	mkdir "$outdir/new_IMGT_${ig_class}"
	cp "$outdir/new_IMGT/"* "$outdir/new_IMGT_${ig_class}"
done

# Run igat.r on every copy: "-" for the full set, the class name for the
# per-class copies.
# NOTE(review): igat.r presumably filters the files in the given directory
# against merged.txt - confirm in igat.r.
# The class name and the redirection must be separate words: written as
# "ca "2>&1 the script received the argument "ca 2" and stderr was never
# redirected.
Rscript "$dir/tmp/igat.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
for ig_class in ca cg cm; do
	Rscript "$dir/tmp/igat.r" "$outdir/new_IMGT_${ig_class}/" "$outdir/merged.txt" "${ig_class}" 2>&1
done
# Package each new_IMGT* directory twice:
#   new_IMGT<suffix>.txz  - the IMGT files only
#   IgAT<suffix>.zip      - the same files plus the IgAT.xlsm workbook
# The archives are created from inside the directory so that their members
# carry no leading path (the "tar weirdness" of the earlier revision).
# Each directory is handled in a subshell: the working directory of the main
# script never changes and a relative $outdir keeps working.
for suffix in "" _ca _cg _cm; do
	(
		cd "$outdir/new_IMGT${suffix}/"
		# The .txz is written before the workbook is copied in, so it
		# does not contain IgAT.xlsm.
		tar -cJf "../new_IMGT${suffix}.txz" *
		cp "$dir/tmp/IgAT.xlsm" ./IgAT.xlsm
		zip -r "../IgAT${suffix}.zip" *
	)
done
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$output"

# Comma-separated class list handed to the analysis scripts.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1

echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$output"

python "$dir/mutation_analysis.py" --input "$outdir/merged.txt" --genes "$classes" --includefr1 "${include_fr1}" --output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$output"

Rscript "$dir/aa_histogram.r" "$outdir/aa_mutations.txt" "$outdir/aa_histogram.png" 2>&1

# Classes and summary functions iterated over when the report tables are
# built further down.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

funcs=(sum mean median)

echo "---------------- sequence_overview.r ----------------"
# "<br />" added so this progress line matches the others written to $output.
echo "---------------- sequence_overview.r ----------------<br />" >> "$output"

mkdir "$outdir/sequence_overview"

#Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/merged.txt $outdir/sequence_overview $classes $outdir/hotspot_analysis_sum.txt 2>&1
Rscript "$dir/sequence_overview.r" "$outdir/before_unique_filter.txt" "$outdir/merged.txt" "$outdir/sequence_overview" "$classes" "$outdir/hotspot_analysis_sum.txt" 2>&1
# Print the tab-separated overview file $1 as an HTML table on stdout.
# Each input row is read as: ID, class, sequence, A, C, G, T; the class and
# sequence columns are swapped in the output. Tab is a whitespace separator
# for `read`, so runs of tabs count as one separator.
render_base_overview() {
	local ID class seq A C G T
	echo "<table border='1'>"
	while IFS=$'\t' read -r ID class seq A C G T; do
		echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
	done < "$1"
	# The table was previously left unclosed.
	echo "</table>"
}

render_base_overview "$outdir/sequence_overview/ntoverview.txt" > "$outdir/base_overview.html"
# Start the final report: this truncates $output, replacing the progress
# lines written to it so far.
echo "<html><center><h1>$title</h1></center>" > "$output"

#display the matched/unmatched for clarity

# Print $1 as a percentage of $2, computed by bc with scale=2.
# Prints 0 when $2 is not greater than zero, so an empty data set no longer
# triggers a division by zero.
percent_unmatched() {
	if [[ "$2" -gt 0 ]]; then
		bc -l <<< "scale=2; ($1 / $2 * 100 ) / 1"
	else
		echo 0
	fi
}

# Rows of merged.txt that do not contain "unmatched", minus the header line.
matched_count="$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)"
# Rows of unmatched.txt, minus the header line.
unmatched_count="$(tail -n +2 "$outdir/unmatched.txt" | wc -l)"
total_count=$((matched_count + unmatched_count))
perc_count="$(percent_unmatched "$unmatched_count" "$total_count")"

echo "<center><h2>Total: ${total_count}</h2></center>" >> "$output"
echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> "$output"
echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> "$output"
echo "---------------- main tables ----------------"

# Print one comma-separated data row ($1) as an HTML table row on stdout.
# Field 1 is the row label; it is followed by eleven x,y,z triplets in the
# column order of the table header: ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm
# unmatched all. Each triplet is shown as "x/y (z%)"; the two "S/R (ratio)"
# rows are shown as "x/y (z)" without the percent sign.
# The ratio rows used to skip the unmatched triplet, which shifted the "all"
# values under the "unmatched" heading; every row now has eleven cells.
# Fields beyond the 34th are ignored.
mutation_row_html() {
	local -a fields
	local row suffix i
	IFS=, read -r -a fields <<< "$1"
	if [[ "${fields[0]}" == "FR S/R (ratio)" || "${fields[0]}" == "CDR S/R (ratio)" ]]; then
		suffix=""
	else
		suffix="%"
	fi
	row="<tr><td>${fields[0]}</td>"
	for ((i = 1; i <= 31; i += 3)); do
		row+="<td>${fields[i]}/${fields[i+1]} (${fields[i+2]}${suffix})</td>"
	done
	printf '%s\n' "${row}</tr>"
}

for func in "${funcs[@]}"; do

	echo "---------------- $func table ----------------"

	# The table body is the mutation rows followed by the hotspot rows.
	cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

	echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> "$output"
	echo "<tr><th>info</th>" >> "$output"
	for gene in "${genes[@]}"; do
		# <gene>_<func>_n.txt holds the N shown in the column heading.
		tmp=$(cat "$outdir/${gene}_${func}_n.txt")
		echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> "$output"
	done

	# The unmatched heading shows ${unmatched_count}, which is set earlier in
	# the script; unmatched_<func>_n.txt is no longer read because its value
	# was never used.
	echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> "$output"
	tmp=$(cat "$outdir/all_${func}_n.txt")
	echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> "$output"
	# Close the header row (it was left open before).
	echo "</tr>" >> "$output"

	while IFS= read -r line; do
		mutation_row_html "$line" >> "$output"
	done < "$outdir/data_${func}.txt"
	echo "</table>" >> "$output"
	#echo "<a href='data_${func}.txt'>Download data</a>" >> $output
done
echo "---------------- download links ----------------"

# Append the download links to the report in one write. The here-document
# delimiter is quoted, so the text is copied literally with no expansion.
# Only change to the emitted text: "Absant" is now spelled "Absent".
cat >> "$output" <<'EOF'
<a href='unmatched.txt'>unmatched</a><br />
<a href='motif_per_seq.txt'>motif per sequence</a><br />
<a href='merged.txt'>all data</a><br />
<a href='mutation_by_id.txt'>mutations by id</a><br />
<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />
<a href='absent_aa_id.txt'>Absent AA locations by id</a><br />
<a href='sequence_overview/index.html'>Sequence Overview</a><br />
<a href='base_overview.html'>Base overview</a><br />
<a href='baseline.pdf'>Baseline PDF</a><br />
<a href='baseline.txt'>Baseline Table</a><br />
<a href='baseline_ca.pdf'>Baseline ca PDF</a><br />
<a href='baseline_ca.txt'>Baseline ca Table</a><br />
<a href='baseline_cg.pdf'>Baseline cg PDF</a><br />
<a href='baseline_cg.txt'>Baseline cg Table</a><br />
<a href='baseline_cm.pdf'>Baseline cm PDF</a><br />
<a href='baseline_cm.txt'>Baseline cm Table</a><br />
<a href='IgAT.zip'>IgAT zip</a><br />
<a href='IgAT_ca.zip'>IgAT ca zip</a><br />
<a href='IgAT_cg.zip'>IgAT cg zip</a><br />
<a href='IgAT_cm.zip'>IgAT cm zip</a><br />
<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />
<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />
<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />
<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />
EOF
echo "---------------- images ----------------"

# all.png is linked unconditionally; every other plot is linked only when
# its image file exists in $outdir.
echo "<img src='all.png'/><br />" >> "$output"
echo "<a href='all.txt'>download data</a><br />" >> "$output"

# Plots whose data file shares the image's base name.
for plot in ca cg scatter; do
	if [[ -e "$outdir/${plot}.png" ]]; then
		echo "<img src='${plot}.png'/><br />" >> "$output"
		echo "<a href='${plot}.txt'>download data</a><br />" >> "$output"
	fi
done

# The frequency-range plot has two data files (classes and subclasses).
if [[ -e "$outdir/frequency_ranges.png" ]]; then
	echo "<img src='frequency_ranges.png'/><br />" >> "$output"
	echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> "$output"
	echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> "$output"
fi

if [[ -e "$outdir/aa_histogram.png" ]]; then
	echo "<img src='aa_histogram.png'/><br />" >> "$output"
	echo "<a href='aa_histogram.txt'>download data</a><br />" >> "$output"
fi
0
74d2bc479bee Uploaded
davidvanzessen
parents:
diff changeset
# Emit one HTML table per entry of the `genes` array (defined outside this
# section). Each table is built from $outdir/transitions_<gene>_sum.txt, a
# comma-separated file whose rows are split into five cells.
for gene in "${genes[@]}"; do
	{
		echo "<table border='1'><caption>$gene transition table</caption>"
		# -r keeps backslashes in the data literal; IFS=, splits on commas only
		# for the duration of the read.
		while IFS=, read -r from a c g t; do
			printf '%s\n' "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
		done < "$outdir/transitions_${gene}_sum.txt"
		echo "</table>"
	} >> "$output"
done
74d2bc479bee Uploaded
davidvanzessen
parents:
diff changeset
289
74d2bc479bee Uploaded
davidvanzessen
parents:
diff changeset
# Emit the combined transition table from $outdir/transitions_all_sum.txt
# (comma-separated, five fields per row), then close the HTML document.
# The table is written in a single grouped append so $output is opened once.
{
	echo "<table border='1'><caption>All transition table</caption>"
	# -r keeps backslashes in the data literal.
	while IFS=, read -r from a c g t; do
		printf '%s\n' "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
	done < "$outdir/transitions_all_sum.txt"
	echo "</table>"
} >> "$output"

echo "</html>" >> "$output"
2
2f4298673519 Uploaded
davidvanzessen
parents: 1
diff changeset
298
95
a66eb1c5374c Uploaded
davidvanzessen
parents: 94
diff changeset
echo "---------------- baseline ----------------"

# Run the bundled baseline wrapper four times: once for the combined
# "ca_cg_cm" selection and once each for "ca", "cg" and "cm". The combined run
# uses un-suffixed file names (new_IMGT.txz, baseline.pdf, baseline.txt); the
# other runs use a "_<class>" suffix. All remaining positional arguments are
# passed through unchanged; their meaning is defined by the baseline wrapper
# itself. Every path is quoted so directories with spaces work.
baseline_dir="$dir/tmp/baseline"
baseline_ref="$baseline_dir/IMGT-reference-seqs-IGHV-2015-11-05.fa"

for baseline_class in ca_cg_cm ca cg cm; do
	if [[ "$baseline_class" == "ca_cg_cm" ]]; then
		baseline_suffix=""
	else
		baseline_suffix="_${baseline_class}"
	fi

	bash "$baseline_dir/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" \
		"$outdir/new_IMGT${baseline_suffix}.txz" \
		"$baseline_class" \
		"$baseline_ref" \
		"$outdir/baseline${baseline_suffix}.pdf" \
		"Sequence.ID" \
		"$outdir/baseline${baseline_suffix}.txt"
done
47
099cc1254f74 Uploaded
davidvanzessen
parents: 43
diff changeset
304
099cc1254f74 Uploaded
davidvanzessen
parents: 43
diff changeset
# Optional output for naive.

echo "---------------- naive_output.r ----------------"

# NOTE(review): "$naive_output" is not assigned in the visible script header
# (only naive_output_ca / naive_output_cg / naive_output_cm are). If it is
# never set, this comparison is always true and the block always runs --
# confirm which variable was intended before changing the condition.
if [[ "$naive_output" != "None" ]]; then
	echo "---------------- imgt_loader.r ----------------"
	# Superseded Python loader, kept for reference:
	#python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
	Rscript --verbose "$dir/imgt_loader.r" \
		"$PWD/summary.txt" "$PWD/aa.txt" "$PWD/junction.txt" \
		"$outdir/loader_output.txt" 2>&1

	echo "---------------- naive_output.r ----------------"
	# The three naive_output_* values are quoted so an empty value still
	# occupies its positional slot instead of shifting the later arguments.
	Rscript "$dir/naive_output.r" \
		"$outdir/loader_output.txt" "$outdir/merged.txt" \
		"${naive_output_ca}" "${naive_output_cg}" "${naive_output_cm}" \
		"$outdir/ntoverview.txt" "$outdir/ntsum.txt" 2>&1
fi
099cc1254f74 Uploaded
davidvanzessen
parents: 43
diff changeset
318
81
a778156dad3d Uploaded
davidvanzessen
parents: 80
diff changeset
# Append the closing table tag to the base overview page (the matching
# opening tag is presumably written earlier in the script -- not visible
# here), then report completion on stdout. The redirect target is quoted so
# an $outdir containing whitespace does not cause an ambiguous redirect.
echo "</table>" >> "$outdir/base_overview.html"

echo "---------------- Done! ----------------"
47
099cc1254f74 Uploaded
davidvanzessen
parents: 43
diff changeset
322