0
|
1 #!/bin/bash
|
4
|
2 set -e
|
0
|
3 dir="$(cd "$(dirname "$0")" && pwd)"
|
|
4 input=$1
|
19
|
5 method=$2
|
|
6 output=$3
|
|
7 outdir=$4
|
|
8 title=$5
|
22
|
9 include_fr1=$6
|
34
|
10 functionality=$7
|
|
11 unique=$8
|
47
|
12 naive_output=$9
|
0
|
13 mkdir $outdir
|
|
14
|
35
|
15 type="`file $input`"
|
|
16 if [[ "$type" == *"Zip archive"* ]] ; then
|
|
17 echo "Zip archive"
|
|
18 echo "unzip $input -d $PWD/files/"
|
|
19 unzip $input -d $PWD/files/
|
|
20 elif [[ "$type" == *"XZ compressed data"* ]] ; then
|
|
21 echo "ZX archive"
|
|
22 echo "tar -xJf $input -C $PWD/files/"
|
|
23 mkdir -p $PWD/files/$title
|
|
24 tar -xJf $input -C $PWD/files/$title
|
|
25 fi
|
|
26
|
0
|
27 cat $PWD/files/*/1_* > $PWD/summary.txt
|
41
|
28 cat $PWD/files/*/3_* > $PWD/sequences.txt
|
47
|
29 cat $PWD/files/*/5_* > $PWD/aa.txt
|
39
|
30 cat $PWD/files/*/6_* > $PWD/junction.txt
|
0
|
31 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
|
|
32 cat $PWD/files/*/8_* > $PWD/mutationstats.txt
|
|
33 cat $PWD/files/*/10_* > $PWD/hotspots.txt
|
3
|
34
|
26
|
35 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
|
19
|
36
|
|
37 echo "${BLASTN_DIR}"
|
|
38
|
|
39 echo "identification ($method)"
|
|
40
|
|
41 if [[ "${method}" == "custom" ]] ; then
|
|
42 python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
|
|
43 else
|
|
44 ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
|
|
45 ID_index=$((ID_index+1))
|
|
46 sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
|
|
47 sequence_index=$((sequence_index+1))
|
|
48
|
|
49 echo "$ID_index ${sequence_index}"
|
|
50
|
|
51 cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
|
|
52
|
|
53 echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
|
|
54 ${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
|
|
55 fi
|
|
56
|
|
57
|
|
58
|
4
|
59 echo "merging"
|
41
|
60 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique
|
0
|
61
|
4
|
62 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm"
|
|
63 echo "R mutation analysis"
|
22
|
64 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
|
4
|
65 echo "python mutation analysis"
|
32
|
66 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
|
26
|
67 echo "R AA histogram"
|
|
68 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
|
4
|
69 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt
|
|
70
|
0
|
71 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
|
|
72
|
|
73
|
1
|
74 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output
|
0
|
75 echo "<tr><th>info</th>" >> $output
|
4
|
76 for gene in ${genes[@]}
|
|
77 do
|
|
78 tmp=`cat $outdir/${gene}_n.txt`
|
|
79 echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output
|
|
80 done
|
|
81 tmp=`cat $outdir/total_n.txt`
|
|
82 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output
|
|
83
|
0
|
84 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz
|
25
|
85 do
|
|
86 if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
|
|
87 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
|
|
88 else
|
0
|
89 echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
|
25
|
90 fi
|
4
|
91 done < $outdir/result.txt
|
0
|
92 echo "</table>" >> $output
|
49
|
93 echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br /><a href='aa_id_mutations.txt'>AA mutations location by id</a><br /><a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
|
2
|
94
|
|
95
|
4
|
96 echo "<img src='all.png'/><br />" >> $output
|
26
|
97 echo "<a href='all.txt'>download data</a><br />" >> $output
|
4
|
98 if [ -a $outdir/ca.png ]
|
|
99 then
|
|
100 echo "<img src='ca.png'/><br />" >> $output
|
26
|
101 echo "<a href='ca.txt'>download data</a><br />" >> $output
|
4
|
102 fi
|
|
103 if [ -a $outdir/cg.png ]
|
|
104 then
|
|
105 echo "<img src='cg.png'/><br />" >> $output
|
26
|
106 echo "<a href='cg.txt'>download data</a><br />" >> $output
|
4
|
107 fi
|
22
|
108 if [ -a $outdir/scatter.png ]
|
|
109 then
|
|
110 echo "<img src='scatter.png'/><br />" >> $output
|
26
|
111 echo "<a href='scatter.txt'>download data</a><br />" >> $output
|
|
112 fi
|
49
|
113 if [ -a $outdir/frequency_ranges.png ]
|
|
114 then
|
|
115 echo "<img src='frequency_ranges.png'/><br />" >> $output
|
|
116 echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
|
|
117 echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
|
|
118 fi
|
26
|
119 if [ -a $outdir/aa_histogram.png ]
|
|
120 then
|
|
121 echo "<img src='aa_histogram.png'/><br />" >> $output
|
|
122 echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
|
22
|
123 fi
|
2
|
124
|
0
|
125 for gene in ${genes[@]}
|
|
126 do
|
|
127 echo "<table border='1'><caption>$gene transition table</caption>" >> $output
|
|
128 while IFS=, read from a c g t
|
|
129 do
|
|
130 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
4
|
131 done < $outdir/transitions_${gene}.txt
|
0
|
132 echo "</table>" >> $output
|
|
133 done
|
|
134
|
|
135 echo "<table border='1'><caption>All transition table</caption>" >> $output
|
|
136 while IFS=, read from a c g t
|
|
137 do
|
|
138 echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
|
|
139 done < $outdir/transitions.txt
|
|
140 echo "</table>" >> $output
|
|
141
|
|
142 echo "</html>" >> $output
|
2
|
143
|
47
|
144
|
|
145 #optional output for naive
|
|
146
|
|
147 if [[ "$naive_output" != "None" ]]
|
|
148 then
|
|
149 echo "naive_output: $naive_output"
|
50
|
150 #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
|
|
151 Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1
|
47
|
152 Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1
|
|
153 fi
|
|
154
|
|
155
|
|
156
|
|
157
|
|
158
|
|
159
|
|
160
|
|
161
|
|
162
|
|
163
|
|
164
|
2
|
165 #rm $outdir/HS12RSS.txt
|
|
166 #rm $outdir/HS23RSS.txt
|