| 
0
 | 
     1 #!/bin/bash
 | 
| 
4
 | 
     2 set -e
 | 
| 
0
 | 
     3 dir="$(cd "$(dirname "$0")" && pwd)"
 | 
| 
 | 
     4 input=$1
 | 
| 
19
 | 
     5 method=$2
 | 
| 
 | 
     6 output=$3
 | 
| 
 | 
     7 outdir=$4
 | 
| 
 | 
     8 title=$5
 | 
| 
22
 | 
     9 include_fr1=$6
 | 
| 
34
 | 
    10 functionality=$7
 | 
| 
 | 
    11 unique=$8
 | 
| 
69
 | 
    12 naive_output_ca=$9
 | 
| 
 | 
    13 naive_output_cg=${10}
 | 
| 
 | 
    14 naive_output_cm=${11}
 | 
| 
 | 
    15 filter_unique=${12}
 | 
| 
 | 
    16 class_filter=${13}
 | 
| 
0
 | 
    17 mkdir $outdir
 | 
| 
 | 
    18 
 | 
| 
55
 | 
    19 echo "---------------- read parameters ----------------"
 | 
| 
63
 | 
    20 echo "---------------- read parameters ----------------<br />" > $output
 | 
| 
55
 | 
    21 
 | 
| 
 | 
    22 echo "unpacking IMGT file"
 | 
| 
 | 
    23 
 | 
| 
35
 | 
    24 type="`file $input`"
 | 
| 
 | 
    25 if [[ "$type" == *"Zip archive"* ]] ; then
 | 
| 
 | 
    26 	echo "Zip archive"
 | 
| 
 | 
    27 	echo "unzip $input -d $PWD/files/"
 | 
| 
 | 
    28 	unzip $input -d $PWD/files/
 | 
| 
 | 
    29 elif [[ "$type" == *"XZ compressed data"* ]] ; then
 | 
| 
 | 
    30 	echo "ZX archive"
 | 
| 
 | 
    31 	echo "tar -xJf $input -C $PWD/files/"
 | 
| 
 | 
    32 	mkdir -p $PWD/files/$title
 | 
| 
 | 
    33 	tar -xJf $input -C $PWD/files/$title
 | 
| 
 | 
    34 fi
 | 
| 
 | 
    35 
 | 
| 
64
 | 
    36 cat `find $PWD/files/ -name "1_*"` > $PWD/summary.txt
 | 
| 
 | 
    37 cat `find $PWD/files/ -name "3_*"` > $PWD/sequences.txt
 | 
| 
 | 
    38 cat `find $PWD/files/ -name "5_*"` > $PWD/aa.txt
 | 
| 
 | 
    39 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt
 | 
| 
 | 
    40 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt
 | 
| 
 | 
    41 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt
 | 
| 
 | 
    42 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt
 | 
| 
 | 
    43 
 | 
| 
 | 
    44 
 | 
| 
 | 
    45 
 | 
| 
 | 
    46 #cat $PWD/files/*/1_* > $PWD/summary.txt
 | 
| 
 | 
    47 #cat $PWD/files/*/3_* > $PWD/sequences.txt
 | 
| 
 | 
    48 #cat $PWD/files/*/5_* > $PWD/aa.txt
 | 
| 
 | 
    49 #cat $PWD/files/*/6_* > $PWD/junction.txt
 | 
| 
 | 
    50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt
 | 
| 
 | 
    51 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt
 | 
| 
 | 
    52 #cat $PWD/files/*/10_* > $PWD/hotspots.txt
 | 
| 
3
 | 
    53 
 | 
| 
26
 | 
    54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"
 | 
| 
19
 | 
    55 
 | 
| 
 | 
    56 echo "${BLASTN_DIR}"
 | 
| 
 | 
    57 
 | 
| 
 | 
    58 echo "identification ($method)"
 | 
| 
63
 | 
    59 echo "identification ($method)<br />" >> $output
 | 
| 
19
 | 
    60 
 | 
| 
55
 | 
    61 echo "blast or custom"
 | 
| 
 | 
    62 
 | 
| 
19
 | 
    63 if [[ "${method}" == "custom" ]] ; then
 | 
| 
55
 | 
    64 	echo "custom"
 | 
| 
19
 | 
    65 	python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt
 | 
| 
 | 
    66 else
 | 
| 
55
 | 
    67 	echo "blast"
 | 
| 
19
 | 
    68 	ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l)
 | 
| 
 | 
    69 	ID_index=$((ID_index+1))
 | 
| 
 | 
    70 	sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l)
 | 
| 
 | 
    71 	sequence_index=$((sequence_index+1))
 | 
| 
 | 
    72 	
 | 
| 
 | 
    73 	echo "$ID_index ${sequence_index}"
 | 
| 
 | 
    74 	
 | 
| 
 | 
    75 	cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta
 | 
| 
 | 
    76 	
 | 
| 
 | 
    77 	echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt
 | 
| 
 | 
    78 	${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt
 | 
| 
 | 
    79 fi
 | 
| 
 | 
    80 
 | 
| 
55
 | 
    81 echo "---------------- merge_and_filter.r ----------------"
 | 
| 
63
 | 
    82 echo "---------------- merge_and_filter.r ----------------<br />" >> $output
 | 
| 
19
 | 
    83 
 | 
| 
66
 | 
    84 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter}
 | 
| 
0
 | 
    85 
 | 
| 
55
 | 
    86 echo "---------------- mutation_analysis.r ----------------"
 | 
| 
63
 | 
    87 echo "---------------- mutation_analysis.r ----------------<br />" >> $output
 | 
| 
55
 | 
    88 
 | 
| 
78
 | 
    89 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
 | 
| 
4
 | 
    90 echo "R mutation analysis"
 | 
| 
22
 | 
    91 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1
 | 
| 
53
 | 
    92 
 | 
| 
 | 
    93 #echo "." > $output
 | 
| 
 | 
    94 #exit 0
 | 
| 
 | 
    95 
 | 
| 
55
 | 
    96 
 | 
| 
 | 
    97 
 | 
| 
 | 
    98 echo "---------------- mutation_analysis.py ----------------"
 | 
| 
63
 | 
    99 echo "---------------- mutation_analysis.py ----------------<br />" >> $output
 | 
| 
55
 | 
   100 
 | 
| 
32
 | 
   101 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt
 | 
| 
55
 | 
   102 
 | 
| 
 | 
   103 echo "---------------- aa_histogram.r ----------------"
 | 
| 
63
 | 
   104 echo "---------------- aa_histogram.r ----------------<br />" >> $output
 | 
| 
55
 | 
   105 
 | 
| 
26
 | 
   106 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1
 | 
| 
4
 | 
   107 
 | 
| 
0
 | 
   108 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)
 | 
| 
 | 
   109 
 | 
| 
53
 | 
   110 funcs=(sum mean median)
 | 
| 
0
 | 
   111 
 | 
| 
53
 | 
   112 
 | 
| 
62
 | 
   113 echo "<html><center><h1>$title</h1></center>" > $output
 | 
| 
 | 
   114 
 | 
| 
 | 
   115 #display the matched/unmatched for clearity
 | 
| 
 | 
   116 
 | 
| 
 | 
   117 matched_count="`cat $outdir/merged.txt | tail -n +2 | wc -l`"
 | 
| 
 | 
   118 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`"
 | 
| 
 | 
   119 total_count=$((matched_count + unmatched_count))
 | 
| 
 | 
   120 perc_count=$((unmatched_count / total_count * 100))
 | 
| 
 | 
   121 perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"`
 | 
| 
 | 
   122 perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"`
 | 
| 
 | 
   123 
 | 
| 
 | 
   124 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output
 | 
| 
 | 
   125 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output
 | 
| 
 | 
   126 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output
 | 
| 
 | 
   127 
 | 
| 
55
 | 
   128 echo "---------------- main tables ----------------"
 | 
| 
53
 | 
   129 for func in ${funcs[@]}
 | 
| 
4
 | 
   130 do
 | 
| 
55
 | 
   131 
 | 
| 
 | 
   132 	echo "---------------- $func table ----------------"
 | 
| 
 | 
   133 	
 | 
| 
53
 | 
   134 	cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt
 | 
| 
 | 
   135 
 | 
| 
 | 
   136 	echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output
 | 
| 
58
 | 
   137 	echo "<tr><th>info</th>" >> $output
 | 
| 
53
 | 
   138 	for gene in ${genes[@]}
 | 
| 
 | 
   139 	do
 | 
| 
 | 
   140 		tmp=`cat $outdir/${gene}_${func}_n.txt`
 | 
| 
 | 
   141 		echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
 | 
| 
 | 
   142 	done
 | 
| 
78
 | 
   143 	
 | 
| 
 | 
   144 	tmp=`cat $outdir/unmatched_${func}_n.txt`
 | 
| 
79
 | 
   145 	echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output
 | 
| 
53
 | 
   146 	tmp=`cat $outdir/all_${func}_n.txt`
 | 
| 
78
 | 
   147 	echo "<th><a href='matched_${func}_all.txt'>all (N = $tmp)</a></th>" >> $output
 | 
| 
4
 | 
   148 
 | 
| 
78
 | 
   149 	while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
 | 
| 
53
 | 
   150 	do
 | 
| 
 | 
   151 		if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
 | 
| 
 | 
   152 			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
 | 
| 
 | 
   153 		else
 | 
| 
78
 | 
   154 			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${unx}/${uny} (${unz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
 | 
| 
53
 | 
   155 		fi
 | 
| 
 | 
   156 	done < $outdir/result.txt
 | 
| 
 | 
   157 
 | 
| 
 | 
   158 done
 | 
| 
 | 
   159 
 | 
| 
55
 | 
   160 echo "---------------- download links ----------------"
 | 
| 
 | 
   161 
 | 
| 
0
 | 
   162 echo "</table>" >> $output
 | 
| 
53
 | 
   163 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
 | 
| 
 | 
   164 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
 | 
| 
 | 
   165 echo "<a href='merged.txt'>all data</a><br />" >> $output
 | 
| 
 | 
   166 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
 | 
| 
 | 
   167 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
 | 
| 
 | 
   168 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
 | 
| 
77
 | 
   169 echo "<a href='sequence_overview/index.html'>Sequence Overview</a><br />" >> $output
 | 
| 
81
 | 
   170 echo "<a href='base_overview.html'>Base overview</a><br />" >> $output
 | 
| 
2
 | 
   171 
 | 
| 
55
 | 
   172 echo "---------------- images ----------------"
 | 
| 
 | 
   173 
 | 
| 
4
 | 
   174 echo "<img src='all.png'/><br />" >> $output
 | 
| 
26
 | 
   175 echo "<a href='all.txt'>download data</a><br />" >> $output
 | 
| 
4
 | 
   176 if [ -a $outdir/ca.png ] 
 | 
| 
 | 
   177 then
 | 
| 
 | 
   178 	echo "<img src='ca.png'/><br />" >> $output
 | 
| 
26
 | 
   179 	echo "<a href='ca.txt'>download data</a><br />" >> $output
 | 
| 
4
 | 
   180 fi
 | 
| 
 | 
   181 if [ -a $outdir/cg.png ]
 | 
| 
 | 
   182 then
 | 
| 
 | 
   183 	echo "<img src='cg.png'/><br />" >> $output
 | 
| 
26
 | 
   184 	echo "<a href='cg.txt'>download data</a><br />" >> $output
 | 
| 
4
 | 
   185 fi
 | 
| 
22
 | 
   186 if [ -a $outdir/scatter.png ]
 | 
| 
 | 
   187 then
 | 
| 
 | 
   188 	echo "<img src='scatter.png'/><br />" >> $output
 | 
| 
26
 | 
   189 	echo "<a href='scatter.txt'>download data</a><br />" >> $output
 | 
| 
 | 
   190 fi
 | 
| 
49
 | 
   191 if [ -a $outdir/frequency_ranges.png ]
 | 
| 
 | 
   192 then
 | 
| 
 | 
   193 	echo "<img src='frequency_ranges.png'/><br />" >> $output
 | 
| 
 | 
   194 	echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output
 | 
| 
 | 
   195 	echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output
 | 
| 
 | 
   196 fi
 | 
| 
26
 | 
   197 if [ -a $outdir/aa_histogram.png ]
 | 
| 
 | 
   198 then
 | 
| 
 | 
   199 	echo "<img src='aa_histogram.png'/><br />" >> $output
 | 
| 
 | 
   200 	echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output
 | 
| 
22
 | 
   201 fi
 | 
| 
2
 | 
   202 
 | 
| 
0
 | 
   203 for gene in ${genes[@]}
 | 
| 
 | 
   204 do
 | 
| 
 | 
   205 	echo "<table border='1'><caption>$gene transition table</caption>" >> $output
 | 
| 
 | 
   206 	while IFS=, read from a c g t
 | 
| 
 | 
   207 		do
 | 
| 
 | 
   208 			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
 | 
| 
53
 | 
   209 	done < $outdir/transitions_${gene}_sum.txt
 | 
| 
0
 | 
   210 	echo "</table>" >> $output
 | 
| 
 | 
   211 done
 | 
| 
 | 
   212 
 | 
| 
 | 
   213 echo "<table border='1'><caption>All transition table</caption>" >> $output
 | 
| 
 | 
   214 while IFS=, read from a c g t
 | 
| 
 | 
   215 	do
 | 
| 
 | 
   216 		echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
 | 
| 
53
 | 
   217 done < $outdir/transitions_all_sum.txt
 | 
| 
0
 | 
   218 echo "</table>" >> $output
 | 
| 
 | 
   219 
 | 
| 
 | 
   220 echo "</html>" >> $output
 | 
| 
2
 | 
   221 
 | 
| 
47
 | 
   222 
 | 
| 
 | 
   223 #optional output for naive
 | 
| 
 | 
   224 
 | 
| 
55
 | 
   225 echo "---------------- aa_histogram.r ----------------"
 | 
| 
 | 
   226 
 | 
| 
47
 | 
   227 if [[ "$naive_output" != "None" ]]
 | 
| 
 | 
   228 then
 | 
| 
55
 | 
   229 	echo "---------------- imgt_loader.r ----------------"
 | 
| 
50
 | 
   230 	#python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
 | 
| 
80
 | 
   231 	Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
 | 
| 
55
 | 
   232 	echo "---------------- naive_output.r ----------------"
 | 
| 
81
 | 
   233 	Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
 | 
| 
47
 | 
   234 fi
 | 
| 
 | 
   235 
 | 
| 
76
 | 
   236 echo "---------------- sequence_overview.r ----------------"
 | 
| 
47
 | 
   237 
 | 
| 
76
 | 
   238 mkdir $outdir/sequence_overview
 | 
| 
 | 
   239 
 | 
| 
 | 
   240 Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/sequence_overview 2>&1
 | 
| 
47
 | 
   241 
 | 
| 
81
 | 
   242 echo "<table border='1'>" > $outdir/base_overview.html
 | 
| 
47
 | 
   243 
 | 
| 
81
 | 
   244 while read ID class seq A C G T
 | 
| 
 | 
   245 do
 | 
| 
 | 
   246 	echo "<tr><td>$ID</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>" >> $outdir/base_overview.html
 | 
| 
 | 
   247 done < $outdir/sequence_overview/ntoverview.txt
 | 
| 
 | 
   248 
 | 
| 
 | 
   249 echo "</table>" >> $outdir/base_overview.html
 | 
| 
 | 
   250 
 | 
| 
 | 
   251 echo "---------------- Done! ----------------"
 | 
| 
47
 | 
   252 
 | 
| 
 | 
   253 
 | 
| 
 | 
   254 
 | 
| 
 | 
   255 
 | 
| 
 | 
   256 
 | 
| 
 | 
   257 
 | 
| 
 | 
   258 
 | 
| 
2
 | 
   259 #rm $outdir/HS12RSS.txt
 | 
| 
 | 
   260 #rm $outdir/HS23RSS.txt
 |