| 0 | 1 #!/bin/bash | 
| 4 | 2 set -e | 
| 0 | 3 dir="$(cd "$(dirname "$0")" && pwd)" | 
|  | 4 input=$1 | 
| 19 | 5 method=$2 | 
|  | 6 output=$3 | 
|  | 7 outdir=$4 | 
|  | 8 title=$5 | 
| 22 | 9 include_fr1=$6 | 
| 34 | 10 functionality=$7 | 
|  | 11 unique=$8 | 
| 47 | 12 naive_output=$9 | 
| 52 | 13 filter_unique=${10} | 
| 0 | 14 mkdir $outdir | 
|  | 15 | 
| 35 | 16 type="`file $input`" | 
|  | 17 if [[ "$type" == *"Zip archive"* ]] ; then | 
|  | 18 	echo "Zip archive" | 
|  | 19 	echo "unzip $input -d $PWD/files/" | 
|  | 20 	unzip $input -d $PWD/files/ | 
|  | 21 elif [[ "$type" == *"XZ compressed data"* ]] ; then | 
|  | 22 	echo "ZX archive" | 
|  | 23 	echo "tar -xJf $input -C $PWD/files/" | 
|  | 24 	mkdir -p $PWD/files/$title | 
|  | 25 	tar -xJf $input -C $PWD/files/$title | 
|  | 26 fi | 
|  | 27 | 
| 0 | 28 cat $PWD/files/*/1_* > $PWD/summary.txt | 
| 41 | 29 cat $PWD/files/*/3_* > $PWD/sequences.txt | 
| 47 | 30 cat $PWD/files/*/5_* > $PWD/aa.txt | 
| 39 | 31 cat $PWD/files/*/6_* > $PWD/junction.txt | 
| 0 | 32 cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt | 
|  | 33 cat $PWD/files/*/8_* > $PWD/mutationstats.txt | 
|  | 34 cat $PWD/files/*/10_* > $PWD/hotspots.txt | 
| 3 | 35 | 
| 26 | 36 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" | 
| 19 | 37 | 
|  | 38 echo "${BLASTN_DIR}" | 
|  | 39 | 
|  | 40 echo "identification ($method)" | 
|  | 41 | 
|  | 42 if [[ "${method}" == "custom" ]] ; then | 
|  | 43 	python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt | 
|  | 44 else | 
|  | 45 	ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) | 
|  | 46 	ID_index=$((ID_index+1)) | 
|  | 47 	sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) | 
|  | 48 	sequence_index=$((sequence_index+1)) | 
|  | 49 | 
|  | 50 	echo "$ID_index ${sequence_index}" | 
|  | 51 | 
|  | 52 	cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta | 
|  | 53 | 
|  | 54 	echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt | 
|  | 55 	${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt | 
|  | 56 fi | 
|  | 57 | 
|  | 58 | 
|  | 59 | 
| 4 | 60 echo "merging" | 
| 52 | 61 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} | 
| 0 | 62 | 
| 4 | 63 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" | 
|  | 64 echo "R mutation analysis" | 
| 22 | 65 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1 | 
| 4 | 66 echo "python mutation analysis" | 
| 32 | 67 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt | 
| 26 | 68 echo "R AA histogram" | 
|  | 69 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1 | 
| 4 | 70 cat $outdir/mutations.txt $outdir/hotspot_analysis.txt > $outdir/result.txt | 
|  | 71 | 
| 0 | 72 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) | 
|  | 73 | 
|  | 74 | 
| 1 | 75 echo "<html><center><h1>$title</h1></center><table border='1'>" > $output | 
| 0 | 76 echo "<tr><th>info</th>" >> $output | 
| 4 | 77 for gene in ${genes[@]} | 
|  | 78 do | 
|  | 79 	tmp=`cat $outdir/${gene}_n.txt` | 
|  | 80 	echo "<th><a href='matched_${gene}.txt'>${gene} (N = $tmp)</a></th>" >> $output | 
|  | 81 done | 
|  | 82 tmp=`cat $outdir/total_n.txt` | 
|  | 83 echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output | 
|  | 84 | 
| 0 | 85 while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz | 
| 25 | 86 do | 
|  | 87 	if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh | 
|  | 88 		echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output | 
|  | 89 	else | 
| 0 | 90 		echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output | 
| 25 | 91 	fi | 
| 4 | 92 done < $outdir/result.txt | 
| 0 | 93 echo "</table>" >> $output | 
| 49 | 94 echo "<a href='unmatched.txt'>unmatched</a><br /><a href='motif_per_seq.txt'>motif per sequence</a><br /><a href='merged.txt'>all data</a><br /><a href='mutation_by_id.txt'>mutations by id</a><br /><a href='aa_id_mutations.txt'>AA mutations location by id</a><br /><a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output | 
| 2 | 95 | 
|  | 96 | 
| 4 | 97 echo "<img src='all.png'/><br />" >> $output | 
| 26 | 98 echo "<a href='all.txt'>download data</a><br />" >> $output | 
| 4 | 99 if [ -a $outdir/ca.png ] | 
|  | 100 then | 
|  | 101 	echo "<img src='ca.png'/><br />" >> $output | 
| 26 | 102 	echo "<a href='ca.txt'>download data</a><br />" >> $output | 
| 4 | 103 fi | 
|  | 104 if [ -a $outdir/cg.png ] | 
|  | 105 then | 
|  | 106 	echo "<img src='cg.png'/><br />" >> $output | 
| 26 | 107 	echo "<a href='cg.txt'>download data</a><br />" >> $output | 
| 4 | 108 fi | 
| 22 | 109 if [ -a $outdir/scatter.png ] | 
|  | 110 then | 
|  | 111 	echo "<img src='scatter.png'/><br />" >> $output | 
| 26 | 112 	echo "<a href='scatter.txt'>download data</a><br />" >> $output | 
|  | 113 fi | 
| 49 | 114 if [ -a $outdir/frequency_ranges.png ] | 
|  | 115 then | 
|  | 116 	echo "<img src='frequency_ranges.png'/><br />" >> $output | 
|  | 117 	echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output | 
|  | 118 	echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output | 
|  | 119 fi | 
| 26 | 120 if [ -a $outdir/aa_histogram.png ] | 
|  | 121 then | 
|  | 122 	echo "<img src='aa_histogram.png'/><br />" >> $output | 
|  | 123 	echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output | 
| 22 | 124 fi | 
| 2 | 125 | 
| 0 | 126 for gene in ${genes[@]} | 
|  | 127 do | 
|  | 128 	echo "<table border='1'><caption>$gene transition table</caption>" >> $output | 
|  | 129 	while IFS=, read from a c g t | 
|  | 130 		do | 
|  | 131 			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | 
| 4 | 132 	done < $outdir/transitions_${gene}.txt | 
| 0 | 133 	echo "</table>" >> $output | 
|  | 134 done | 
|  | 135 | 
|  | 136 echo "<table border='1'><caption>All transition table</caption>" >> $output | 
|  | 137 while IFS=, read from a c g t | 
|  | 138 	do | 
|  | 139 		echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | 
|  | 140 done < $outdir/transitions.txt | 
|  | 141 echo "</table>" >> $output | 
|  | 142 | 
|  | 143 echo "</html>" >> $output | 
| 2 | 144 | 
| 47 | 145 | 
|  | 146 #optional output for naive | 
|  | 147 | 
|  | 148 if [[ "$naive_output" != "None" ]] | 
|  | 149 then | 
|  | 150 	echo "naive_output: $naive_output" | 
| 50 | 151 	#python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output | 
|  | 152 	Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output} 2>&1 | 
| 47 | 153 	Rscript $dir/naive_output.r $naive_output $outdir/merged.txt $naive_output 2>&1 | 
|  | 154 fi | 
|  | 155 | 
|  | 156 | 
|  | 157 | 
|  | 158 | 
|  | 159 | 
|  | 160 | 
|  | 161 | 
|  | 162 | 
|  | 163 | 
|  | 164 | 
|  | 165 | 
| 2 | 166 #rm $outdir/HS12RSS.txt | 
|  | 167 #rm $outdir/HS23RSS.txt |