| 0 | 1 #!/bin/bash | 
| 4 | 2 set -e | 
| 0 | 3 dir="$(cd "$(dirname "$0")" && pwd)" | 
|  | 4 input=$1 | 
| 19 | 5 method=$2 | 
|  | 6 output=$3 | 
|  | 7 outdir=$4 | 
|  | 8 title=$5 | 
| 22 | 9 include_fr1=$6 | 
| 34 | 10 functionality=$7 | 
|  | 11 unique=$8 | 
| 69 | 12 naive_output_ca=$9 | 
|  | 13 naive_output_cg=${10} | 
|  | 14 naive_output_cm=${11} | 
|  | 15 filter_unique=${12} | 
|  | 16 class_filter=${13} | 
| 0 | 17 mkdir $outdir | 
|  | 18 | 
| 55 | 19 echo "---------------- read parameters ----------------" | 
| 63 | 20 echo "---------------- read parameters ----------------<br />" > $output | 
| 55 | 21 | 
|  | 22 echo "unpacking IMGT file" | 
|  | 23 | 
| 35 | 24 type="`file $input`" | 
|  | 25 if [[ "$type" == *"Zip archive"* ]] ; then | 
|  | 26 	echo "Zip archive" | 
|  | 27 	echo "unzip $input -d $PWD/files/" | 
|  | 28 	unzip $input -d $PWD/files/ | 
|  | 29 elif [[ "$type" == *"XZ compressed data"* ]] ; then | 
|  | 30 	echo "ZX archive" | 
|  | 31 	echo "tar -xJf $input -C $PWD/files/" | 
|  | 32 	mkdir -p $PWD/files/$title | 
|  | 33 	tar -xJf $input -C $PWD/files/$title | 
|  | 34 fi | 
|  | 35 | 
| 64 | 36 cat `find $PWD/files/ -name "1_*"` > $PWD/summary.txt | 
|  | 37 cat `find $PWD/files/ -name "3_*"` > $PWD/sequences.txt | 
|  | 38 cat `find $PWD/files/ -name "5_*"` > $PWD/aa.txt | 
|  | 39 cat `find $PWD/files/ -name "6_*"` > $PWD/junction.txt | 
|  | 40 cat `find $PWD/files/ -name "7_*"` > $PWD/mutationanalysis.txt | 
|  | 41 cat `find $PWD/files/ -name "8_*"` > $PWD/mutationstats.txt | 
|  | 42 cat `find $PWD/files/ -name "10_*"` > $PWD/hotspots.txt | 
|  | 43 | 
|  | 44 | 
|  | 45 | 
|  | 46 #cat $PWD/files/*/1_* > $PWD/summary.txt | 
|  | 47 #cat $PWD/files/*/3_* > $PWD/sequences.txt | 
|  | 48 #cat $PWD/files/*/5_* > $PWD/aa.txt | 
|  | 49 #cat $PWD/files/*/6_* > $PWD/junction.txt | 
|  | 50 #cat $PWD/files/*/7_* > $PWD/mutationanalysis.txt | 
|  | 51 #cat $PWD/files/*/8_* > $PWD/mutationstats.txt | 
|  | 52 #cat $PWD/files/*/10_* > $PWD/hotspots.txt | 
| 3 | 53 | 
| 26 | 54 #BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin" | 
| 19 | 55 | 
|  | 56 echo "${BLASTN_DIR}" | 
|  | 57 | 
|  | 58 echo "identification ($method)" | 
| 63 | 59 echo "identification ($method)<br />" >> $output | 
| 19 | 60 | 
| 55 | 61 echo "blast or custom" | 
|  | 62 | 
| 19 | 63 if [[ "${method}" == "custom" ]] ; then | 
| 55 | 64 	echo "custom" | 
| 19 | 65 	python $dir/gene_identification.py --input $PWD/summary.txt --output $outdir/identified_genes.txt | 
|  | 66 else | 
| 55 | 67 	echo "blast" | 
| 19 | 68 	ID_index=$(cat $PWD/summary.txt | grep -o -P ".+Sequence ID" | grep -o -P "\t" | wc -l) | 
|  | 69 	ID_index=$((ID_index+1)) | 
|  | 70 	sequence_index=$(cat $PWD/summary.txt | grep -o -P ".+\tSequence" | grep -o -P "\t" | wc -l) | 
|  | 71 	sequence_index=$((sequence_index+1)) | 
|  | 72 | 
|  | 73 	echo "$ID_index ${sequence_index}" | 
|  | 74 | 
|  | 75 	cat $PWD/summary.txt | tail -n+2 | cut -f ${ID_index},${sequence_index} | awk '{print ">" $1 "\n" $2}' > $PWD/sequences.fasta | 
|  | 76 | 
|  | 77 	echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > $outdir/identified_genes.txt | 
|  | 78 	${BLASTN_DIR}/blastn -task blastn -db $dir/subclass_definition.db -query $PWD/sequences.fasta -outfmt 6 >> $outdir/identified_genes.txt | 
|  | 79 fi | 
|  | 80 | 
| 55 | 81 echo "---------------- merge_and_filter.r ----------------" | 
| 63 | 82 echo "---------------- merge_and_filter.r ----------------<br />" >> $output | 
| 19 | 83 | 
| 66 | 84 Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} | 
| 0 | 85 | 
| 55 | 86 echo "---------------- mutation_analysis.r ----------------" | 
| 63 | 87 echo "---------------- mutation_analysis.r ----------------<br />" >> $output | 
| 55 | 88 | 
| 4 | 89 genes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm" | 
|  | 90 echo "R mutation analysis" | 
| 22 | 91 Rscript $dir/mutation_analysis.r $outdir/merged.txt $genes $outdir ${include_fr1} 2>&1 | 
| 53 | 92 | 
|  | 93 #echo "." > $output | 
|  | 94 #exit 0 | 
|  | 95 | 
| 55 | 96 | 
|  | 97 | 
|  | 98 echo "---------------- mutation_analysis.py ----------------" | 
| 63 | 99 echo "---------------- mutation_analysis.py ----------------<br />" >> $output | 
| 55 | 100 | 
| 32 | 101 python $dir/mutation_analysis.py --input $outdir/merged.txt --genes $genes --includefr1 "${include_fr1}" --output $outdir/hotspot_analysis.txt | 
| 26 | 102 echo "R AA histogram" | 
| 55 | 103 | 
|  | 104 echo "---------------- aa_histogram.r ----------------" | 
| 63 | 105 echo "---------------- aa_histogram.r ----------------<br />" >> $output | 
| 55 | 106 | 
| 26 | 107 Rscript $dir/aa_histogram.r $outdir/aa_mutations.txt $outdir/aa_histogram.png 2>&1 | 
| 4 | 108 | 
| 0 | 109 genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm) | 
|  | 110 | 
| 53 | 111 funcs=(sum mean median) | 
| 0 | 112 | 
| 53 | 113 | 
| 62 | 114 echo "<html><center><h1>$title</h1></center>" > $output | 
|  | 115 | 
|  | 116 #display the matched/unmatched for clearity | 
|  | 117 | 
|  | 118 matched_count="`cat $outdir/merged.txt | tail -n +2 | wc -l`" | 
|  | 119 unmatched_count="`cat $outdir/unmatched.txt | tail -n +2 | wc -l`" | 
|  | 120 total_count=$((matched_count + unmatched_count)) | 
|  | 121 perc_count=$((unmatched_count / total_count * 100)) | 
|  | 122 perc_count=`bc -l <<< "scale=2; ${unmatched_count} / ${total_count} * 100"` | 
|  | 123 perc_count=`bc -l <<< "scale=2; (${unmatched_count} / ${total_count} * 100 ) / 1"` | 
|  | 124 | 
|  | 125 echo "<center><h2>Total: ${total_count}</h2></center>" >> $output | 
|  | 126 echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>" >> $output | 
|  | 127 echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>" >> $output | 
|  | 128 | 
| 55 | 129 echo "---------------- main tables ----------------" | 
| 53 | 130 for func in ${funcs[@]} | 
| 4 | 131 do | 
| 55 | 132 | 
|  | 133 	echo "---------------- $func table ----------------" | 
|  | 134 | 
| 53 | 135 	cat $outdir/mutations_${func}.txt $outdir/hotspot_analysis_${func}.txt > $outdir/result.txt | 
|  | 136 | 
|  | 137 	echo "<table border='1' width='100%'><caption><h3>${func} table</h3></caption>" >> $output | 
| 58 | 138 	echo "<tr><th>info</th>" >> $output | 
| 53 | 139 	for gene in ${genes[@]} | 
|  | 140 	do | 
|  | 141 		tmp=`cat $outdir/${gene}_${func}_n.txt` | 
|  | 142 		echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output | 
|  | 143 	done | 
|  | 144 	tmp=`cat $outdir/all_${func}_n.txt` | 
|  | 145 	echo "<th><a href='matched_all.txt'>all (N = $tmp)</a></th>" >> $output | 
| 4 | 146 | 
| 53 | 147 	while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz allx ally allz | 
|  | 148 	do | 
|  | 149 		if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh | 
|  | 150 			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output | 
|  | 151 		else | 
|  | 152 			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output | 
|  | 153 		fi | 
|  | 154 	done < $outdir/result.txt | 
|  | 155 | 
|  | 156 done | 
|  | 157 | 
| 55 | 158 echo "---------------- download links ----------------" | 
|  | 159 | 
| 0 | 160 echo "</table>" >> $output | 
| 53 | 161 echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output | 
|  | 162 echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output | 
|  | 163 echo "<a href='merged.txt'>all data</a><br />" >> $output | 
|  | 164 echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output | 
|  | 165 echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output | 
|  | 166 echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output | 
| 77 | 167 echo "<a href='sequence_overview/index.html'>Sequence Overview</a><br />" >> $output | 
| 2 | 168 | 
| 55 | 169 echo "---------------- images ----------------" | 
|  | 170 | 
| 4 | 171 echo "<img src='all.png'/><br />" >> $output | 
| 26 | 172 echo "<a href='all.txt'>download data</a><br />" >> $output | 
| 4 | 173 if [ -a $outdir/ca.png ] | 
|  | 174 then | 
|  | 175 	echo "<img src='ca.png'/><br />" >> $output | 
| 26 | 176 	echo "<a href='ca.txt'>download data</a><br />" >> $output | 
| 4 | 177 fi | 
|  | 178 if [ -a $outdir/cg.png ] | 
|  | 179 then | 
|  | 180 	echo "<img src='cg.png'/><br />" >> $output | 
| 26 | 181 	echo "<a href='cg.txt'>download data</a><br />" >> $output | 
| 4 | 182 fi | 
| 22 | 183 if [ -a $outdir/scatter.png ] | 
|  | 184 then | 
|  | 185 	echo "<img src='scatter.png'/><br />" >> $output | 
| 26 | 186 	echo "<a href='scatter.txt'>download data</a><br />" >> $output | 
|  | 187 fi | 
| 49 | 188 if [ -a $outdir/frequency_ranges.png ] | 
|  | 189 then | 
|  | 190 	echo "<img src='frequency_ranges.png'/><br />" >> $output | 
|  | 191 	echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />" >> $output | 
|  | 192 	echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />" >> $output | 
|  | 193 fi | 
| 26 | 194 if [ -a $outdir/aa_histogram.png ] | 
|  | 195 then | 
|  | 196 	echo "<img src='aa_histogram.png'/><br />" >> $output | 
|  | 197 	echo "<a href='aa_histogram.txt'>download data</a><br />" >> $output | 
| 22 | 198 fi | 
| 2 | 199 | 
| 0 | 200 for gene in ${genes[@]} | 
|  | 201 do | 
|  | 202 	echo "<table border='1'><caption>$gene transition table</caption>" >> $output | 
|  | 203 	while IFS=, read from a c g t | 
|  | 204 		do | 
|  | 205 			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | 
| 53 | 206 	done < $outdir/transitions_${gene}_sum.txt | 
| 0 | 207 	echo "</table>" >> $output | 
|  | 208 done | 
|  | 209 | 
|  | 210 echo "<table border='1'><caption>All transition table</caption>" >> $output | 
|  | 211 while IFS=, read from a c g t | 
|  | 212 	do | 
|  | 213 		echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output | 
| 53 | 214 done < $outdir/transitions_all_sum.txt | 
| 0 | 215 echo "</table>" >> $output | 
|  | 216 | 
|  | 217 echo "</html>" >> $output | 
| 2 | 218 | 
| 47 | 219 | 
|  | 220 #optional output for naive | 
|  | 221 | 
| 55 | 222 echo "---------------- aa_histogram.r ----------------" | 
|  | 223 | 
| 47 | 224 if [[ "$naive_output" != "None" ]] | 
|  | 225 then | 
| 55 | 226 	echo "---------------- imgt_loader.r ----------------" | 
| 50 | 227 	#python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output | 
| 69 | 228 	Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt ${naive_output_ca} 2>&1 | 
| 55 | 229 	echo "---------------- naive_output.r ----------------" | 
| 69 | 230 	Rscript $dir/naive_output.r $naive_output_ca $outdir/merged.txt $naive_output_ca $naive_output_cg $naive_output_cm 2>&1 | 
| 47 | 231 fi | 
|  | 232 | 
| 76 | 233 echo "---------------- sequence_overview.r ----------------" | 
| 47 | 234 | 
| 76 | 235 mkdir $outdir/sequence_overview | 
|  | 236 | 
|  | 237 Rscript $dir/sequence_overview.r $outdir/identified_genes.txt $PWD/sequences.txt $outdir/sequence_overview 2>&1 | 
| 47 | 238 | 
|  | 239 | 
|  | 240 | 
|  | 241 | 
|  | 242 | 
|  | 243 | 
|  | 244 | 
|  | 245 | 
|  | 246 | 
| 2 | 247 #rm $outdir/HS12RSS.txt | 
|  | 248 #rm $outdir/HS23RSS.txt |