#!/bin/bash
#set -e
#
# Pipeline wrapper: unpacks an IMGT archive, identifies the gene (sub)class of
# every sequence, merges/filters the IMGT tables, runs the mutation analysis
# scripts and assembles an HTML report inside the output directory.
#
# Positional arguments:
#   $1  input                IMGT archive (zip, or xz-compressed tar)
#   $2  method               "custom" selects gene_identification.py,
#                            anything else selects blastn
#   $3  log                  progress log; at the end it is moved to
#                            $outdir/log.html and replaced by a landing page
#   $4  outdir               directory that receives every result file
#   $5  title                heading of the HTML report
#   $6  include_fr1          forwarded to mutation_analysis.r / .py
#   $7  functionality        forwarded to merge_and_filter.r
#   $8  unique               forwarded to merge_and_filter.r
#   $9  naive_output_ca      destination for a copy of new_IMGT_ca.txz
#   $10 naive_output_cg      destination for a copy of new_IMGT_cg.txz
#   $11 naive_output_cm      destination for a copy of new_IMGT_cm.txz
#   $12 filter_unique        forwarded to merge_and_filter.r
#   $13 class_filter         forwarded to merge_and_filter.r
#   $14 empty_region_filter  forwarded to merge_and_filter.r

# Directory this script lives in; the helper scripts and style.tar.gz sit next to it.
dir="$(cd "$(dirname "$0")" && pwd)"
input=$1
method=$2
log=$3
outdir=$4
output="$outdir/index.html" # the main report page
title=$5
include_fr1=$6
functionality=$7
unique=$8
naive_output_ca=$9
naive_output_cg=${10}
naive_output_cm=${11}
filter_unique=${12}
class_filter=${13}
empty_region_filter=${14}

# -p: do not complain when the output directory already exists.
mkdir -p "$outdir"

# Unpack the static report assets into the output directory.
tar -xzf "$dir/style.tar.gz" -C "$outdir"
|  | 22 | 
echo "---------------- read parameters ----------------"
echo "---------------- read parameters ----------------<br />" > "$log"

echo "unpacking IMGT file"

# Concatenate every file below $PWD/files/ whose name matches the glob $1
# into the file $2. Using find -exec (instead of cat `find ...`) keeps paths
# with whitespace intact and yields an empty file, rather than a cat that
# waits on stdin, when nothing matches.
concat_imgt() {
	find "$PWD/files/" -name "$1" -exec cat {} + > "$2"
}

input_type="$(file "$input")"
if [[ "$input_type" == *"Zip archive"* ]]; then
	echo "Zip archive"
	echo "unzip $input -d $PWD/files/"
	unzip "$input" -d "$PWD/files/"
elif [[ "$input_type" == *"XZ compressed data"* ]]; then
	echo "XZ archive"
	echo "tar -xJf $input -C $PWD/files/$title"
	mkdir -p "$PWD/files/$title"
	tar -xJf "$input" -C "$PWD/files/$title"
else
	# Neither format recognised: nothing is unpacked, so say so instead of
	# silently continuing with empty tables.
	echo "unrecognised IMGT archive type: $input_type" >&2
fi

# Merge the per-sample IMGT tables (matched by their numeric file name prefix)
# into one working file per table.
concat_imgt "1_*" "$PWD/summary.txt"
concat_imgt "3_*" "$PWD/sequences.txt"
concat_imgt "5_*" "$PWD/aa.txt"
concat_imgt "6_*" "$PWD/junction.txt"
concat_imgt "7_*" "$PWD/mutationanalysis.txt"
concat_imgt "8_*" "$PWD/mutationstats.txt"
concat_imgt "10_*" "$PWD/hotspots.txt"

# BLASTN_DIR must come from the environment, e.g.
#BLASTN_DIR="/home/galaxy/tmp/blast/ncbi-blast-2.2.30+/bin"

echo "${BLASTN_DIR}"
|  | 59 | 
echo "---------------- identification ($method) ----------------"
echo "---------------- identification ($method) ----------------<br />" >> "$log"

# Convert tab-separated "ID<TAB>sequence" records on stdin into FASTA on
# stdout, dropping records whose sequence is empty.
records_to_fasta() {
	awk -F'\t' '$2 != "" { print ">" $1 "\n" $2 }'
}

if [[ "${method}" == "custom" ]]; then
	python "$dir/gene_identification.py" --input "$PWD/summary.txt" --output "$outdir/identified_genes.txt"
else
	# Column positions are derived by counting the tabs that precede the
	# column name in the summary header, then adding one.
	ID_index=$(grep -o -P ".+Sequence ID" "$PWD/summary.txt" | grep -o -P "\t" | wc -l)
	ID_index=$((ID_index + 1))
	sequence_index=$(grep -o -P ".+\tSequence" "$PWD/summary.txt" | grep -o -P "\t" | wc -l)
	sequence_index=$((sequence_index + 1))

	# Write the query FASTA directly. The previous grep step named
	# sequences.fasta as its input instead of its output, so the file was
	# never produced, and its -B1 context re-emitted the empty lines anyway.
	tail -n +2 "$PWD/summary.txt" \
		| cut -f "${ID_index},${sequence_index}" \
		| records_to_fasta > "$PWD/sequences.fasta"

	# blastn -outfmt 6 has no header line, so write the column names first.
	echo -e "qseqid\tsseqid\tpident\tlength\tmismatch\tgapopen\tqstart\tqend\tsstart\tsend\tevalue\tbitscore" > "$outdir/identified_genes.txt"
	"${BLASTN_DIR}/blastn" -task blastn -db "$dir/subclass_definition.db" -query "$PWD/sequences.fasta" -outfmt 6 >> "$outdir/identified_genes.txt"
fi
|  | 80 | 
echo "---------------- merge_and_filter.r ----------------"
echo "---------------- merge_and_filter.r ----------------<br />" >> "$log"

# merge_and_filter.r takes purely positional arguments: the five IMGT tables
# and the identification result as inputs, then three output paths, then the
# method and filter settings. Every argument is quoted so that an empty
# setting is passed as "" instead of vanishing and shifting the ones after it.
Rscript "$dir/merge_and_filter.r" \
	"$PWD/summary.txt" \
	"$PWD/sequences.txt" \
	"$PWD/mutationanalysis.txt" \
	"$PWD/mutationstats.txt" \
	"$PWD/hotspots.txt" \
	"$outdir/identified_genes.txt" \
	"$outdir/merged.txt" \
	"$outdir/before_unique_filter.txt" \
	"$outdir/unmatched.txt" \
	"$method" \
	"$functionality" \
	"$unique" \
	"${filter_unique}" \
	"${class_filter}" \
	"${empty_region_filter}" 2>&1
| 0 | 85 | 
echo "---------------- creating new IMGT zip ----------------"
echo "---------------- creating new IMGT zip ----------------<br />" >> "$log"

mkdir -p "$outdir/new_IMGT"

# Concatenate every unpacked IMGT file matching the glob $1 into
# $outdir/new_IMGT/$2. find -exec avoids word splitting of paths and a cat
# that waits on stdin when no file matches.
copy_imgt_section() {
	find "$PWD/files/" -name "$1" -exec cat {} + > "$outdir/new_IMGT/$2"
}

copy_imgt_section "1_*" "1_Summary.txt"
copy_imgt_section "2_*" "2_IMGT-gapped-nt-sequences.txt"
copy_imgt_section "3_*" "3_Nt-sequences.txt"
copy_imgt_section "4_*" "4_IMGT-gapped-AA-sequences.txt"
copy_imgt_section "5_*" "5_AA-sequences.txt"
copy_imgt_section "6_*" "6_Junction.txt"
copy_imgt_section "7_*" "7_V-REGION-mutation-and-AA-change-table.txt"
copy_imgt_section "8_*" "8_V-REGION-nt-mutation-statistics.txt"
copy_imgt_section "9_*" "9_V-REGION-AA-change-statistics.txt"
copy_imgt_section "10_*" "10_V-REGION-mutation-hotspots.txt"

# (Sub)classes that each get their own copy of the IMGT tables.
imgt_classes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# All copies are made BEFORE new_imgt.r touches new_IMGT, so every class
# directory starts from the complete tables.
for imgt_class in "${imgt_classes[@]}"; do
	mkdir -p "$outdir/new_IMGT_${imgt_class}"
	cp "$outdir"/new_IMGT/* "$outdir/new_IMGT_${imgt_class}"
done

# new_imgt.r receives a table directory, merged.txt and a class selector;
# "-" is the selector used for the all-classes directory.
Rscript "$dir/new_imgt.r" "$outdir/new_IMGT/" "$outdir/merged.txt" "-" 2>&1
for imgt_class in "${imgt_classes[@]}"; do
	Rscript "$dir/new_imgt.r" "$outdir/new_IMGT_${imgt_class}/" "$outdir/merged.txt" "${imgt_class}" 2>&1
done

# Archive from inside each directory so members are stored without a leading
# directory component. The subshell keeps the caller's working directory
# untouched and skips tar when the cd fails.
for imgt_dir in new_IMGT "${imgt_classes[@]/#/new_IMGT_}"; do
	(
		cd "$outdir/$imgt_dir" || exit 1
		tar -cJf "../${imgt_dir}.txz" *
	)
done
|  | 176 | 
echo "---------------- mutation_analysis.r ----------------"
echo "---------------- mutation_analysis.r ----------------<br />" >> "$log"

# Comma-separated class list handed to the analysis scripts.
classes="ca,ca1,ca2,cg,cg1,cg2,cg3,cg4,cm,unmatched"
echo "R mutation analysis"
# Arguments are quoted so an empty include_fr1 stays in its position.
Rscript "$dir/mutation_analysis.r" "$outdir/merged.txt" "$classes" "$outdir" "${include_fr1}" 2>&1


echo "---------------- mutation_analysis.py ----------------"
echo "---------------- mutation_analysis.py ----------------<br />" >> "$log"

python "$dir/mutation_analysis.py" \
	--input "$outdir/merged.txt" \
	--genes "$classes" \
	--includefr1 "${include_fr1}" \
	--output "$outdir/hotspot_analysis.txt"

echo "---------------- aa_histogram.r ----------------"
echo "---------------- aa_histogram.r ----------------<br />" >> "$log"

Rscript "$dir/aa_histogram.r" "$outdir/aa_id_mutations.txt" "$outdir/absent_aa_id.txt" "ca,cg,cm" "$outdir/" 2>&1

# When a histogram with an empty class suffix exists, give it (and its data
# file) the plain name that the report links to.
if [[ -e "$outdir/aa_histogram_.png" ]]; then
	mv "$outdir/aa_histogram_.png" "$outdir/aa_histogram.png"
	mv "$outdir/aa_histogram_.txt" "$outdir/aa_histogram.txt"
fi
| 4 | 198 | 
# Classes that get their own column in the report tables.
genes=(ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm)

# Only the "sum" tables are rendered. The earlier (sum mean median) assignment
# was overwritten immediately and has been dropped.
funcs=(sum)

echo "---------------- sequence_overview.r ----------------"
echo "---------------- sequence_overview.r ----------------<br />" >> "$log"

mkdir -p "$outdir/sequence_overview"

Rscript "$dir/sequence_overview.r" \
	"$outdir/before_unique_filter.txt" \
	"$outdir/merged.txt" \
	"$outdir/sequence_overview" \
	"$classes" \
	"$outdir/hotspot_analysis_sum.txt" 2>&1

# Start the per-sequence base count page. The closing </table> is appended
# near the end of the script.
echo "<table border='1'>" > "$outdir/base_overview.html"

# One HTML row per line of ntoverview.txt (tab-separated: ID, class, sequence
# and the A/C/G/T columns). -r keeps backslashes in the data literal.
while IFS=$'\t' read -r ID class seq A C G T; do
	echo "<tr><td>$ID</td><td>$seq</td><td>$class</td><td>$A</td><td>$C</td><td>$G</td><td>$T</td></tr>"
done < "$outdir/sequence_overview/ntoverview.txt" >> "$outdir/base_overview.html"
|  | 218 | 
# Print $1 as a percentage of $2 with two decimals; prints 0 when $2 is not
# positive so an empty result set cannot trigger a division by zero.
# The multiplication happens before the division, so no precision is lost
# (1 of 3 gives 33.33, not 33.00).
percentage_of() {
	LC_ALL=C awk -v part="$1" -v whole="$2" \
		'BEGIN { if (whole + 0 > 0) printf "%.2f", part * 100 / whole; else printf "0" }'
}

# Start the report page: title plus the scripts/stylesheet unpacked from
# style.tar.gz.
{
	echo "<html><center><h1>$title</h1></center>"
	echo "<script type='text/javascript' src='jquery-1.11.0.min.js'></script>"
	echo "<script type='text/javascript' src='tabber.js'></script>"
	echo "<script type='text/javascript' src='script.js'></script>"
	echo "<link rel='stylesheet' type='text/css' href='style.css'>"
} > "$output"

#display the matched/unmatched for clarity

# Matched: lines of merged.txt that do not mention "unmatched", minus the header.
matched_count="$(grep -v 'unmatched' "$outdir/merged.txt" | tail -n +2 | wc -l)"
# Unmatched: data lines of unmatched.txt (header skipped).
unmatched_count="$(tail -n +2 "$outdir/unmatched.txt" | wc -l)"
total_count=$((matched_count + unmatched_count))
perc_count="$(percentage_of "$unmatched_count" "$total_count")"

{
	echo "<center><h2>Total: ${total_count}</h2></center>"
	echo "<center><h2>Matched: ${matched_count} Unmatched: ${unmatched_count}</h2></center>"
	echo "<center><h2>Percentage unmatched: ${perc_count}</h2></center>"
} >> "$output"
|  | 237 | 
echo "---------------- main tables ----------------"
echo "---------------- main tables ----------------<br />" >> "$log"

# Render one comma-separated data line ($1) as an HTML table row on stdout.
# Field 0 is the row name, followed by x,y,z triples for
# ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm unmatched all (in that order).
# Cells are printed as "x/y (z%)"; the two S/R ratio rows omit the "%".
# Every row ends with the "all" and then the "unmatched" cell, matching the
# header columns (ratio rows used to lack the unmatched cell).
render_table_row() {
	local -a cells
	local name suffix row group x y z
	IFS=, read -r -a cells <<< "$1"
	name="${cells[0]}"
	if [[ "$name" == "FR S/R (ratio)" || "$name" == "CDR S/R (ratio)" ]]; then
		suffix=""
	else
		suffix="%"
	fi
	row="<tr><td>$name</td>"
	# Triple indices in display order: the nine classes, all (10), unmatched (9).
	for group in 0 1 2 3 4 5 6 7 8 10 9; do
		x="${cells[1 + group * 3]}"
		y="${cells[2 + group * 3]}"
		z="${cells[3 + group * 3]}"
		row+="<td>${x}/${y} (${z}${suffix})</td>"
	done
	printf '%s\n' "${row}</tr>"
}

echo "<div class='tabber'>" >> "$output"
echo "<div class='tabbertab' title='SHM Overview'>" >> "$output"

for func in "${funcs[@]}"; do

	echo "---------------- $func table ----------------"
	echo "---------------- $func table ----------------<br />" >> "$log"

	# The table body is the mutation rows followed by the hotspot rows.
	cat "$outdir/mutations_${func}.txt" "$outdir/hotspot_analysis_${func}.txt" > "$outdir/data_${func}.txt"

	echo "<table border='1' width='100%'><caption><h3><a href='data_${func}.txt'>${func} table</a></h3></caption>" >> "$output"
	echo "<tr><th>info</th>" >> "$output"
	for gene in "${genes[@]}"; do
		n_count=$(cat "$outdir/${gene}_${func}_n.txt")
		echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $n_count)</a></th>" >> "$output"
	done

	n_count=$(cat "$outdir/all_${func}_n.txt")
	echo "<th><a href='matched_all_${func}.txt'>all (N = $n_count)</a></th>" >> "$output"
	# The unmatched column header uses the count taken from unmatched.txt.
	echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> "$output"
	echo "</tr>" >> "$output"

	while IFS= read -r data_line; do
		render_table_row "$data_line" >> "$output"
	done < "$outdir/data_${func}.txt"
	echo "</table>" >> "$output"
done

echo "</div>" >> "$output" #SHM overview tab end
|  | 278 | 
echo "---------------- images ----------------"
echo "---------------- images ----------------<br />" >> "$log"

# Print one row of the transition overview on stdout.
#   $1  heading shown in the first cell
#   $2  key used in the file names (transitions_heatmap_<key>.png,
#       transitions_stacked_<key>.png, transitions_<key>_sum.txt)
# The comma-separated sum file is rendered as a nested table.
transition_row() {
	local label=$1 key=$2
	local from a c g t
	echo "<tr>"
	echo "<td><h1>${label}</h1></td>"
	echo "<td><img src='transitions_heatmap_${key}.png' /></td>"
	echo "<td><img src='transitions_stacked_${key}.png' /></td>"
	echo "<td><table border='1'>"
	while IFS=, read -r from a c g t; do
		echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>"
	done < "$outdir/transitions_${key}_sum.txt"
	echo "</table></td>"
	echo "</tr>"
}

{
	echo "<div class='tabbertab' title='SHM Frequency'>"

	# Plots are optional: only link the ones that were produced.
	if [[ -e "$outdir/scatter.png" ]]; then
		echo "<img src='scatter.png'/><br />"
		echo "<a href='scatter.txt'>download data</a><br />"
	fi
	if [[ -e "$outdir/frequency_ranges.png" ]]; then
		echo "<img src='frequency_ranges.png'/><br />"
		echo "<a href='frequency_ranges_classes.txt'>download class data</a><br />"
		echo "<a href='frequency_ranges_subclasses.txt'>download subclass data</a><br />"
	fi

	echo "</div>" #SHM frequency tab end

	echo "<div class='tabbertab' title='Transition tables'>"

	echo "<table border='0'>"
	for gene in "${genes[@]}"; do
		transition_row "${gene}" "${gene}"
	done
	transition_row "All" "all"
	echo "</table>"

	echo "</div>" #transition tables tab end

	echo "<div class='tabbertab' title='Antigen Selection'>"

	if [[ -e "$outdir/aa_histogram.png" ]]; then
		echo "<img src='aa_histogram.png'/><br />"
		echo "<a href='aa_histogram.txt'>download data</a><br />"
		echo "<img src='aa_histogram_ca.png'/><br />"
		echo "<a href='aa_histogram_ca.txt'>download data</a><br />"
		echo "<img src='aa_histogram_cg.png'/><br />"
		echo "<a href='aa_histogram_cg.txt'>download data</a><br />"
		echo "<img src='aa_histogram_cm.png'/><br />"
		echo "<a href='aa_histogram_cm.txt'>download data</a><br />"
	fi

	echo "<embed src='baseline_ca.pdf' width='700px' height='1000px'>"
	echo "<embed src='baseline_cg.pdf' width='700px' height='1000px'>"
	echo "<embed src='baseline_cm.pdf' width='700px' height='1000px'>"

	echo "</div>" #antigen selection tab end

	echo "<div class='tabbertab' title='CSR'>"

	if [[ -e "$outdir/ca.png" ]]; then
		echo "<img src='ca.png'/><br />"
		echo "<a href='ca.txt'>download data</a><br />"
	fi
	if [[ -e "$outdir/cg.png" ]]; then
		echo "<img src='cg.png'/><br />"
		echo "<a href='cg.txt'>download data</a><br />"
	fi

	echo "</div>" #CSR tab end
} >> "$output"
|  | 369 | 
# Print one row of the downloads table: description ($1) and link target ($2).
download_row() {
	echo "<tr><td>$1</td><td><a href='$2'>Download</a></td></tr>"
}

{
	echo "<div class='tabbertab' title='Downloads'>"

	echo "<table border='1' width='700px'>"
	download_row "The complete dataset" "merged.txt"
	download_row "The alignment info on the unmatched sequences" "unmatched.txt"
	download_row "Motif data per sequence ID" "motif_per_seq.txt"
	download_row "Mutation data per sequence ID" "mutation_by_id.txt"
	download_row "AA mutation data per sequence ID" "aa_id_mutations.txt"
	download_row "Absent AA location data per sequence ID" "absent_aa_id.txt"
	download_row "CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once" "sequence_overview/index.html"
	download_row "Base count for every sequence" "base_overview.html"
	# The link inside the description used to be written as <href a='...'>,
	# which is not a valid anchor element.
	download_row "Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)" "baseline.pdf"
	download_row "Baseline data" "baseline.txt"
	for baseline_class in ca cg cm; do
		download_row "Baseline ${baseline_class} PDF" "baseline_${baseline_class}.pdf"
		download_row "Baseline ${baseline_class} data" "baseline_${baseline_class}.txt"
	done
	download_row "An IMGT archive with just the matched and filtered sequences" "new_IMGT.txz"
	for archive_class in ca ca1 ca2 cg cg1 cg2 cg3 cg4 cm; do
		download_row "An IMGT archive with just the matched and filtered ${archive_class} sequences" "new_IMGT_${archive_class}.txz"
	done
	echo "</table>"

	echo "</div>" #downloads tab end

	echo "</div>" #tabs end

	echo "</html>"
} >> "$output"
| 2 | 410 | 
echo "---------------- baseline ----------------"
echo "---------------- baseline ----------------<br />" >> "$log"

# Run the baseline wrapper for one IMGT archive, or write a placeholder text
# file when that archive holds no sequences.
#   $1  label: name of the working directory below $outdir/baseline, also
#       passed to the baseline wrapper
#   $2  suffix selecting the inputs/outputs ("" or e.g. "_ca"):
#       new_IMGT<suffix>/, new_IMGT<suffix>.txz, baseline<suffix>.pdf/.txt
#   $3  text written to baseline<suffix>.txt when there is nothing to analyse
# The wrapper runs in a subshell that has changed into the working directory,
# so the caller's working directory is never altered and a failed cd skips
# the run instead of executing it in the wrong place.
run_baseline() {
	local label=$1 suffix=$2 empty_message=$3
	local line_count

	mkdir -p "$outdir/baseline/$label"

	# More than one line means at least one record besides the header.
	line_count=$(wc -l < "$outdir/new_IMGT${suffix}/1_Summary.txt")
	if [[ "${line_count:-0}" -gt 1 ]]; then
		(
			cd "$outdir/baseline/$label" || exit 1
			bash "$dir/baseline/wrapper.sh" 1 1 1 1 0 0 "25:26:38:55:65:104:-" \
				"$outdir/new_IMGT${suffix}.txz" \
				"$label" \
				"$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" \
				"$outdir/baseline${suffix}.pdf" \
				"Sequence.ID" \
				"$outdir/baseline${suffix}.txt"
		)
	else
		echo "$empty_message" > "$outdir/baseline${suffix}.txt"
	fi
}

mkdir -p "$outdir/baseline"

run_baseline "ca_cg_cm" "" "No sequences"
run_baseline "ca" "_ca" "No ca sequences"
run_baseline "cg" "_cg" "No cg sequences"
run_baseline "cm" "_cm" "No cm sequences"
|  | 451 | 
echo "---------------- naive_output.r ----------------"
echo "---------------- naive_output.r ----------------<br />" >> "$log"

# Copy the archive $1 to the destination $2, unless the destination is empty
# or the literal "None" (meaning: output not requested).
export_naive() {
	local archive=$1 destination=$2
	if [[ -n "$destination" && "$destination" != "None" ]]; then
		cp "$archive" "$destination"
	fi
}

# The old guard compared "$naive_output", a variable that is never set, so it
# was always true and cp ran even toward a "None" destination. Each
# destination is now checked on its own.
export_naive "$outdir/new_IMGT_ca.txz" "${naive_output_ca}"
export_naive "$outdir/new_IMGT_cg.txz" "${naive_output_cg}"
export_naive "$outdir/new_IMGT_cm.txz" "${naive_output_cm}"
|  | 470 | 
# Close the table of the base overview page that was opened earlier.
echo "</table>" >> "$outdir/base_overview.html"

# Keep the progress log as log.html; $log itself becomes a landing page.
mv "$log" "$outdir/log.html"

# Turn tab-separated "step<TAB>sequences<TAB>percentage" lines on stdin into
# HTML table rows on stdout. IFS is set for the read only, so the shell's
# global IFS is left alone (the old save/restore stored $TMP, not $IFS).
filtering_rows() {
	local step seq perc
	while IFS=$'\t' read -r step seq perc; do
		echo "<tr>"
		echo "<td>$step</td>"
		echo "<td>$seq</td>"
		echo "<td>${perc}%</td>"
		echo "</tr>"
	done
}

{
	echo "<html><center><h1><a href='index.html'>Click here for the results</a></h1>Tip: Open it in a new tab (middle mouse button or right mouse button -> 'open in new tab' on the link above)<br />"
	echo "<table border = 1>"
	echo "<thead><tr><th>Info</th><th>Sequences</th><th>Percentage</th></tr></thead>"
	filtering_rows < "$outdir/filtering_steps.txt"
	echo "</table></center></html>"
} > "$log"

echo "---------------- Done! ----------------"
echo "---------------- Done! ----------------<br />" >> "$outdir/log.html"
| 47 | 495 | 
| 110 | 496 | 
|  | 497 | 
|  | 498 | 
|  | 499 | 
|  | 500 | 
|  | 501 | 
|  | 502 | 
|  | 503 | 
|  | 504 | 
|  | 505 | 
|  | 506 | 
|  | 507 | 
|  | 508 | 
|  | 509 | 
|  | 510 | 
|  | 511 | 
|  | 512 | 
|  | 513 | 
|  | 514 | 
|  | 515 |