Mercurial > repos > davidvanzessen > mutation_analysis

diff wrapper.sh @ 114:e7b550d52eb7 draft
Uploaded
author: davidvanzessen
date: Tue, 09 Aug 2016 07:20:41 -0400
parents: ade5cf6fd2dc
children: 4984c2a06c43
--- a/wrapper.sh	Thu Aug 04 04:52:51 2016 -0400
+++ b/wrapper.sh	Tue Aug 09 07:20:41 2016 -0400
@@ -15,6 +15,7 @@
 naive_output_cm=${11}
 filter_unique=${12}
 class_filter=${13}
+empty_region_filter=${14}
 mkdir $outdir
 
 tar -xzf $dir/style.tar.gz -C $outdir
@@ -80,7 +81,7 @@
 echo "---------------- merge_and_filter.r ----------------"
 echo "---------------- merge_and_filter.r ----------------<br />" >> $log
 
-Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1
+Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1
 
 echo "---------------- creating new IMGT zip ----------------"
 echo "---------------- creating new IMGT zip ----------------<br />" >> $log
@@ -107,32 +108,24 @@
 mkdir $outdir/new_IMGT_cm
 cp $outdir/new_IMGT/* $outdir/new_IMGT_cm
 
-Rscript $dir/tmp/igat.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1
-Rscript $dir/tmp/igat.r $outdir/new_IMGT_ca/ $outdir/merged.txt "ca" 2>&1
-Rscript $dir/tmp/igat.r $outdir/new_IMGT_cg/ $outdir/merged.txt "cg" 2>&1
-Rscript $dir/tmp/igat.r $outdir/new_IMGT_cm/ $outdir/merged.txt "cm" 2>&1
+Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1
+Rscript $dir/new_imgt.r $outdir/new_IMGT_ca/ $outdir/merged.txt "ca" 2>&1
+Rscript $dir/new_imgt.r $outdir/new_IMGT_cg/ $outdir/merged.txt "cg" 2>&1
+Rscript $dir/new_imgt.r $outdir/new_IMGT_cm/ $outdir/merged.txt "cm" 2>&1
 
 
 tmp="$PWD"
 cd $outdir/new_IMGT/ #tar weirdness...
 tar -cJf ../new_IMGT.txz *
-cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT/IgAT.xlsm
-zip -r ../IgAT.zip *
 
 cd $outdir/new_IMGT_ca/
 tar -cJf ../new_IMGT_ca.txz *
-cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_ca/IgAT.xlsm
-zip -r ../IgAT_ca.zip *
 
 cd $outdir/new_IMGT_cg/
 tar -cJf ../new_IMGT_cg.txz *
-cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_cg/IgAT.xlsm
-zip -r ../IgAT_cg.zip *
 
 cd $outdir/new_IMGT_cm/
 tar -cJf ../new_IMGT_cm.txz *
-cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_cm/IgAT.xlsm
-zip -r ../IgAT_cm.zip *
 
 cd $tmp
 
@@ -219,17 +212,17 @@
 		echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output
 	done
 	
+	tmp=`cat $outdir/all_${func}_n.txt`
+	echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
 	tmp=`cat $outdir/unmatched_${func}_n.txt`
 	echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output
-	tmp=`cat $outdir/all_${func}_n.txt`
-	echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output
 
 	while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz
 	do
 		if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh
 			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output
 		else
-			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${unx}/${uny} (${unz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output
+			echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output
 		fi
 	done < $outdir/data_${func}.txt
 	echo "</table>" >> $output
@@ -259,21 +252,37 @@
 
 echo "<div class='tabbertab' title='Transition tables'>" >> $output
 
+echo "<table border='0'>" >> $output
+
 for gene in ${genes[@]}
 do
-	echo "<table border='1'><caption>$gene transition table</caption>" >> $output
+	echo "<tr>" >> $output
+	echo "<td><h1>${gene}</h1></td>" >> $output
+	echo "<td><img src='transitions_heatmap_${gene}.png' /></td>" >> $output
+	echo "<td><img src='transitions_stacked_${gene}.png' /></td>" >> $output
+	echo "<td><table border='1'>" >> $output
 	while IFS=, read from a c g t
 		do
 			echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
 	done < $outdir/transitions_${gene}_sum.txt
-	echo "</table>" >> $output
+	echo "</table></td>" >> $output
+	
+	echo "</tr>" >> $output
 done
 
-echo "<table border='1'><caption>All transition table</caption>" >> $output
+echo "<tr>" >> $output
+echo "<td><h1>All</h1></td>" >> $output
+echo "<td><img src='transitions_heatmap_all.png' /></td>" >> $output
+echo "<td><img src='transitions_stacked_all.png' /></td>" >> $output
+echo "<td><table border='1'>" >> $output
 while IFS=, read from a c g t
 	do
 		echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output
 done < $outdir/transitions_all_sum.txt
+echo "</table></td>" >> $output
+
+echo "</tr>" >> $output
+
 echo "</table>" >> $output
 
 echo "</div>" >> $output #transition tables tab end
@@ -315,30 +324,32 @@
 
 echo "<div class='tabbertab' title='Downloads'>" >> $output
 
-echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output
-echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output
-echo "<a href='merged.txt'>all data</a><br />" >> $output
-echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output
-echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output
-echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output
-echo "<a href='sequence_overview/index.html'>Sequence Overview</a><br />" >> $output
-echo "<a href='base_overview.html'>Base overview</a><br />" >> $output
-echo "<a href='baseline.pdf'>Baseline PDF</a><br />" >> $output
-echo "<a href='baseline.txt'>Baseline Table</a><br />" >> $output
-echo "<a href='baseline_ca.pdf'>Baseline ca PDF</a><br />" >> $output
-echo "<a href='baseline_ca.txt'>Baseline ca Table</a><br />" >> $output
-echo "<a href='baseline_cg.pdf'>Baseline cg PDF</a><br />" >> $output
-echo "<a href='baseline_cg.txt'>Baseline cg Table</a><br />" >> $output
-echo "<a href='baseline_cm.pdf'>Baseline cm PDF</a><br />" >> $output
-echo "<a href='baseline_cm.txt'>Baseline cm Table</a><br />" >> $output
-echo "<a href='IgAT.zip'>IgAT zip</a><br />" >> $output
-echo "<a href='IgAT_ca.zip'>IgAT ca zip</a><br />" >> $output
-echo "<a href='IgAT_cg.zip'>IgAT cg zip</a><br />" >> $output
-echo "<a href='IgAT_cm.zip'>IgAT cm zip</a><br />" >> $output
-echo "<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />" >> $output
-echo "<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />" >> $output
-echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output
-echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output
+echo "<table border='1' width='700px'>" >> $output
+echo "<tr><td>The complete dataset</td><td><a href='merged.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>Download</a></td></tr>" >> $output
+echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline PDF (<a href='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf'>Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline data</td><td><a href='baseline.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline ca PDF</td><td><a href='baseline_ca.pdf'>Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline ca data</td><td><a href='baseline_ca.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline cg PDF</td><td><a href='baseline_cg.pdf'>Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline cg data</td><td><a href='baseline_cg.txt'>Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline cm PDF</td><td><a href='baseline_cm.pdf'>Download</a></td></tr>" >> $output
+echo "<tr><td>Baseline cm data</td><td><a href='baseline_cm.txt'>Download</a></td></tr>" >> $output
+#echo "<tr><td></td><td><a href='IgAT.zip'>IgAT zip</a></td></tr>" >> $output
+#echo "<tr><td></td><td><a href='IgAT_ca.zip'>IgAT ca zip</a></td></tr>" >> $output
+#echo "<tr><td></td><td><a href='IgAT_cg.zip'>IgAT cg zip</a></td></tr>" >> $output
+#echo "<tr><td></td><td><a href='IgAT_cm.zip'>IgAT cm zip</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz'>Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered ca sequences</td><td><a href='new_IMGT_ca.txz'>Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered cg sequences</td><td><a href='new_IMGT_cg.txz'>Download</a></td></tr>" >> $output
+echo "<tr><td>An IMGT archive with just the matched and filtered cm sequences</td><td><a href='new_IMGT_cm.txz'>Download</a></td></tr>" >> $output
+echo "</table>" >> $output
 
 echo "</div>" >> $output #downloads tab end
 
@@ -356,7 +367,7 @@
 mkdir $outdir/baseline/ca_cg_cm
 if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then
 	cd $outdir/baseline/ca_cg_cm
-	bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"	
+	bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt"	
 else
 	echo "No sequences" > "$outdir/baseline.txt"	
 fi
@@ -364,7 +375,7 @@
 mkdir $outdir/baseline/ca
 if [[ $(wc -l < $outdir/new_IMGT_ca/1_Summary.txt) -gt "1" ]]; then
 	cd $outdir/baseline/ca
-	bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt"
+	bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt"
 else
 	echo "No ca sequences" > "$outdir/baseline_ca.txt"	
 fi
@@ -372,7 +383,7 @@
 mkdir $outdir/baseline/cg
 if [[ $(wc -l < $outdir/new_IMGT_cg/1_Summary.txt) -gt "1" ]]; then
 	cd $outdir/baseline/cg
-	bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt"
+	bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt"
 else
 	echo "No cg sequences" > "$outdir/baseline_cg.txt"	
 fi
@@ -380,7 +391,7 @@
 mkdir $outdir/baseline/cm
 if [[ $(wc -l < $outdir/new_IMGT_cm/1_Summary.txt) -gt "1" ]]; then
 	cd $outdir/baseline/cm
-	bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt"
+	bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt"
 else
 	echo "No cm sequences" > "$outdir/baseline_cm.txt"	
 fi
@@ -392,14 +403,18 @@
 
 if [[ "$naive_output" != "None" ]]
 then
-	echo "---------------- imgt_loader.r ----------------"
-	echo "---------------- imgt_loader.r ----------------<br />" >> $log
+	#echo "---------------- imgt_loader.r ----------------"
+	#echo "---------------- imgt_loader.r ----------------<br />" >> $log
 	#python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output
-	Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
+	#Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1
 
-	echo "---------------- naive_output.r ----------------"
-	echo "---------------- naive_output.r ----------------<br />" >> $log
-	Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
+	#echo "---------------- naive_output.r ----------------"
+	#echo "---------------- naive_output.r ----------------<br />" >> $log
+	#Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1
+	
+	cp $outdir/new_IMGT_ca.txz ${naive_output_ca}
+	cp $outdir/new_IMGT_cg.txz ${naive_output_cg}
+	cp $outdir/new_IMGT_cm.txz ${naive_output_cm}
 fi
 
 echo "</table>" >> $outdir/base_overview.html
author	davidvanzessen
date	Tue, 09 Aug 2016 07:20:41 -0400
parents	ade5cf6fd2dc
children	4984c2a06c43