Mercurial > repos > davidvanzessen > mutation_analysis
diff wrapper.sh @ 114:e7b550d52eb7 draft
Uploaded
author | davidvanzessen |
---|---|
date | Tue, 09 Aug 2016 07:20:41 -0400 |
parents | ade5cf6fd2dc |
children | 4984c2a06c43 |
line wrap: on
line diff
--- a/wrapper.sh Thu Aug 04 04:52:51 2016 -0400 +++ b/wrapper.sh Tue Aug 09 07:20:41 2016 -0400 @@ -15,6 +15,7 @@ naive_output_cm=${11} filter_unique=${12} class_filter=${13} +empty_region_filter=${14} mkdir $outdir tar -xzf $dir/style.tar.gz -C $outdir @@ -80,7 +81,7 @@ echo "---------------- merge_and_filter.r ----------------" echo "---------------- merge_and_filter.r ----------------<br />" >> $log -Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} 2>&1 +Rscript $dir/merge_and_filter.r $PWD/summary.txt $PWD/sequences.txt $PWD/mutationanalysis.txt $PWD/mutationstats.txt $PWD/hotspots.txt $outdir/identified_genes.txt $outdir/merged.txt $outdir/before_unique_filter.txt $outdir/unmatched.txt $method $functionality $unique ${filter_unique} ${class_filter} ${empty_region_filter} 2>&1 echo "---------------- creating new IMGT zip ----------------" echo "---------------- creating new IMGT zip ----------------<br />" >> $log @@ -107,32 +108,24 @@ mkdir $outdir/new_IMGT_cm cp $outdir/new_IMGT/* $outdir/new_IMGT_cm -Rscript $dir/tmp/igat.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1 -Rscript $dir/tmp/igat.r $outdir/new_IMGT_ca/ $outdir/merged.txt "ca" 2>&1 -Rscript $dir/tmp/igat.r $outdir/new_IMGT_cg/ $outdir/merged.txt "cg" 2>&1 -Rscript $dir/tmp/igat.r $outdir/new_IMGT_cm/ $outdir/merged.txt "cm" 2>&1 +Rscript $dir/new_imgt.r $outdir/new_IMGT/ $outdir/merged.txt "-" 2>&1 +Rscript $dir/new_imgt.r $outdir/new_IMGT_ca/ $outdir/merged.txt "ca" 2>&1 +Rscript $dir/new_imgt.r $outdir/new_IMGT_cg/ $outdir/merged.txt "cg" 2>&1 +Rscript $dir/new_imgt.r $outdir/new_IMGT_cm/ $outdir/merged.txt "cm" 2>&1 tmp="$PWD" cd $outdir/new_IMGT/ #tar weirdness... tar -cJf ../new_IMGT.txz * -cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT/IgAT.xlsm -zip -r ../IgAT.zip * cd $outdir/new_IMGT_ca/ tar -cJf ../new_IMGT_ca.txz * -cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_ca/IgAT.xlsm -zip -r ../IgAT_ca.zip * cd $outdir/new_IMGT_cg/ tar -cJf ../new_IMGT_cg.txz * -cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_cg/IgAT.xlsm -zip -r ../IgAT_cg.zip * cd $outdir/new_IMGT_cm/ tar -cJf ../new_IMGT_cm.txz * -cp $dir/tmp/IgAT.xlsm $outdir/new_IMGT_cm/IgAT.xlsm -zip -r ../IgAT_cm.zip * cd $tmp @@ -219,17 +212,17 @@ echo "<th><a href='matched_${gene}_${func}.txt'>${gene} (N = $tmp)</a></th>" >> $output done + tmp=`cat $outdir/all_${func}_n.txt` + echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output tmp=`cat $outdir/unmatched_${func}_n.txt` echo "<th><a href='unmatched.txt'>unmatched (N = ${unmatched_count})</a></th>" >> $output - tmp=`cat $outdir/all_${func}_n.txt` - echo "<th><a href='matched_all_${func}.txt'>all (N = $tmp)</a></th>" >> $output while IFS=, read name cax cay caz ca1x ca1y ca1z ca2x ca2y ca2z cgx cgy cgz cg1x cg1y cg1z cg2x cg2y cg2z cg3x cg3y cg3z cg4x cg4y cg4z cmx cmy cmz unx uny unz allx ally allz do if [ "$name" == "FR S/R (ratio)" ] || [ "$name" == "CDR S/R (ratio)" ] ; then #meh echo "<tr><td>$name</td><td>${cax}/${cay} (${caz})</td><td>${ca1x}/${ca1y} (${ca1z})</td><td>${ca2x}/${ca2y} (${ca2z})</td><td>${cgx}/${cgy} (${cgz})</td><td>${cg1x}/${cg1y} (${cg1z})</td><td>${cg2x}/${cg2y} (${cg2z})</td><td>${cg3x}/${cg3y} (${cg3z})</td><td>${cg4x}/${cg4y} (${cg4z})</td><td>${cmx}/${cmy} (${cmz})</td><td>${allx}/${ally} (${allz})</td></tr>" >> $output else - echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${unx}/${uny} (${unz}%)</td><td>${allx}/${ally} (${allz}%)</td></tr>" >> $output + echo "<tr><td>$name</td><td>${cax}/${cay} (${caz}%)</td><td>${ca1x}/${ca1y} (${ca1z}%)</td><td>${ca2x}/${ca2y} (${ca2z}%)</td><td>${cgx}/${cgy} (${cgz}%)</td><td>${cg1x}/${cg1y} (${cg1z}%)</td><td>${cg2x}/${cg2y} (${cg2z}%)</td><td>${cg3x}/${cg3y} (${cg3z}%)</td><td>${cg4x}/${cg4y} (${cg4z}%)</td><td>${cmx}/${cmy} (${cmz}%)</td><td>${allx}/${ally} (${allz}%)</td><td>${unx}/${uny} (${unz}%)</td></tr>" >> $output fi done < $outdir/data_${func}.txt echo "</table>" >> $output @@ -259,21 +252,37 @@ echo "<div class='tabbertab' title='Transition tables'>" >> $output +echo "<table border='0'>" >> $output + for gene in ${genes[@]} do - echo "<table border='1'><caption>$gene transition table</caption>" >> $output + echo "<tr>" >> $output + echo "<td><h1>${gene}</h1></td>" >> $output + echo "<td><img src='transitions_heatmap_${gene}.png' /></td>" >> $output + echo "<td><img src='transitions_stacked_${gene}.png' /></td>" >> $output + echo "<td><table border='1'>" >> $output while IFS=, read from a c g t do echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output done < $outdir/transitions_${gene}_sum.txt - echo "</table>" >> $output + echo "</table></td>" >> $output + + echo "</tr>" >> $output done -echo "<table border='1'><caption>All transition table</caption>" >> $output +echo "<tr>" >> $output +echo "<td><h1>All</h1></td>" >> $output +echo "<td><img src='transitions_heatmap_all.png' /></td>" >> $output +echo "<td><img src='transitions_stacked_all.png' /></td>" >> $output +echo "<td><table border='1'>" >> $output while IFS=, read from a c g t do echo "<tr><td>$from</td><td>$a</td><td>$c</td><td>$g</td><td>$t</td></tr>" >> $output done < $outdir/transitions_all_sum.txt +echo "</table></td>" >> $output + +echo "</tr>" >> $output + echo "</table>" >> $output echo "</div>" >> $output #transition tables tab end @@ -315,30 +324,32 @@ echo "<div class='tabbertab' title='Downloads'>" >> $output -echo "<a href='unmatched.txt'>unmatched</a><br />" >> $output -echo "<a href='motif_per_seq.txt'>motif per sequence</a><br />" >> $output -echo "<a href='merged.txt'>all data</a><br />" >> $output -echo "<a href='mutation_by_id.txt'>mutations by id</a><br />" >> $output -echo "<a href='aa_id_mutations.txt'>AA mutations location by id</a><br />" >> $output -echo "<a href='absent_aa_id.txt'>Absant AA locations by id</a><br />" >> $output -echo "<a href='sequence_overview/index.html'>Sequence Overview</a><br />" >> $output -echo "<a href='base_overview.html'>Base overview</a><br />" >> $output -echo "<a href='baseline.pdf'>Baseline PDF</a><br />" >> $output -echo "<a href='baseline.txt'>Baseline Table</a><br />" >> $output -echo "<a href='baseline_ca.pdf'>Baseline ca PDF</a><br />" >> $output -echo "<a href='baseline_ca.txt'>Baseline ca Table</a><br />" >> $output -echo "<a href='baseline_cg.pdf'>Baseline cg PDF</a><br />" >> $output -echo "<a href='baseline_cg.txt'>Baseline cg Table</a><br />" >> $output -echo "<a href='baseline_cm.pdf'>Baseline cm PDF</a><br />" >> $output -echo "<a href='baseline_cm.txt'>Baseline cm Table</a><br />" >> $output -echo "<a href='IgAT.zip'>IgAT zip</a><br />" >> $output -echo "<a href='IgAT_ca.zip'>IgAT ca zip</a><br />" >> $output -echo "<a href='IgAT_cg.zip'>IgAT cg zip</a><br />" >> $output -echo "<a href='IgAT_cm.zip'>IgAT cm zip</a><br />" >> $output -echo "<a href='new_IMGT.txz'>Filtered IMGT zip</a><br />" >> $output -echo "<a href='new_IMGT_ca.txz'>Filtered ca IMGT zip</a><br />" >> $output -echo "<a href='new_IMGT_cg.txz'>Filtered cg IMGT zip</a><br />" >> $output -echo "<a href='new_IMGT_cm.txz'>Filtered cm IMGT zip</a><br />" >> $output +echo "<table border='1' width='700px'>" >> $output +echo "<tr><td>The complete dataset</td><td><a href='merged.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>The alignment info on the unmatched sequences</td><td><a href='unmatched.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>Motif data per sequence ID</td><td><a href='motif_per_seq.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>Mutation data per sequence ID</td><td><a href='mutation_by_id.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>AA mutation data per sequence ID</td><td><a href='aa_id_mutations.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>Absent AA location data per sequence ID</td><td><a href='absent_aa_id.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>CDR1+FR2+CDR2+FR3+CDR3 sequences that show up more than once</td><td><a href='sequence_overview/index.html'>Download</a></td></tr>" >> $output +echo "<tr><td>Base count for every sequence</td><td><a href='base_overview.html'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline PDF (<href a='http://selection.med.yale.edu/baseline/'>http://selection.med.yale.edu/baseline/</a>)</td><td><a href='baseline.pdf'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline data</td><td><a href='baseline.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline ca PDF</td><td><a href='baseline_ca.pdf'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline ca data</td><td><a href='baseline_ca.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline cg PDF</td><td><a href='baseline_cg.pdf'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline cg data</td><td><a href='baseline_cg.txt'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline cm PDF</td><td><a href='baseline_cm.pdf'>Download</a></td></tr>" >> $output +echo "<tr><td>Baseline cm data</td><td><a href='baseline_cm.txt'>Download</a></td></tr>" >> $output +#echo "<tr><td></td><td><a href='IgAT.zip'>IgAT zip</a></td></tr>" >> $output +#echo "<tr><td></td><td><a href='IgAT_ca.zip'>IgAT ca zip</a></td></tr>" >> $output +#echo "<tr><td></td><td><a href='IgAT_cg.zip'>IgAT cg zip</a></td></tr>" >> $output +#echo "<tr><td></td><td><a href='IgAT_cm.zip'>IgAT cm zip</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered sequences</td><td><a href='new_IMGT.txz'>Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered ca sequences</td><td><a href='new_IMGT_ca.txz'>Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered cg sequences</td><td><a href='new_IMGT_cg.txz'>Download</a></td></tr>" >> $output +echo "<tr><td>An IMGT archive with just the matched and filtered cm sequences</td><td><a href='new_IMGT_cm.txz'>Download</a></td></tr>" >> $output +echo "</table>" >> $output echo "</div>" >> $output #downloads tab end @@ -356,7 +367,7 @@ mkdir $outdir/baseline/ca_cg_cm if [[ $(wc -l < $outdir/new_IMGT/1_Summary.txt) -gt "1" ]]; then cd $outdir/baseline/ca_cg_cm - bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT.txz "ca_cg_cm" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline.pdf" "Sequence.ID" "$outdir/baseline.txt" else echo "No sequences" > "$outdir/baseline.txt" fi @@ -364,7 +375,7 @@ mkdir $outdir/baseline/ca if [[ $(wc -l < $outdir/new_IMGT_ca/1_Summary.txt) -gt "1" ]]; then cd $outdir/baseline/ca - bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_ca.txz "ca" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_ca.pdf" "Sequence.ID" "$outdir/baseline_ca.txt" else echo "No ca sequences" > "$outdir/baseline_ca.txt" fi @@ -372,7 +383,7 @@ mkdir $outdir/baseline/cg if [[ $(wc -l < $outdir/new_IMGT_cg/1_Summary.txt) -gt "1" ]]; then cd $outdir/baseline/cg - bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cg.txz "cg" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cg.pdf" "Sequence.ID" "$outdir/baseline_cg.txt" else echo "No cg sequences" > "$outdir/baseline_cg.txt" fi @@ -380,7 +391,7 @@ mkdir $outdir/baseline/cm if [[ $(wc -l < $outdir/new_IMGT_cm/1_Summary.txt) -gt "1" ]]; then cd $outdir/baseline/cm - bash $dir/tmp/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/tmp/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt" + bash $dir/baseline/wrapper.sh 1 1 1 1 0 0 "25:26:38:55:65:104:-" $outdir/new_IMGT_cm.txz "cm" "$dir/baseline/IMGT-reference-seqs-IGHV-2015-11-05.fa" "$outdir/baseline_cm.pdf" "Sequence.ID" "$outdir/baseline_cm.txt" else echo "No cm sequences" > "$outdir/baseline_cm.txt" fi @@ -392,14 +403,18 @@ if [[ "$naive_output" != "None" ]] then - echo "---------------- imgt_loader.r ----------------" - echo "---------------- imgt_loader.r ----------------<br />" >> $log + #echo "---------------- imgt_loader.r ----------------" + #echo "---------------- imgt_loader.r ----------------<br />" >> $log #python $dir/imgt_loader.py --summ $PWD/summary.txt --aa $PWD/aa.txt --junction $PWD/junction.txt --output $naive_output - Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1 + #Rscript --verbose $dir/imgt_loader.r $PWD/summary.txt $PWD/aa.txt $PWD/junction.txt $outdir/loader_output.txt 2>&1 - echo "---------------- naive_output.r ----------------" - echo "---------------- naive_output.r ----------------<br />" >> $log - Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1 + #echo "---------------- naive_output.r ----------------" + #echo "---------------- naive_output.r ----------------<br />" >> $log + #Rscript $dir/naive_output.r $outdir/loader_output.txt $outdir/merged.txt ${naive_output_ca} ${naive_output_cg} ${naive_output_cm} $outdir/ntoverview.txt $outdir/ntsum.txt 2>&1 + + cp $outdir/new_IMGT_ca.txz ${naive_output_ca} + cp $outdir/new_IMGT_cg.txz ${naive_output_cg} + cp $outdir/new_IMGT_cm.txz ${naive_output_cm} fi echo "</table>" >> $outdir/base_overview.html