diff tools/mira4/mira4.py @ 9:302d13490b23 draft

Uploaded v0.0.2 preview 1, BAM output
author peterjc
date Thu, 28 Nov 2013 05:07:59 -0500
parents 902f01c1084b
children 02350bef2e99
line wrap: on
line diff
--- a/tools/mira4/mira4.py	Mon Oct 28 05:44:46 2013 -0400
+++ b/tools/mira4/mira4.py	Thu Nov 28 05:07:59 2013 -0500
@@ -7,6 +7,9 @@
 import shutil
 import time
 
+#Do we need any PYTHONPATH magic?
+from mira4_make_bam import make_bam
+
 WRAPPER_VER = "0.0.1" #Keep in sync with the XML file
 
 def stop_err(msg, err=1):
@@ -28,7 +31,7 @@
         sys.exit(1)
     ver, tmp = child.communicate()
     del child
-    return ver.split("\n", 1)[0]
+    return ver.split("\n", 1)[0].strip()
 
 
 try:
@@ -38,12 +41,20 @@
 mira_binary = os.path.join(mira_path, "mira")
 if not os.path.isfile(mira_binary):
     stop_err("Missing mira under $MIRA4, %r" % mira_binary)
+mira_convert = os.path.join(mira_path, "miraconvert")
+if not os.path.isfile(mira_convert):
+    stop_err("Missing miraconvert under $MIRA4, %r" % mira_convert)
 
 mira_ver = get_version(mira_binary)
 if not mira_ver.strip().startswith("4.0"):
-    stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
+    stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary))
+mira_convert_ver = get_version(mira_convert)
+if not mira_convert_ver.strip().startswith("4.0"):
+    stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
 if "-v" in sys.argv or "--version" in sys.argv:
     print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)
+    if mira_ver != mira_convert_ver:
+        print "WARNING: miraconvert %s" % mira_convert_ver
     sys.exit(0)
 
 def fix_threads(manifest):
@@ -78,6 +89,7 @@
 
 
 def collect_output(temp, name, handle):
+    """Moves files to the output filenames (global variables)."""
     n3 = (temp, name, name, name)
     f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
     if not os.path.isdir(f):
@@ -95,12 +107,15 @@
 
     #De novo or single strain mapping,
     old_fasta = "%s/%s_out.unpadded.fasta" % (f, name)
+    ref_fasta = "%s/%s_out.padded.fasta" % (f, name)
     if not os.path.isfile(old_fasta):
-        #Mapping (currently StrainX versus reference)
+        #Mapping (StrainX versus reference) or de novo
         old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name)
+        ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name)
     if not os.path.isfile(old_fasta):
-        #Triggered extractLargeContigs.sh?
-        old_fasta = "%s/%s_LargeContigs_out.fasta" % (f, name)
+        old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name)
+        ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name)
+        
 
     missing = False
     for old, new in [(old_maf, out_maf),
@@ -116,6 +131,12 @@
         for filename in sorted(os.listdir(f)):
             sys.stderr.write("%s\n" % filename)
 
+    #For mapping mode, probably most people would expect a BAM file
+    #using the reference FASTA file...
+    msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
+    if msg:
+        stop_err(msg)
+
 def clean_up(temp, name):
     folder = "%s/%s_assembly" % (temp, name)
     if os.path.isdir(folder):
@@ -127,7 +148,7 @@
 temp = "."
 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8]
 name = "MIRA"
-manifest, out_maf, out_fasta, out_log = sys.argv[1:5]
+manifest, out_maf, out_bam, out_fasta, out_log = sys.argv[1:]
 
 fix_threads(manifest)
 
@@ -180,8 +201,8 @@
 handle.write("\n")
 handle.write("============================ MIRA has finished ===============================\n")
 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0))
-print "MIRA took %0.2f hours" % (run_time / 3600.0)
 if return_code:
+    print "MIRA took %0.2f hours" % (run_time / 3600.0)
     handle.write("Return error code %i from command:\n" % return_code)
     handle.write(cmd + "\n")
     handle.close()
@@ -202,7 +223,11 @@
     handle.flush()
 
 #print "Collecting output..."
+start_time = time.time()
 collect_output(temp, name, handle)
+collect_time = time.time() - start_time
+handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0))
+print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0))
 
 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
     #Treat as an error, but doing this AFTER collect_output
@@ -214,5 +239,6 @@
 #print "Cleaning up..."
 clean_up(temp, name)
 
+handle.write("\nDone\n")
 handle.close()
 print("Done")