comparison tools/mira4/mira4.py @ 9:302d13490b23 draft

Uploaded v0.0.2 preview 1, BAM output
author peterjc
date Thu, 28 Nov 2013 05:07:59 -0500
parents 902f01c1084b
children 02350bef2e99
comparison
equal deleted inserted replaced
8:2ab1d6f6a8ec 9:302d13490b23
4 import os 4 import os
5 import sys 5 import sys
6 import subprocess 6 import subprocess
7 import shutil 7 import shutil
8 import time 8 import time
9
10 #Do we need any PYTHONPATH magic?
11 from mira4_make_bam import make_bam
9 12
10 WRAPPER_VER = "0.0.1" #Keep in sync with the XML file 13 WRAPPER_VER = "0.0.1" #Keep in sync with the XML file
11 14
12 def stop_err(msg, err=1): 15 def stop_err(msg, err=1):
13 sys.stderr.write(msg+"\n") 16 sys.stderr.write(msg+"\n")
26 except Exception, err: 29 except Exception, err:
27 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) 30 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
28 sys.exit(1) 31 sys.exit(1)
29 ver, tmp = child.communicate() 32 ver, tmp = child.communicate()
30 del child 33 del child
31 return ver.split("\n", 1)[0] 34 return ver.split("\n", 1)[0].strip()
32 35
33 36
34 try: 37 try:
35 mira_path = os.environ["MIRA4"] 38 mira_path = os.environ["MIRA4"]
36 except ImportError: 39 except ImportError:
37 stop_err("Environment variable $MIRA4 not set") 40 stop_err("Environment variable $MIRA4 not set")
38 mira_binary = os.path.join(mira_path, "mira") 41 mira_binary = os.path.join(mira_path, "mira")
39 if not os.path.isfile(mira_binary): 42 if not os.path.isfile(mira_binary):
40 stop_err("Missing mira under $MIRA4, %r" % mira_binary) 43 stop_err("Missing mira under $MIRA4, %r" % mira_binary)
44 mira_convert = os.path.join(mira_path, "miraconvert")
45 if not os.path.isfile(mira_convert):
46 stop_err("Missing miraconvert under $MIRA4, %r" % mira_convert)
41 47
42 mira_ver = get_version(mira_binary) 48 mira_ver = get_version(mira_binary)
43 if not mira_ver.strip().startswith("4.0"): 49 if not mira_ver.strip().startswith("4.0"):
44 stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver) 50 stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary))
51 mira_convert_ver = get_version(mira_convert)
52 if not mira_convert_ver.strip().startswith("4.0"):
53 stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert))
45 if "-v" in sys.argv or "--version" in sys.argv: 54 if "-v" in sys.argv or "--version" in sys.argv:
46 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) 55 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)
56 if mira_ver != mira_convert_ver:
57 print "WARNING: miraconvert %s" % mira_convert_ver
47 sys.exit(0) 58 sys.exit(0)
48 59
49 def fix_threads(manifest): 60 def fix_threads(manifest):
50 """Tweak the manifest to alter the number of threads.""" 61 """Tweak the manifest to alter the number of threads."""
51 try: 62 try:
76 sys.stderr.write(line) 87 sys.stderr.write(line)
77 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) 88 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60))
78 89
79 90
80 def collect_output(temp, name, handle): 91 def collect_output(temp, name, handle):
92 """Moves files to the output filenames (global variables)."""
81 n3 = (temp, name, name, name) 93 n3 = (temp, name, name, name)
82 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) 94 f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
83 if not os.path.isdir(f): 95 if not os.path.isdir(f):
84 log_manifest(manifest) 96 log_manifest(manifest)
85 stop_err("Missing output folder") 97 stop_err("Missing output folder")
93 #Triggered extractLargeContigs.sh? 105 #Triggered extractLargeContigs.sh?
94 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) 106 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name)
95 107
96 #De novo or single strain mapping, 108 #De novo or single strain mapping,
97 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) 109 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name)
110 ref_fasta = "%s/%s_out.padded.fasta" % (f, name)
98 if not os.path.isfile(old_fasta): 111 if not os.path.isfile(old_fasta):
99 #Mapping (currently StrainX versus reference) 112 #Mapping (StrainX versus reference) or de novo
100 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) 113 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name)
114 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name)
101 if not os.path.isfile(old_fasta): 115 if not os.path.isfile(old_fasta):
102 #Triggered extractLargeContigs.sh? 116 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name)
103 old_fasta = "%s/%s_LargeContigs_out.fasta" % (f, name) 117 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name)
118
104 119
105 missing = False 120 missing = False
106 for old, new in [(old_maf, out_maf), 121 for old, new in [(old_maf, out_maf),
107 (old_fasta, out_fasta)]: 122 (old_fasta, out_fasta)]:
108 if not os.path.isfile(old): 123 if not os.path.isfile(old):
114 log_manifest(manifest) 129 log_manifest(manifest)
115 sys.stderr.write("Contents of %r:\n" % f) 130 sys.stderr.write("Contents of %r:\n" % f)
116 for filename in sorted(os.listdir(f)): 131 for filename in sorted(os.listdir(f)):
117 sys.stderr.write("%s\n" % filename) 132 sys.stderr.write("%s\n" % filename)
118 133
134 #For mapping mode, probably most people would expect a BAM file
135 #using the reference FASTA file...
136 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle)
137 if msg:
138 stop_err(msg)
139
119 def clean_up(temp, name): 140 def clean_up(temp, name):
120 folder = "%s/%s_assembly" % (temp, name) 141 folder = "%s/%s_assembly" % (temp, name)
121 if os.path.isdir(folder): 142 if os.path.isdir(folder):
122 shutil.rmtree(folder) 143 shutil.rmtree(folder)
123 144
125 #Currently Galaxy puts us somewhere safe like: 146 #Currently Galaxy puts us somewhere safe like:
126 #/opt/galaxy-dist/database/job_working_directory/846/ 147 #/opt/galaxy-dist/database/job_working_directory/846/
127 temp = "." 148 temp = "."
128 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8] 149 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8]
129 name = "MIRA" 150 name = "MIRA"
130 manifest, out_maf, out_fasta, out_log = sys.argv[1:5] 151 manifest, out_maf, out_bam, out_fasta, out_log = sys.argv[1:]
131 152
132 fix_threads(manifest) 153 fix_threads(manifest)
133 154
134 start_time = time.time() 155 start_time = time.time()
135 #cmd_list =sys.argv[8:] 156 #cmd_list =sys.argv[8:]
178 run_time = time.time() - start_time 199 run_time = time.time() - start_time
179 return_code = child.returncode 200 return_code = child.returncode
180 handle.write("\n") 201 handle.write("\n")
181 handle.write("============================ MIRA has finished ===============================\n") 202 handle.write("============================ MIRA has finished ===============================\n")
182 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) 203 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0))
183 print "MIRA took %0.2f hours" % (run_time / 3600.0)
184 if return_code: 204 if return_code:
205 print "MIRA took %0.2f hours" % (run_time / 3600.0)
185 handle.write("Return error code %i from command:\n" % return_code) 206 handle.write("Return error code %i from command:\n" % return_code)
186 handle.write(cmd + "\n") 207 handle.write(cmd + "\n")
187 handle.close() 208 handle.close()
188 clean_up(temp, name) 209 clean_up(temp, name)
189 log_manifest(manifest) 210 log_manifest(manifest)
200 e.close() 221 e.close()
201 handle.write("============================ (end of ec.log) =================================\n") 222 handle.write("============================ (end of ec.log) =================================\n")
202 handle.flush() 223 handle.flush()
203 224
204 #print "Collecting output..." 225 #print "Collecting output..."
226 start_time = time.time()
205 collect_output(temp, name, handle) 227 collect_output(temp, name, handle)
228 collect_time = time.time() - start_time
229 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0))
230 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0))
206 231
207 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): 232 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
208 #Treat as an error, but doing this AFTER collect_output 233 #Treat as an error, but doing this AFTER collect_output
209 sys.stderr.write("Extract Large Contigs failed\n") 234 sys.stderr.write("Extract Large Contigs failed\n")
210 handle.write("Extract Large Contigs failed\n") 235 handle.write("Extract Large Contigs failed\n")
212 sys.exit(1) 237 sys.exit(1)
213 238
214 #print "Cleaning up..." 239 #print "Cleaning up..."
215 clean_up(temp, name) 240 clean_up(temp, name)
216 241
242 handle.write("\nDone\n")
217 handle.close() 243 handle.close()
218 print("Done") 244 print("Done")