Mercurial > repos > peterjc > mira4_assembler
comparison tools/mira4_0/mira4.py @ 32:56b421d59805 draft
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
| author | peterjc |
|---|---|
| date | Thu, 18 May 2017 13:36:08 -0400 |
| parents | fd95aaef8818 |
| children | 0785a6537f3e |
comparison
equal
deleted
inserted
replaced
| 31:fd95aaef8818 | 32:56b421d59805 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 """A simple wrapper script to call MIRA and collect its output. | 2 """A simple wrapper script to call MIRA and collect its output. |
| 3 """ | 3 """ |
| 4 | |
| 5 from __future__ import print_function | |
| 6 | |
| 4 import os | 7 import os |
| 8 import shutil | |
| 9 import subprocess | |
| 5 import sys | 10 import sys |
| 6 import subprocess | 11 import tempfile |
| 7 import shutil | |
| 8 import time | 12 import time |
| 9 import tempfile | 13 |
| 10 from optparse import OptionParser | 14 from optparse import OptionParser |
| 11 | 15 |
| 12 #Do we need any PYTHONPATH magic? | 16 # Do we need any PYTHONPATH magic? |
| 13 from mira4_make_bam import make_bam | 17 from mira4_make_bam import make_bam |
| 14 | 18 |
| 15 WRAPPER_VER = "0.0.4" #Keep in sync with the XML file | 19 WRAPPER_VER = "0.0.10" # Keep in sync with the XML file |
| 16 | 20 |
| 17 | 21 |
| 18 def get_version(mira_binary): | 22 def get_version(mira_binary): |
| 19 """Run MIRA to find its version number""" | 23 """Run MIRA to find its version number.""" |
| 20 # At the command line I would use: mira -v \| head -n 1 | 24 # At the command line I would use: mira -v \| head -n 1 |
| 21 # however there is some pipe error when doing that here. | 25 # however there is some pipe error when doing that here. |
| 22 cmd = [mira_binary, "-v"] | 26 cmd = [mira_binary, "-v"] |
| 23 try: | 27 try: |
| 24 child = subprocess.Popen(cmd, | 28 child = subprocess.Popen(cmd, |
| 25 stdout=subprocess.PIPE, | 29 stdout=subprocess.PIPE, |
| 26 stderr=subprocess.STDOUT) | 30 stderr=subprocess.STDOUT) |
| 27 except Exception, err: | 31 except Exception as err: |
| 28 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) | 32 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) |
| 29 sys.exit(1) | 33 sys.exit(1) |
| 30 ver, tmp = child.communicate() | 34 ver, tmp = child.communicate() |
| 31 del child | 35 del child |
| 32 return ver.split("\n", 1)[0].strip() | 36 return ver.split("\n", 1)[0].strip() |
| 33 | 37 |
| 34 #Parse Command Line | 38 |
| 39 # Parse Command Line | |
| 35 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: | 40 usage = """Galaxy MIRA4 wrapper script v%s - use as follows: |
| 36 | 41 |
| 37 $ python mira4.py ... | 42 $ python mira4.py ... |
| 38 | 43 |
| 39 This will run the MIRA binary and collect its output files as directed. | 44 This will run the MIRA binary and collect its output files as directed. |
| 82 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) | 87 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) |
| 83 mira_convert_ver = get_version(mira_convert) | 88 mira_convert_ver = get_version(mira_convert) |
| 84 if not mira_convert_ver.strip().startswith("4.0"): | 89 if not mira_convert_ver.strip().startswith("4.0"): |
| 85 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) | 90 sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) |
| 86 if options.version: | 91 if options.version: |
| 87 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) | 92 print("%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER)) |
| 88 if mira_ver != mira_convert_ver: | 93 if mira_ver != mira_convert_ver: |
| 89 print "WARNING: miraconvert %s" % mira_convert_ver | 94 print("WARNING: miraconvert %s" % mira_convert_ver) |
| 90 sys.exit(0) | 95 sys.exit(0) |
| 91 | 96 |
| 92 if not manifest: | 97 if not manifest: |
| 93 sys.exit("Manifest is required") | 98 sys.exit("Manifest is required") |
| 94 elif not os.path.isfile(manifest): | 99 elif not os.path.isfile(manifest): |
| 119 """ | 124 """ |
| 120 handle = open(manifest, "r") | 125 handle = open(manifest, "r") |
| 121 text = handle.read() | 126 text = handle.read() |
| 122 handle.close() | 127 handle.close() |
| 123 | 128 |
| 124 #At time of writing, this is at the end of a file, | 129 # At time of writing, this is at the end of a file, |
| 125 #but could be followed by a space in future... | 130 # but could be followed by a space in future... |
| 126 text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir()) | 131 text = text.replace("-DI:trt=/tmp", "-DI:trt=" + tempfile.gettempdir()) |
| 127 | 132 |
| 128 #Want to try to ensure this gets written to disk before MIRA attempts | 133 # Want to try to ensure this gets written to disk before MIRA attempts |
| 129 #to open it - any networked file system may impose a delay... | 134 # to open it - any networked file system may impose a delay... |
| 130 handle = open(manifest, "w") | 135 handle = open(manifest, "w") |
| 131 handle.write(text) | 136 handle.write(text) |
| 132 handle.flush() | 137 handle.flush() |
| 133 os.fsync(handle.fileno()) | 138 os.fsync(handle.fileno()) |
| 134 handle.close() | 139 handle.close() |
| 135 | 140 |
| 136 | 141 |
| 137 def log_manifest(manifest): | 142 def log_manifest(manifest): |
| 138 """Write the manifest file to stderr.""" | 143 """Write the manifest file to stderr.""" |
| 139 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("="*60, "="*60)) | 144 sys.stderr.write("\n%s\nManifest file\n%s\n" % ("=" * 60, "=" * 60)) |
| 140 with open(manifest) as h: | 145 with open(manifest) as h: |
| 141 for line in h: | 146 for line in h: |
| 142 sys.stderr.write(line) | 147 sys.stderr.write(line) |
| 143 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) | 148 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("=" * 60, "=" * 60)) |
| 144 | 149 |
| 145 | 150 |
| 146 def collect_output(temp, name, handle): | 151 def collect_output(temp, name, handle): |
| 147 """Moves files to the output filenames (global variables).""" | 152 """Moves files to the output filenames (global variables).""" |
| 148 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) | 153 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) |
| 154 sys.exit("Empty output folder") | 159 sys.exit("Empty output folder") |
| 155 missing = [] | 160 missing = [] |
| 156 | 161 |
| 157 old_maf = "%s/%s_out.maf" % (f, name) | 162 old_maf = "%s/%s_out.maf" % (f, name) |
| 158 if not os.path.isfile(old_maf): | 163 if not os.path.isfile(old_maf): |
| 159 #Triggered extractLargeContigs.sh? | 164 # Triggered extractLargeContigs.sh? |
| 160 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) | 165 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) |
| 161 | 166 |
| 162 #De novo or single strain mapping, | 167 # De novo or single strain mapping, |
| 163 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) | 168 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) |
| 164 ref_fasta = "%s/%s_out.padded.fasta" % (f, name) | 169 ref_fasta = "%s/%s_out.padded.fasta" % (f, name) |
| 165 if not os.path.isfile(old_fasta): | 170 if not os.path.isfile(old_fasta): |
| 166 #Mapping (StrainX versus reference) or de novo | 171 # Mapping (StrainX versus reference) or de novo |
| 167 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) | 172 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) |
| 168 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) | 173 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) |
| 169 if not os.path.isfile(old_fasta): | 174 if not os.path.isfile(old_fasta): |
| 170 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) | 175 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) |
| 171 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) | 176 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) |
| 172 | |
| 173 | 177 |
| 174 missing = False | 178 missing = False |
| 175 for old, new in [(old_maf, out_maf), | 179 for old, new in [(old_maf, out_maf), |
| 176 (old_fasta, out_fasta)]: | 180 (old_fasta, out_fasta)]: |
| 177 if not os.path.isfile(old): | 181 if not os.path.isfile(old): |
| 185 log_manifest(manifest) | 189 log_manifest(manifest) |
| 186 sys.stderr.write("Contents of %r:\n" % f) | 190 sys.stderr.write("Contents of %r:\n" % f) |
| 187 for filename in sorted(os.listdir(f)): | 191 for filename in sorted(os.listdir(f)): |
| 188 sys.stderr.write("%s\n" % filename) | 192 sys.stderr.write("%s\n" % filename) |
| 189 | 193 |
| 190 #For mapping mode, probably most people would expect a BAM file | 194 # For mapping mode, probably most people would expect a BAM file |
| 191 #using the reference FASTA file... | 195 # using the reference FASTA file... |
| 192 if out_bam and out_bam != "-": | 196 if out_bam and out_bam != "-": |
| 193 if out_maf and out_maf != "-": | 197 if out_maf and out_maf != "-": |
| 194 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) | 198 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) |
| 195 else: | 199 else: |
| 196 #Not collecting the MAF file, use original location | 200 # Not collecting the MAF file, use original location |
| 197 msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle) | 201 msg = make_bam(mira_convert, old_maf, ref_fasta, out_bam, handle) |
| 198 if msg: | 202 if msg: |
| 199 sys.exit(msg) | 203 sys.exit(msg) |
| 204 | |
| 200 | 205 |
| 201 def clean_up(temp, name): | 206 def clean_up(temp, name): |
| 202 folder = "%s/%s_assembly" % (temp, name) | 207 folder = "%s/%s_assembly" % (temp, name) |
| 203 if os.path.isdir(folder): | 208 if os.path.isdir(folder): |
| 204 shutil.rmtree(folder) | 209 shutil.rmtree(folder) |
| 205 | 210 |
| 206 #TODO - Run MIRA in /tmp or a configurable directory? | 211 |
| 207 #Currently Galaxy puts us somewhere safe like: | 212 # TODO - Run MIRA in /tmp or a configurable directory? |
| 208 #/opt/galaxy-dist/database/job_working_directory/846/ | 213 # Currently Galaxy puts us somewhere safe like: |
| 214 # /opt/galaxy-dist/database/job_working_directory/846/ | |
| 209 temp = "." | 215 temp = "." |
| 210 | 216 |
| 211 name = "MIRA" | 217 name = "MIRA" |
| 212 | 218 |
| 213 override_temp(manifest) | 219 override_temp(manifest) |
| 216 cmd_list = [mira_binary, "-t", str(threads), manifest] | 222 cmd_list = [mira_binary, "-t", str(threads), manifest] |
| 217 cmd = " ".join(cmd_list) | 223 cmd = " ".join(cmd_list) |
| 218 | 224 |
| 219 assert os.path.isdir(temp) | 225 assert os.path.isdir(temp) |
| 220 d = "%s_assembly" % name | 226 d = "%s_assembly" % name |
| 221 #This can fail on my development machine if stale folders exist | 227 # This can fail on my development machine if stale folders exist |
| 222 #under Galaxy's .../database/job_working_directory/ tree: | 228 # under Galaxy's .../database/job_working_directory/ tree: |
| 223 assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d)) | 229 assert not os.path.isdir(d), "Path %r already exists:\n%s" % (d, os.path.abspath(d)) |
| 224 try: | 230 try: |
| 225 #Check path access | 231 # Check path access |
| 226 os.mkdir(d) | 232 os.mkdir(d) |
| 227 except Exception, err: | 233 except Exception as err: |
| 228 log_manifest(manifest) | 234 log_manifest(manifest) |
| 229 sys.stderr.write("Error making directory %s\n%s" % (d, err)) | 235 sys.stderr.write("Error making directory %s\n%s" % (d, err)) |
| 230 sys.exit(1) | 236 sys.exit(1) |
| 231 | 237 |
| 232 #print os.path.abspath(".") | 238 # print(os.path.abspath(".")) |
| 233 #print cmd | 239 # print(cmd) |
| 234 | 240 |
| 235 if out_log and out_log != "-": | 241 if out_log and out_log != "-": |
| 236 handle = open(out_log, "w") | 242 handle = open(out_log, "w") |
| 237 else: | 243 else: |
| 238 handle = open(os.devnull, "w") | 244 handle = open(os.devnull, "w") |
| 244 del m | 250 del m |
| 245 handle.write("\n") | 251 handle.write("\n") |
| 246 handle.write("============================ Starting MIRA now ===============================\n") | 252 handle.write("============================ Starting MIRA now ===============================\n") |
| 247 handle.flush() | 253 handle.flush() |
| 248 try: | 254 try: |
| 249 #Run MIRA | 255 # Run MIRA |
| 250 child = subprocess.Popen(cmd_list, | 256 child = subprocess.Popen(cmd_list, |
| 251 stdout=handle, | 257 stdout=handle, |
| 252 stderr=subprocess.STDOUT) | 258 stderr=subprocess.STDOUT) |
| 253 except Exception, err: | 259 except Exception as err: |
| 254 log_manifest(manifest) | 260 log_manifest(manifest) |
| 255 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) | 261 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) |
| 256 #TODO - call clean up? | 262 # TODO - call clean up? |
| 257 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) | 263 handle.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) |
| 258 handle.close() | 264 handle.close() |
| 259 sys.exit(1) | 265 sys.exit(1) |
| 260 #Use .communicate as can get deadlocks with .wait(), | 266 # Use .communicate as can get deadlocks with .wait(), |
| 261 stdout, stderr = child.communicate() | 267 stdout, stderr = child.communicate() |
| 262 assert not stdout and not stderr #Should be empty as sent to handle | 268 assert not stdout and not stderr # Should be empty as sent to handle |
| 263 run_time = time.time() - start_time | 269 run_time = time.time() - start_time |
| 264 return_code = child.returncode | 270 return_code = child.returncode |
| 265 handle.write("\n") | 271 handle.write("\n") |
| 266 handle.write("============================ MIRA has finished ===============================\n") | 272 handle.write("============================ MIRA has finished ===============================\n") |
| 267 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) | 273 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) |
| 268 if return_code: | 274 if return_code: |
| 269 print "MIRA took %0.2f hours" % (run_time / 3600.0) | 275 print("MIRA took %0.2f hours" % (run_time / 3600.0)) |
| 270 handle.write("Return error code %i from command:\n" % return_code) | 276 handle.write("Return error code %i from command:\n" % return_code) |
| 271 handle.write(cmd + "\n") | 277 handle.write(cmd + "\n") |
| 272 handle.close() | 278 handle.close() |
| 273 clean_up(temp, name) | 279 clean_up(temp, name) |
| 274 log_manifest(manifest) | 280 log_manifest(manifest) |
| 275 sys.stderr.write("Return error code %i from command:\n" % return_code) | 281 sys.stderr.write("Return error code %i from command:\n" % return_code) |
| 276 sys.stderr.write(cmd + "\n") | 282 sys.stderr.write(cmd + "\n") |
| 277 sys.exit(eturn_code) | 283 sys.exit(return_code) |
| 278 handle.flush() | 284 handle.flush() |
| 279 | 285 |
| 280 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): | 286 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): |
| 281 handle.write("\n") | 287 handle.write("\n") |
| 282 handle.write("====================== Extract Large Contigs failed ==========================\n") | 288 handle.write("====================== Extract Large Contigs failed ==========================\n") |
| 285 handle.write(line) | 291 handle.write(line) |
| 286 e.close() | 292 e.close() |
| 287 handle.write("============================ (end of ec.log) =================================\n") | 293 handle.write("============================ (end of ec.log) =================================\n") |
| 288 handle.flush() | 294 handle.flush() |
| 289 | 295 |
| 290 #print "Collecting output..." | 296 # print("Collecting output...") |
| 291 start_time = time.time() | 297 start_time = time.time() |
| 292 collect_output(temp, name, handle) | 298 collect_output(temp, name, handle) |
| 293 collect_time = time.time() - start_time | 299 collect_time = time.time() - start_time |
| 294 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) | 300 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" |
| 295 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) | 301 % (run_time / 3600.0, collect_time / 60.0)) |
| 302 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" | |
| 303 % (run_time / 3600.0, collect_time / 60.0)) | |
| 296 | 304 |
| 297 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): | 305 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): |
| 298 #Treat as an error, but doing this AFTER collect_output | 306 # Treat as an error, but doing this AFTER collect_output |
| 299 sys.stderr.write("Extract Large Contigs failed\n") | 307 sys.stderr.write("Extract Large Contigs failed\n") |
| 300 handle.write("Extract Large Contigs failed\n") | 308 handle.write("Extract Large Contigs failed\n") |
| 301 handle.close() | 309 handle.close() |
| 302 sys.exit(1) | 310 sys.exit(1) |
| 303 | 311 |
| 304 #print "Cleaning up..." | 312 # print "Cleaning up..." |
| 305 clean_up(temp, name) | 313 clean_up(temp, name) |
| 306 | 314 |
| 307 handle.write("\nDone\n") | 315 handle.write("\nDone\n") |
| 308 handle.close() | 316 handle.close() |
| 309 print("Done") | 317 print("Done") |
