Mercurial > repos > peterjc > mira4_assembler
comparison tools/mira4/mira4.py @ 9:302d13490b23 draft
Uploaded v0.0.2 preview 1, BAM output
| author | peterjc |
|---|---|
| date | Thu, 28 Nov 2013 05:07:59 -0500 |
| parents | 902f01c1084b |
| children | 02350bef2e99 |
comparison
equal
deleted
inserted
replaced
| 8:2ab1d6f6a8ec | 9:302d13490b23 |
|---|---|
| 4 import os | 4 import os |
| 5 import sys | 5 import sys |
| 6 import subprocess | 6 import subprocess |
| 7 import shutil | 7 import shutil |
| 8 import time | 8 import time |
| 9 | |
| 10 #Do we need any PYTHONPATH magic? | |
| 11 from mira4_make_bam import make_bam | |
| 9 | 12 |
| 10 WRAPPER_VER = "0.0.1" #Keep in sync with the XML file | 13 WRAPPER_VER = "0.0.1" #Keep in sync with the XML file |
| 11 | 14 |
| 12 def stop_err(msg, err=1): | 15 def stop_err(msg, err=1): |
| 13 sys.stderr.write(msg+"\n") | 16 sys.stderr.write(msg+"\n") |
| 26 except Exception, err: | 29 except Exception, err: |
| 27 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) | 30 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) |
| 28 sys.exit(1) | 31 sys.exit(1) |
| 29 ver, tmp = child.communicate() | 32 ver, tmp = child.communicate() |
| 30 del child | 33 del child |
| 31 return ver.split("\n", 1)[0] | 34 return ver.split("\n", 1)[0].strip() |
| 32 | 35 |
| 33 | 36 |
| 34 try: | 37 try: |
| 35 mira_path = os.environ["MIRA4"] | 38 mira_path = os.environ["MIRA4"] |
| 36 except ImportError: | 39 except ImportError: |
| 37 stop_err("Environment variable $MIRA4 not set") | 40 stop_err("Environment variable $MIRA4 not set") |
| 38 mira_binary = os.path.join(mira_path, "mira") | 41 mira_binary = os.path.join(mira_path, "mira") |
| 39 if not os.path.isfile(mira_binary): | 42 if not os.path.isfile(mira_binary): |
| 40 stop_err("Missing mira under $MIRA4, %r" % mira_binary) | 43 stop_err("Missing mira under $MIRA4, %r" % mira_binary) |
| 44 mira_convert = os.path.join(mira_path, "miraconvert") | |
| 45 if not os.path.isfile(mira_convert): | |
| 46 stop_err("Missing miraconvert under $MIRA4, %r" % mira_convert) | |
| 41 | 47 |
| 42 mira_ver = get_version(mira_binary) | 48 mira_ver = get_version(mira_binary) |
| 43 if not mira_ver.strip().startswith("4.0"): | 49 if not mira_ver.strip().startswith("4.0"): |
| 44 stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver) | 50 stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) |
| 51 mira_convert_ver = get_version(mira_convert) | |
| 52 if not mira_convert_ver.strip().startswith("4.0"): | |
| 53 stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) | |
| 45 if "-v" in sys.argv or "--version" in sys.argv: | 54 if "-v" in sys.argv or "--version" in sys.argv: |
| 46 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) | 55 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) |
| 56 if mira_ver != mira_convert_ver: | |
| 57 print "WARNING: miraconvert %s" % mira_convert_ver | |
| 47 sys.exit(0) | 58 sys.exit(0) |
| 48 | 59 |
| 49 def fix_threads(manifest): | 60 def fix_threads(manifest): |
| 50 """Tweak the manifest to alter the number of threads.""" | 61 """Tweak the manifest to alter the number of threads.""" |
| 51 try: | 62 try: |
| 76 sys.stderr.write(line) | 87 sys.stderr.write(line) |
| 77 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) | 88 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) |
| 78 | 89 |
| 79 | 90 |
| 80 def collect_output(temp, name, handle): | 91 def collect_output(temp, name, handle): |
| 92 """Moves files to the output filenames (global variables).""" | |
| 81 n3 = (temp, name, name, name) | 93 n3 = (temp, name, name, name) |
| 82 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) | 94 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) |
| 83 if not os.path.isdir(f): | 95 if not os.path.isdir(f): |
| 84 log_manifest(manifest) | 96 log_manifest(manifest) |
| 85 stop_err("Missing output folder") | 97 stop_err("Missing output folder") |
| 93 #Triggered extractLargeContigs.sh? | 105 #Triggered extractLargeContigs.sh? |
| 94 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) | 106 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) |
| 95 | 107 |
| 96 #De novo or single strain mapping, | 108 #De novo or single strain mapping, |
| 97 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) | 109 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) |
| 110 ref_fasta = "%s/%s_out.padded.fasta" % (f, name) | |
| 98 if not os.path.isfile(old_fasta): | 111 if not os.path.isfile(old_fasta): |
| 99 #Mapping (currently StrainX versus reference) | 112 #Mapping (StrainX versus reference) or de novo |
| 100 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) | 113 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) |
| 114 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) | |
| 101 if not os.path.isfile(old_fasta): | 115 if not os.path.isfile(old_fasta): |
| 102 #Triggered extractLargeContigs.sh? | 116 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) |
| 103 old_fasta = "%s/%s_LargeContigs_out.fasta" % (f, name) | 117 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) |
| 118 | |
| 104 | 119 |
| 105 missing = False | 120 missing = False |
| 106 for old, new in [(old_maf, out_maf), | 121 for old, new in [(old_maf, out_maf), |
| 107 (old_fasta, out_fasta)]: | 122 (old_fasta, out_fasta)]: |
| 108 if not os.path.isfile(old): | 123 if not os.path.isfile(old): |
| 114 log_manifest(manifest) | 129 log_manifest(manifest) |
| 115 sys.stderr.write("Contents of %r:\n" % f) | 130 sys.stderr.write("Contents of %r:\n" % f) |
| 116 for filename in sorted(os.listdir(f)): | 131 for filename in sorted(os.listdir(f)): |
| 117 sys.stderr.write("%s\n" % filename) | 132 sys.stderr.write("%s\n" % filename) |
| 118 | 133 |
| 134 #For mapping mode, probably most people would expect a BAM file | |
| 135 #using the reference FASTA file... | |
| 136 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) | |
| 137 if msg: | |
| 138 stop_err(msg) | |
| 139 | |
| 119 def clean_up(temp, name): | 140 def clean_up(temp, name): |
| 120 folder = "%s/%s_assembly" % (temp, name) | 141 folder = "%s/%s_assembly" % (temp, name) |
| 121 if os.path.isdir(folder): | 142 if os.path.isdir(folder): |
| 122 shutil.rmtree(folder) | 143 shutil.rmtree(folder) |
| 123 | 144 |
| 125 #Currently Galaxy puts us somewhere safe like: | 146 #Currently Galaxy puts us somewhere safe like: |
| 126 #/opt/galaxy-dist/database/job_working_directory/846/ | 147 #/opt/galaxy-dist/database/job_working_directory/846/ |
| 127 temp = "." | 148 temp = "." |
| 128 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8] | 149 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8] |
| 129 name = "MIRA" | 150 name = "MIRA" |
| 130 manifest, out_maf, out_fasta, out_log = sys.argv[1:5] | 151 manifest, out_maf, out_bam, out_fasta, out_log = sys.argv[1:] |
| 131 | 152 |
| 132 fix_threads(manifest) | 153 fix_threads(manifest) |
| 133 | 154 |
| 134 start_time = time.time() | 155 start_time = time.time() |
| 135 #cmd_list =sys.argv[8:] | 156 #cmd_list =sys.argv[8:] |
| 178 run_time = time.time() - start_time | 199 run_time = time.time() - start_time |
| 179 return_code = child.returncode | 200 return_code = child.returncode |
| 180 handle.write("\n") | 201 handle.write("\n") |
| 181 handle.write("============================ MIRA has finished ===============================\n") | 202 handle.write("============================ MIRA has finished ===============================\n") |
| 182 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) | 203 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) |
| 183 print "MIRA took %0.2f hours" % (run_time / 3600.0) | |
| 184 if return_code: | 204 if return_code: |
| 205 print "MIRA took %0.2f hours" % (run_time / 3600.0) | |
| 185 handle.write("Return error code %i from command:\n" % return_code) | 206 handle.write("Return error code %i from command:\n" % return_code) |
| 186 handle.write(cmd + "\n") | 207 handle.write(cmd + "\n") |
| 187 handle.close() | 208 handle.close() |
| 188 clean_up(temp, name) | 209 clean_up(temp, name) |
| 189 log_manifest(manifest) | 210 log_manifest(manifest) |
| 200 e.close() | 221 e.close() |
| 201 handle.write("============================ (end of ec.log) =================================\n") | 222 handle.write("============================ (end of ec.log) =================================\n") |
| 202 handle.flush() | 223 handle.flush() |
| 203 | 224 |
| 204 #print "Collecting output..." | 225 #print "Collecting output..." |
| 226 start_time = time.time() | |
| 205 collect_output(temp, name, handle) | 227 collect_output(temp, name, handle) |
| 228 collect_time = time.time() - start_time | |
| 229 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) | |
| 230 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) | |
| 206 | 231 |
| 207 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): | 232 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): |
| 208 #Treat as an error, but doing this AFTER collect_output | 233 #Treat as an error, but doing this AFTER collect_output |
| 209 sys.stderr.write("Extract Large Contigs failed\n") | 234 sys.stderr.write("Extract Large Contigs failed\n") |
| 210 handle.write("Extract Large Contigs failed\n") | 235 handle.write("Extract Large Contigs failed\n") |
| 212 sys.exit(1) | 237 sys.exit(1) |
| 213 | 238 |
| 214 #print "Cleaning up..." | 239 #print "Cleaning up..." |
| 215 clean_up(temp, name) | 240 clean_up(temp, name) |
| 216 | 241 |
| 242 handle.write("\nDone\n") | |
| 217 handle.close() | 243 handle.close() |
| 218 print("Done") | 244 print("Done") |
