Mercurial > repos > peterjc > mira4_assembler
comparison tools/mira4/mira4.py @ 9:302d13490b23 draft
Uploaded v0.0.2 preview 1, BAM output
author | peterjc |
---|---|
date | Thu, 28 Nov 2013 05:07:59 -0500 |
parents | 902f01c1084b |
children | 02350bef2e99 |
comparison
equal
deleted
inserted
replaced
8:2ab1d6f6a8ec | 9:302d13490b23 |
---|---|
4 import os | 4 import os |
5 import sys | 5 import sys |
6 import subprocess | 6 import subprocess |
7 import shutil | 7 import shutil |
8 import time | 8 import time |
9 | |
10 #Do we need any PYTHONPATH magic? | |
11 from mira4_make_bam import make_bam | |
9 | 12 |
10 WRAPPER_VER = "0.0.1" #Keep in sync with the XML file | 13 WRAPPER_VER = "0.0.1" #Keep in sync with the XML file |
11 | 14 |
12 def stop_err(msg, err=1): | 15 def stop_err(msg, err=1): |
13 sys.stderr.write(msg+"\n") | 16 sys.stderr.write(msg+"\n") |
26 except Exception, err: | 29 except Exception, err: |
27 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) | 30 sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) |
28 sys.exit(1) | 31 sys.exit(1) |
29 ver, tmp = child.communicate() | 32 ver, tmp = child.communicate() |
30 del child | 33 del child |
31 return ver.split("\n", 1)[0] | 34 return ver.split("\n", 1)[0].strip() |
32 | 35 |
33 | 36 |
34 try: | 37 try: |
35 mira_path = os.environ["MIRA4"] | 38 mira_path = os.environ["MIRA4"] |
36 except ImportError: | 39 except ImportError: |
37 stop_err("Environment variable $MIRA4 not set") | 40 stop_err("Environment variable $MIRA4 not set") |
38 mira_binary = os.path.join(mira_path, "mira") | 41 mira_binary = os.path.join(mira_path, "mira") |
39 if not os.path.isfile(mira_binary): | 42 if not os.path.isfile(mira_binary): |
40 stop_err("Missing mira under $MIRA4, %r" % mira_binary) | 43 stop_err("Missing mira under $MIRA4, %r" % mira_binary) |
44 mira_convert = os.path.join(mira_path, "miraconvert") | |
45 if not os.path.isfile(mira_convert): | |
46 stop_err("Missing miraconvert under $MIRA4, %r" % mira_convert) | |
41 | 47 |
42 mira_ver = get_version(mira_binary) | 48 mira_ver = get_version(mira_binary) |
43 if not mira_ver.strip().startswith("4.0"): | 49 if not mira_ver.strip().startswith("4.0"): |
44 stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver) | 50 stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_binary)) |
51 mira_convert_ver = get_version(mira_convert) | |
52 if not mira_convert_ver.strip().startswith("4.0"): | |
53 stop_err("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_ver, mira_convert)) | |
45 if "-v" in sys.argv or "--version" in sys.argv: | 54 if "-v" in sys.argv or "--version" in sys.argv: |
46 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) | 55 print "%s, MIRA wrapper version %s" % (mira_ver, WRAPPER_VER) |
56 if mira_ver != mira_convert_ver: | |
57 print "WARNING: miraconvert %s" % mira_convert_ver | |
47 sys.exit(0) | 58 sys.exit(0) |
48 | 59 |
49 def fix_threads(manifest): | 60 def fix_threads(manifest): |
50 """Tweak the manifest to alter the number of threads.""" | 61 """Tweak the manifest to alter the number of threads.""" |
51 try: | 62 try: |
76 sys.stderr.write(line) | 87 sys.stderr.write(line) |
77 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) | 88 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) |
78 | 89 |
79 | 90 |
80 def collect_output(temp, name, handle): | 91 def collect_output(temp, name, handle): |
92 """Moves files to the output filenames (global variables).""" | |
81 n3 = (temp, name, name, name) | 93 n3 = (temp, name, name, name) |
82 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) | 94 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) |
83 if not os.path.isdir(f): | 95 if not os.path.isdir(f): |
84 log_manifest(manifest) | 96 log_manifest(manifest) |
85 stop_err("Missing output folder") | 97 stop_err("Missing output folder") |
93 #Triggered extractLargeContigs.sh? | 105 #Triggered extractLargeContigs.sh? |
94 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) | 106 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name) |
95 | 107 |
96 #De novo or single strain mapping, | 108 #De novo or single strain mapping, |
97 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) | 109 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name) |
110 ref_fasta = "%s/%s_out.padded.fasta" % (f, name) | |
98 if not os.path.isfile(old_fasta): | 111 if not os.path.isfile(old_fasta): |
99 #Mapping (currently StrainX versus reference) | 112 #Mapping (StrainX versus reference) or de novo |
100 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) | 113 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name) |
114 ref_fasta = "%s/%s_out_StrainX.padded.fasta" % (f, name) | |
101 if not os.path.isfile(old_fasta): | 115 if not os.path.isfile(old_fasta): |
102 #Triggered extractLargeContigs.sh? | 116 old_fasta = "%s/%s_out_ReferenceStrain.unpadded.fasta" % (f, name) |
103 old_fasta = "%s/%s_LargeContigs_out.fasta" % (f, name) | 117 ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) |
118 | |
104 | 119 |
105 missing = False | 120 missing = False |
106 for old, new in [(old_maf, out_maf), | 121 for old, new in [(old_maf, out_maf), |
107 (old_fasta, out_fasta)]: | 122 (old_fasta, out_fasta)]: |
108 if not os.path.isfile(old): | 123 if not os.path.isfile(old): |
114 log_manifest(manifest) | 129 log_manifest(manifest) |
115 sys.stderr.write("Contents of %r:\n" % f) | 130 sys.stderr.write("Contents of %r:\n" % f) |
116 for filename in sorted(os.listdir(f)): | 131 for filename in sorted(os.listdir(f)): |
117 sys.stderr.write("%s\n" % filename) | 132 sys.stderr.write("%s\n" % filename) |
118 | 133 |
134 #For mapping mode, probably most people would expect a BAM file | |
135 #using the reference FASTA file... | |
136 msg = make_bam(mira_convert, out_maf, ref_fasta, out_bam, handle) | |
137 if msg: | |
138 stop_err(msg) | |
139 | |
119 def clean_up(temp, name): | 140 def clean_up(temp, name): |
120 folder = "%s/%s_assembly" % (temp, name) | 141 folder = "%s/%s_assembly" % (temp, name) |
121 if os.path.isdir(folder): | 142 if os.path.isdir(folder): |
122 shutil.rmtree(folder) | 143 shutil.rmtree(folder) |
123 | 144 |
125 #Currently Galaxy puts us somewhere safe like: | 146 #Currently Galaxy puts us somewhere safe like: |
126 #/opt/galaxy-dist/database/job_working_directory/846/ | 147 #/opt/galaxy-dist/database/job_working_directory/846/ |
127 temp = "." | 148 temp = "." |
128 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8] | 149 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8] |
129 name = "MIRA" | 150 name = "MIRA" |
130 manifest, out_maf, out_fasta, out_log = sys.argv[1:5] | 151 manifest, out_maf, out_bam, out_fasta, out_log = sys.argv[1:] |
131 | 152 |
132 fix_threads(manifest) | 153 fix_threads(manifest) |
133 | 154 |
134 start_time = time.time() | 155 start_time = time.time() |
135 #cmd_list =sys.argv[8:] | 156 #cmd_list =sys.argv[8:] |
178 run_time = time.time() - start_time | 199 run_time = time.time() - start_time |
179 return_code = child.returncode | 200 return_code = child.returncode |
180 handle.write("\n") | 201 handle.write("\n") |
181 handle.write("============================ MIRA has finished ===============================\n") | 202 handle.write("============================ MIRA has finished ===============================\n") |
182 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) | 203 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) |
183 print "MIRA took %0.2f hours" % (run_time / 3600.0) | |
184 if return_code: | 204 if return_code: |
205 print "MIRA took %0.2f hours" % (run_time / 3600.0) | |
185 handle.write("Return error code %i from command:\n" % return_code) | 206 handle.write("Return error code %i from command:\n" % return_code) |
186 handle.write(cmd + "\n") | 207 handle.write(cmd + "\n") |
187 handle.close() | 208 handle.close() |
188 clean_up(temp, name) | 209 clean_up(temp, name) |
189 log_manifest(manifest) | 210 log_manifest(manifest) |
200 e.close() | 221 e.close() |
201 handle.write("============================ (end of ec.log) =================================\n") | 222 handle.write("============================ (end of ec.log) =================================\n") |
202 handle.flush() | 223 handle.flush() |
203 | 224 |
204 #print "Collecting output..." | 225 #print "Collecting output..." |
226 start_time = time.time() | |
205 collect_output(temp, name, handle) | 227 collect_output(temp, name, handle) |
228 collect_time = time.time() - start_time | |
229 handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) | |
230 print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" % (run_time / 3600.0, collect_time / 60.0)) | |
206 | 231 |
207 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): | 232 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): |
208 #Treat as an error, but doing this AFTER collect_output | 233 #Treat as an error, but doing this AFTER collect_output |
209 sys.stderr.write("Extract Large Contigs failed\n") | 234 sys.stderr.write("Extract Large Contigs failed\n") |
210 handle.write("Extract Large Contigs failed\n") | 235 handle.write("Extract Large Contigs failed\n") |
212 sys.exit(1) | 237 sys.exit(1) |
213 | 238 |
214 #print "Cleaning up..." | 239 #print "Cleaning up..." |
215 clean_up(temp, name) | 240 clean_up(temp, name) |
216 | 241 |
242 handle.write("\nDone\n") | |
217 handle.close() | 243 handle.close() |
218 print("Done") | 244 print("Done") |