comparison tools/mira4/mira4.py @ 4:df86ed992a1b draft

Uploaded preview 4, lots of work on mapping
author peterjc
date Fri, 11 Oct 2013 04:28:45 -0400
parents 32f693f6e741
children ffefb87bd414
comparison
equal deleted inserted replaced
3:c7538ae82a24 4:df86ed992a1b
29 ver, tmp = child.communicate() 29 ver, tmp = child.communicate()
30 del child 30 del child
31 return ver.split("\n", 1)[0] 31 return ver.split("\n", 1)[0]
32 32
33 33
34 os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc2_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"] 34 os.environ["PATH"] = "/mnt/galaxy/downloads/mira_4.0rc3_linux-gnu_x86_64_static/bin/:%s" % os.environ["PATH"]
35 mira_binary = "mira" 35 mira_binary = "mira"
36 mira_ver = get_version(mira_binary) 36 mira_ver = get_version(mira_binary)
37 if not mira_ver.strip().startswith("4.0"): 37 if not mira_ver.strip().startswith("4.0"):
38 stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver) 38 stop_err("This wrapper is for MIRA V4.0, not:\n%s" % mira_ver)
39 if "-v" in sys.argv: 39 if "-v" in sys.argv:
49 for line in h: 49 for line in h:
50 sys.stderr.write(line) 50 sys.stderr.write(line)
51 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60)) 51 sys.stderr.write("\n%s\nEnd of manifest\n%s\n" % ("="*60, "="*60))
52 52
53 53
54 def massage_symlinks(manifest): 54 def collect_output(temp, name, handle):
55 """Create FASTQ aliases and edit the manifest to use them.
56
57 Short term measure for MIRA 4.0RC2 which depends on data file
58 extensions to decide the file format, and doesn't like *.dat
59 as used in Galaxy.
60 """
61 base = os.path.split(manifest)[0]
62 with open(manifest) as h:
63 lines = h.readlines()
64 f = 0
65 for i, line in enumerate(lines):
66 if not line.startswith("data ="):
67 continue
68 #Assumes no spaces in filename, would they have to be escaped?
69 new_line = "data ="
70 for filename in line[6:].strip().split():
71 if not filename:
72 continue
73 assert os.path.isfile(filename), filename
74 f += 1
75 alias = os.path.join(base, "input%i.fastq" % f)
76 new_line += " " + alias
77 cmd = "ln -s %s %s" % (filename, alias)
78 if os.system(cmd):
79 stop_err("Problem creating FASTQ alias:\n%s" % cmd)
80 lines[i] = new_line + "\n"
81 with open(manifest, "w") as h:
82 for line in lines:
83 #sys.stderr.write(line)
84 h.write(line)
85 return True
86
87
88 def collect_output(temp, name):
89 n3 = (temp, name, name, name) 55 n3 = (temp, name, name, name)
90 f = "%s/%s_assembly/%s_d_results" % (temp, name, name) 56 f = "%s/%s_assembly/%s_d_results" % (temp, name, name)
91 if not os.path.isdir(f): 57 if not os.path.isdir(f):
92 log_manifest(manifest) 58 log_manifest(manifest)
93 stop_err("Missing output folder") 59 stop_err("Missing output folder")
94 if not os.listdir(f): 60 if not os.listdir(f):
95 log_manifest(manifest) 61 log_manifest(manifest)
96 stop_err("Empty output folder") 62 stop_err("Empty output folder")
97 missing = [] 63 missing = []
98 for old, new in [("%s/%s_out.maf" % (f, name), out_maf), 64
99 ("%s/%s_out.unpadded.fasta" % (f, name), out_fasta)]: 65 old_maf = "%s/%s_out.maf" % (f, name)
66 if not os.path.isfile(old_maf):
67 #Triggered extractLargeContigs.sh?
68 old_maf = "%s/%s_LargeContigs_out.maf" % (f, name)
69
70 #De novo or single strain mapping,
71 old_fasta = "%s/%s_out.unpadded.fasta" % (f, name)
72 if not os.path.isfile(old_fasta):
73 #Mapping (currently StrainX versus reference)
74 old_fasta = "%s/%s_out_StrainX.unpadded.fasta" % (f, name)
75 if not os.path.isfile(old_fasta):
76 #Triggered extractLargeContigs.sh?
77 old_fasta = "%s/%s_LargeContigs_out.fasta" % (f, name)
78
79 missing = False
80 for old, new in [(old_maf, out_maf),
81 (old_fasta, out_fasta)]:
100 if not os.path.isfile(old): 82 if not os.path.isfile(old):
101 missing.append(os.path.splitext(old)[-1]) 83 missing = True
102 else: 84 else:
85 handle.write("Capturing %s\n" % old)
103 shutil.move(old, new) 86 shutil.move(old, new)
104 if missing: 87 if missing:
105 log_manifest(manifest) 88 log_manifest(manifest)
106 sys.stderr.write("Contents of %r: %r\n" % (f, os.listdir(f))) 89 sys.stderr.write("Contents of %r:\n" % f)
107 stop_err("Missing output files: %s" % ", ".join(missing)) 90 for filename in sorted(os.listdir(f)):
91 sys.stderr.write("%s\n" % filename)
108 92
109 def clean_up(temp, name): 93 def clean_up(temp, name):
110 folder = "%s/%s_assembly" % (temp, name) 94 folder = "%s/%s_assembly" % (temp, name)
111 if os.path.isdir(folder): 95 if os.path.isdir(folder):
112 shutil.rmtree(folder) 96 shutil.rmtree(folder)
116 #/opt/galaxy-dist/database/job_working_directory/846/ 100 #/opt/galaxy-dist/database/job_working_directory/846/
117 temp = "." 101 temp = "."
118 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8] 102 #name, out_fasta, out_qual, out_ace, out_caf, out_wig, out_log = sys.argv[1:8]
119 name = "MIRA" 103 name = "MIRA"
120 manifest, out_maf, out_fasta, out_log = sys.argv[1:5] 104 manifest, out_maf, out_fasta, out_log = sys.argv[1:5]
121
122 #Hack until MIRA v4 lets us specify file format explicitly,
123 massage_symlinks(manifest)
124 105
125 start_time = time.time() 106 start_time = time.time()
126 #cmd_list =sys.argv[8:] 107 #cmd_list =sys.argv[8:]
127 cmd_list = [mira_binary, manifest] 108 cmd_list = [mira_binary, manifest]
128 cmd = " ".join(cmd_list) 109 cmd = " ".join(cmd_list)
140 121
141 #print os.path.abspath(".") 122 #print os.path.abspath(".")
142 #print cmd 123 #print cmd
143 124
144 handle = open(out_log, "w") 125 handle = open(out_log, "w")
126 handle.write("======================== MIRA manifest (instructions) ========================\n")
127 m = open(manifest, "rU")
128 for line in m:
129 handle.write(line)
130 m.close()
131 del m
132 handle.write("\n")
133 handle.write("============================ Starting MIRA now ===============================\n")
134 handle.flush()
145 try: 135 try:
146 #Run MIRA 136 #Run MIRA
147 child = subprocess.Popen(cmd_list, 137 child = subprocess.Popen(cmd_list,
148 stdout=handle, 138 stdout=handle,
149 stderr=subprocess.STDOUT) 139 stderr=subprocess.STDOUT)
157 #Use .communicate as can get deadlocks with .wait(), 147 #Use .communicate as can get deadlocks with .wait(),
158 stdout, stderr = child.communicate() 148 stdout, stderr = child.communicate()
159 assert not stdout and not stderr #Should be empty as sent to handle 149 assert not stdout and not stderr #Should be empty as sent to handle
160 run_time = time.time() - start_time 150 run_time = time.time() - start_time
161 return_code = child.returncode 151 return_code = child.returncode
162 handle.write("\n\nMIRA took %0.2f minutes\n" % (run_time / 60.0)) 152 handle.write("\n")
163 print "MIRA took %0.2f minutes" % (run_time / 60.0) 153 handle.write("============================ MIRA has finished ===============================\n")
154 handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0))
155 print "MIRA took %0.2f hours" % (run_time / 3600.0)
164 if return_code: 156 if return_code:
165 handle.write("Return error code %i from command:\n" % return_code) 157 handle.write("Return error code %i from command:\n" % return_code)
166 handle.write(cmd + "\n") 158 handle.write(cmd + "\n")
167 handle.close() 159 handle.close()
168 clean_up(temp, name) 160 clean_up(temp, name)
169 log_manifest(manifest) 161 log_manifest(manifest)
170 stop_err("Return error code %i from command:\n%s" % (return_code, cmd), 162 stop_err("Return error code %i from command:\n%s" % (return_code, cmd),
171 return_code) 163 return_code)
172 handle.close() 164 handle.flush()
165
166 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
167 handle.write("\n")
168 handle.write("====================== Extract Large Contigs failed ==========================\n")
169 e = open("MIRA_assembly/MIRA_d_results/ec.log", "rU")
170 for line in e:
171 handle.write(line)
172 e.close()
173 handle.write("============================ (end of ec.log) =================================\n")
174 handle.flush()
173 175
174 #print "Collecting output..." 176 #print "Collecting output..."
175 collect_output(temp, name) 177 collect_output(temp, name, handle)
178
179 if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"):
180 #Treat as an error, but doing this AFTER collect_output
181 sys.stderr.write("Extract Large Contigs failed\n")
182 handle.write("Extract Large Contigs failed\n")
183 handle.close()
184 sys.exit(1)
176 185
177 #print "Cleaning up..." 186 #print "Cleaning up..."
178 clean_up(temp, name) 187 clean_up(temp, name)
179 188
180 print "Done" 189 handle.close()
190 print("Done")