Mercurial > repos > peterjc > mira4_assembler
annotate tools/mira4_0/mira4_convert.py @ 33:1291ed21789f draft
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 1d7d466d01b23d03d214e93f1f8efa19cfa18268
| author | peterjc |
|---|---|
| date | Fri, 02 Jun 2017 11:22:01 -0400 |
| parents | 56b421d59805 |
| children | 0785a6537f3e |
| rev | line source |
|---|---|
| 25 | 1 #!/usr/bin/env python |
| 2 """A simple wrapper script to call MIRA and collect its output. | |
| 3 | |
| 4 This focuses on the miraconvert binary. | |
| 5 """ | |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
6 |
|
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
7 from __future__ import print_function |
|
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
8 |
| 25 | 9 import os |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
10 import shutil |
| 25 | 11 import subprocess |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
12 import sys |
|
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
13 |
| 25 | 14 from optparse import OptionParser |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
15 |
| 25 | 16 try: |
| 17 from io import BytesIO | |
| 18 except ImportError: | |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
19 # Should we worry about Python 2.5 or older? |
| 25 | 20 from StringIO import StringIO as BytesIO |
| 21 | |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
22 # Do we need any PYTHONPATH magic? |
| 25 | 23 from mira4_make_bam import depad |
| 24 | |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
25 WRAPPER_VER = "0.0.10" # Keep in sync with the XML file |
| 25 | 26 |
| 27 | |
def run(cmd):
    """Run a command, exiting the script with a message if it fails.

    Args:
        cmd: The command as a list of strings (program followed by its
            arguments). It is passed to ``subprocess.Popen`` without a shell.

    Returns:
        None when the command finishes with a zero return code.

    Raises:
        SystemExit: If the command cannot be started, or if it finishes with
            a non-zero return code. The exit message includes the command
            line and whatever the command wrote to stdout/stderr.
    """
    # Avoid using shell=True when we call subprocess to ensure if the Python
    # script is killed, so too is the child process.
    try:
        child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except Exception as err:
        sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
    # Use .communicate as can get deadlocks with .wait(),
    stdout, stderr = child.communicate()
    # Under Python 3 the pipes yield bytes; decode so the error message shows
    # the tool's output as text rather than a b'...' repr. Under Python 2 the
    # values are already str and are left untouched.
    if not isinstance(stdout, str):
        stdout = stdout.decode("utf-8", "replace")
    if not isinstance(stderr, str):
        stderr = stderr.decode("utf-8", "replace")
    return_code = child.returncode
    if return_code:
        cmd_str = " ".join(cmd)  # doesn't quote spaces etc
        if stderr and stdout:
            sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr))
        else:
            # Report whichever stream has content, so that output written
            # only to stdout is not silently lost.
            sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr or stdout))
| 25 | 44 |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
45 |
def get_version(mira_binary):
    """Run MIRA to find its version number.

    Args:
        mira_binary: Path (or name) of the MIRA executable to query. It is
            invoked with the single argument ``-v``.

    Returns:
        The first line of the combined stdout/stderr output, stripped of
        surrounding whitespace, as a text string.

    Raises:
        SystemExit: With exit status 1 (after writing a message to stderr)
            if the binary cannot be started.
    """
    # At the command line I would use: mira -v | head -n 1
    # however there is some pipe error when doing that here.
    cmd = [mira_binary, "-v"]
    try:
        child = subprocess.Popen(cmd,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT)
    except Exception as err:
        sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
        sys.exit(1)
    # stderr is merged into stdout above, so the second value is always None.
    ver, _ = child.communicate()
    # Under Python 3 the pipe yields bytes, and bytes.split("\n") would raise
    # TypeError; decode first so callers always get text back.
    if not isinstance(ver, str):
        ver = ver.decode("utf-8", "replace")
    return ver.split("\n", 1)[0].strip()
| 61 | |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
62 |
|
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
63 # Parse Command Line |
usage = """Galaxy MIRA4 wrapper script v%s - use as follows:

$ python mira4_convert.py ...

This will run the MIRA miraconvert binary and collect its output files as directed.
""" % WRAPPER_VER
parser = OptionParser(usage=usage)
parser.add_option("--input", dest="input",
                  default=None, metavar="FILE",
                  help="MIRA input filename")
parser.add_option("-x", "--min_length", dest="min_length",
                  default="0",
                  help="Minimum contig length")
parser.add_option("-y", "--min_cover", dest="min_cover",
                  default="0",
                  help="Minimum average contig coverage")
parser.add_option("-z", "--min_reads", dest="min_reads",
                  default="0",
                  help="Minimum reads per contig")
parser.add_option("--maf", dest="maf",
                  default="", metavar="FILE",
                  help="MIRA MAF output filename")
parser.add_option("--ace", dest="ace",
                  default="", metavar="FILE",
                  help="ACE output filename")
parser.add_option("--bam", dest="bam",
                  default="", metavar="FILE",
                  help="Unpadded BAM output filename")
parser.add_option("--fasta", dest="fasta",
                  default="", metavar="FILE",
                  help="Unpadded FASTA output filename")
parser.add_option("--cstats", dest="cstats",
                  default="", metavar="FILE",
                  help="Contig statistics filename")
parser.add_option("-v", "--version", dest="version",
                  default=False, action="store_true",
                  help="Show version and quit")
options, args = parser.parse_args()
if args:
    sys.exit("Expected options (e.g. --input example.maf), not arguments")

# An empty string (the default) means that output type was not requested.
input_maf = options.input
out_maf = options.maf
out_bam = options.bam
out_fasta = options.fasta
out_ace = options.ace
out_cstats = options.cstats

# The miraconvert binary is looked up in the folder named by $MIRA4.
try:
    mira_path = os.environ["MIRA4"]
except KeyError:
    sys.exit("Environment variable $MIRA4 not set")
mira_convert = os.path.join(mira_path, "miraconvert")
if not os.path.isfile(mira_convert):
    # Only list the folder when it really is one; otherwise os.listdir would
    # raise OSError and hide the intended error message behind a traceback.
    if os.path.isdir(mira_path):
        sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
                 % (mira_convert, ", ".join(os.listdir(mira_path))))
    sys.exit("Missing miraconvert under $MIRA4, %r\n$MIRA4 is not a directory"
             % mira_convert)

# The version check runs before --version is honoured, so even a version
# query fails if the binary is not a 4.0 release.
mira_convert_ver = get_version(mira_convert)
if not mira_convert_ver.strip().startswith("4.0"):
    sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
if options.version:
    print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
    sys.exit(0)

if not input_maf:
    sys.exit("Input MIRA file is required")
elif not os.path.isfile(input_maf):
    sys.exit("Missing input MIRA file: %r" % input_maf)

if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
    sys.exit("No output requested")
| 25 | 135 |
| 136 | |
def check_min_int(value, name):
    """Convert a threshold setting to a non-negative integer.

    Args:
        value: The raw setting, normally a string from the command line.
        name: Human readable name of the setting, used in error messages.

    Returns:
        The setting as an ``int`` (zero or greater).

    Raises:
        SystemExit: If the value cannot be converted to an integer, or if
            the integer is negative.
    """
    try:
        i = int(value)
    except (TypeError, ValueError):
        # TypeError covers non-string input such as None, which int() rejects
        # with a different exception than a malformed string.
        sys.exit("Bad %s setting, %r" % (name, value))
    if i < 0:
        sys.exit("Negative %s setting, %r" % (name, value))
    return i
| 145 | |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
146 |
# Validate the three filter thresholds (each becomes a non-negative int).
min_length = check_min_int(options.min_length, "minimum length")
min_cover = check_min_int(options.min_cover, "minimum cover")
min_reads = check_min_int(options.min_reads, "minimum reads")

# TODO - Run MIRA in /tmp or a configurable directory?
# Currently Galaxy puts us somewhere safe like:
# /opt/galaxy-dist/database/job_working_directory/846/
temp = "."


# Assemble the miraconvert call: binary, any non-zero filters, the input
# format and file, the output basename, then the requested output types.
cmd_list = [mira_convert]
for flag, threshold in (("-x", min_length), ("-y", min_cover), ("-z", min_reads)):
    if threshold:
        cmd_list.extend([flag, str(threshold)])
cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])

if out_bam and not out_fasta:
    # Need this for samtools depad
    out_fasta = os.path.join(temp, "depadded.fasta")

# Output type keywords must be appended in this fixed order.
for requested, output_type in (
    (out_maf, "maf"),
    (out_bam, "samnbb"),
    (out_fasta, "fasta"),
    (out_ace, "ace"),
    (out_cstats, "cstats"),
):
    if requested:
        cmd_list.append(output_type)
run(cmd_list)
| 179 | |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
180 |
def collect(old, new):
    """Move the output file *old* to its final location *new*.

    Exits the script with an error message if *old* is not an existing file.
    """
    if os.path.isfile(old):
        shutil.move(old, new)
        return
    sys.exit("Missing expected output file %s" % old)
| 185 | |
|
32
56b421d59805
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit fd979d17340cde155de176604744831d9597c6b6
peterjc
parents:
31
diff
changeset
|
186 |
# Move each requested output from the working folder to the filename the
# caller asked for. All miraconvert outputs share the "converted" basename.
if out_maf:
    collect(os.path.join(temp, "converted.maf"), out_maf)
if out_fasta:
    # Can we look at the MAF file to see if there are multiple strains?
    old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
    if os.path.isfile(old):
        collect(old, out_fasta)
    else:
        # Might the output be filtered down to zero contigs?
        old = os.path.join(temp, "converted.fasta")
        if not os.path.isfile(old):
            sys.exit("Missing expected output FASTA file")
        elif os.path.getsize(old) == 0:
            print("Warning - no contigs (harsh filters?)")
            collect(old, out_fasta)
        else:
            sys.exit("Missing expected output FASTA file (only generic file present)")
if out_ace:
    # The ACE output has its own extension; collecting "converted.maf" here
    # would either fail (file already moved or never written) or hand back
    # a MAF file labelled as ACE.
    collect(os.path.join(temp, "converted.ace"), out_ace)
if out_cstats:
    collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)

if out_bam:
    # Explicit check rather than assert, which is stripped under "python -O".
    if not os.path.isfile(out_fasta):
        sys.exit("Missing expected FASTA file %s" % out_fasta)
    old = os.path.join(temp, "converted.samnbb")
    if not os.path.isfile(old):
        old = os.path.join(temp, "converted.sam")
    if not os.path.isfile(old):
        sys.exit("Missing expected intermediate file %s" % old)
    # NOTE(review): depad is imported from mira4_make_bam; a non-empty return
    # value is treated as an error message and the handle as its log output.
    h = BytesIO()
    try:
        msg = depad(out_fasta, old, out_bam, h)
        if msg:
            print(msg)
            print(h.getvalue())
            sys.exit(1)
    finally:
        h.close()
    if out_fasta == os.path.join(temp, "depadded.fasta"):
        # Not asked for by Galaxy, no longer needed
        os.remove(out_fasta)

if min_length or min_cover or min_reads:
    print("Filtered.")
else:
    print("Converted.")
