| 
25
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 """A simple wrapper script to call MIRA and collect its output.
 | 
| 
 | 
     3 
 | 
| 
 | 
     4 This focuses on the miraconvert binary.
 | 
| 
 | 
     5 """
 | 
| 
 | 
     6 import os
 | 
| 
 | 
     7 import sys
 | 
| 
 | 
     8 import subprocess
 | 
| 
 | 
     9 import shutil
 | 
| 
 | 
    10 import time
 | 
| 
 | 
    11 import tempfile
 | 
| 
 | 
    12 from optparse import OptionParser
 | 
| 
 | 
    13 try:
 | 
| 
 | 
    14     from io import BytesIO
 | 
| 
 | 
    15 except ImportError:
 | 
| 
 | 
    16     #Should we worry about Python 2.5 or older?
 | 
| 
 | 
    17     from StringIO import StringIO as BytesIO
 | 
| 
 | 
    18 
 | 
| 
 | 
    19 #Do we need any PYTHONPATH magic?
 | 
| 
 | 
    20 from mira4_make_bam import depad
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 WRAPPER_VER = "0.0.7"  # Keep in sync with the XML file
 | 
| 
 | 
    23 
 | 
| 
 | 
    24 def sys_exit(msg, err=1):
 | 
| 
 | 
    25     sys.stderr.write(msg+"\n")
 | 
| 
 | 
    26     sys.exit(err)
 | 
| 
 | 
    27 
 | 
| 
 | 
    28 def run(cmd):
 | 
| 
 | 
    29     #Avoid using shell=True when we call subprocess to ensure if the Python
 | 
| 
 | 
    30     #script is killed, so too is the child process.
 | 
| 
 | 
    31     try:
 | 
| 
 | 
    32         child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
 | 
| 
 | 
    33     except Exception, err:
 | 
| 
 | 
    34         sys_exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
 | 
| 
 | 
    35     #Use .communicate as can get deadlocks with .wait(),
 | 
| 
 | 
    36     stdout, stderr = child.communicate()
 | 
| 
 | 
    37     return_code = child.returncode
 | 
| 
 | 
    38     if return_code:
 | 
| 
 | 
    39         cmd_str = " ".join(cmd)  # doesn't quote spaces etc
 | 
| 
 | 
    40         if stderr and stdout:
 | 
| 
 | 
    41             sys_exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr))
 | 
| 
 | 
    42         else:
 | 
| 
 | 
    43             sys_exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr))
 | 
| 
 | 
    44 
 | 
| 
 | 
    45 def get_version(mira_binary):
 | 
| 
 | 
    46     """Run MIRA to find its version number"""
 | 
| 
 | 
    47     # At the commend line I would use: mira -v | head -n 1
 | 
| 
 | 
    48     # however there is some pipe error when doing that here.
 | 
| 
 | 
    49     cmd = [mira_binary, "-v"]
 | 
| 
 | 
    50     try:
 | 
| 
 | 
    51         child = subprocess.Popen(cmd,
 | 
| 
 | 
    52                                  stdout=subprocess.PIPE,
 | 
| 
 | 
    53                                  stderr=subprocess.STDOUT)
 | 
| 
 | 
    54     except Exception, err:
 | 
| 
 | 
    55         sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err))
 | 
| 
 | 
    56         sys.exit(1)
 | 
| 
 | 
    57     ver, tmp = child.communicate()
 | 
| 
 | 
    58     del child
 | 
| 
 | 
    59     return ver.split("\n", 1)[0].strip()
 | 
| 
 | 
    60 
 | 
| 
 | 
    61 #Parse Command Line
 | 
| 
 | 
    62 usage = """Galaxy MIRA4 wrapper script v%s - use as follows:
 | 
| 
 | 
    63 
 | 
| 
 | 
    64 $ python mira4_convert.py ...
 | 
| 
 | 
    65 
 | 
| 
 | 
    66 This will run the MIRA miraconvert binary and collect its output files as directed.
 | 
| 
 | 
    67 """ % WRAPPER_VER
 | 
| 
 | 
    68 parser = OptionParser(usage=usage)
 | 
| 
 | 
    69 parser.add_option("--input", dest="input",
 | 
| 
 | 
    70                   default=None, metavar="FILE",
 | 
| 
 | 
    71                   help="MIRA input filename")
 | 
| 
 | 
    72 parser.add_option("-x", "--min_length", dest="min_length",
 | 
| 
 | 
    73                   default="0",
 | 
| 
 | 
    74                   help="Minimum contig length")
 | 
| 
 | 
    75 parser.add_option("-y", "--min_cover", dest="min_cover",
 | 
| 
 | 
    76                   default="0",
 | 
| 
 | 
    77                   help="Minimum average contig coverage")
 | 
| 
 | 
    78 parser.add_option("-z", "--min_reads", dest="min_reads",
 | 
| 
 | 
    79                   default="0",
 | 
| 
 | 
    80                   help="Minimum reads per contig")
 | 
| 
 | 
    81 parser.add_option("--maf", dest="maf",
 | 
| 
 | 
    82                   default="", metavar="FILE",
 | 
| 
 | 
    83                   help="MIRA MAF output filename")
 | 
| 
 | 
    84 parser.add_option("--ace", dest="ace",
 | 
| 
 | 
    85                   default="", metavar="FILE",
 | 
| 
 | 
    86                   help="ACE output filename")
 | 
| 
 | 
    87 parser.add_option("--bam", dest="bam",
 | 
| 
 | 
    88                   default="", metavar="FILE",
 | 
| 
 | 
    89                   help="Unpadded BAM output filename")
 | 
| 
 | 
    90 parser.add_option("--fasta", dest="fasta",
 | 
| 
 | 
    91                   default="", metavar="FILE",
 | 
| 
 | 
    92                   help="Unpadded FASTA output filename")
 | 
| 
 | 
    93 parser.add_option("--cstats", dest="cstats",
 | 
| 
 | 
    94                   default="", metavar="FILE",
 | 
| 
 | 
    95                   help="Contig statistics filename")
 | 
| 
 | 
    96 parser.add_option("-v", "--version", dest="version",
 | 
| 
 | 
    97                   default=False, action="store_true",
 | 
| 
 | 
    98                   help="Show version and quit")
 | 
| 
 | 
    99 options, args = parser.parse_args()
 | 
| 
 | 
   100 if args:
 | 
| 
 | 
   101     sys_exit("Expected options (e.g. --input example.maf), not arguments")
 | 
| 
 | 
   102 
 | 
| 
 | 
   103 input_maf = options.input
 | 
| 
 | 
   104 out_maf = options.maf
 | 
| 
 | 
   105 out_bam = options.bam
 | 
| 
 | 
   106 out_fasta = options.fasta
 | 
| 
 | 
   107 out_ace = options.ace
 | 
| 
 | 
   108 out_cstats = options.cstats
 | 
| 
 | 
   109 
 | 
| 
 | 
   110 try:
 | 
| 
 | 
   111     mira_path = os.environ["MIRA4"]
 | 
| 
 | 
   112 except KeyError:
 | 
| 
 | 
   113     sys_exit("Environment variable $MIRA4 not set")
 | 
| 
 | 
   114 mira_convert = os.path.join(mira_path, "miraconvert")
 | 
| 
 | 
   115 if not os.path.isfile(mira_convert):
 | 
| 
 | 
   116     sys_exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s"
 | 
| 
 | 
   117              % (mira_convert, ", ".join(os.listdir(mira_path))))
 | 
| 
 | 
   118 
 | 
| 
 | 
   119 mira_convert_ver = get_version(mira_convert)
 | 
| 
 | 
   120 if not mira_convert_ver.strip().startswith("4.0"):
 | 
| 
 | 
   121     sys_exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert))
 | 
| 
 | 
   122 if options.version:
 | 
| 
 | 
   123     print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER))
 | 
| 
 | 
   124     sys.exit(0)
 | 
| 
 | 
   125 
 | 
| 
 | 
   126 if not input_maf:
 | 
| 
 | 
   127     sys_exit("Input MIRA file is required")
 | 
| 
 | 
   128 elif not os.path.isfile(input_maf):
 | 
| 
 | 
   129     sys_exit("Missing input MIRA file: %r" % input_maf)
 | 
| 
 | 
   130 
 | 
| 
 | 
   131 if not (out_maf or out_bam or out_fasta or out_ace or out_cstats):
 | 
| 
 | 
   132     sys_exit("No output requested")
 | 
| 
 | 
   133 
 | 
| 
 | 
   134 
 | 
| 
 | 
   135 def check_min_int(value, name):
 | 
| 
 | 
   136     try:
 | 
| 
 | 
   137         i = int(value)
 | 
| 
 | 
   138     except:
 | 
| 
 | 
   139         sys_exit("Bad %s setting, %r" % (name, value))
 | 
| 
 | 
   140     if i < 0:
 | 
| 
 | 
   141         sys_exit("Negative %s setting, %r" % (name, value))
 | 
| 
 | 
   142     return i
 | 
| 
 | 
   143 
 | 
| 
 | 
   144 min_length = check_min_int(options.min_length, "minimum length")
 | 
| 
 | 
   145 min_cover = check_min_int(options.min_cover, "minimum cover")
 | 
| 
 | 
   146 min_reads = check_min_int(options.min_reads, "minimum reads")
 | 
| 
 | 
   147 
 | 
| 
 | 
   148 #TODO - Run MIRA in /tmp or a configurable directory?
 | 
| 
 | 
   149 #Currently Galaxy puts us somewhere safe like:
 | 
| 
 | 
   150 #/opt/galaxy-dist/database/job_working_directory/846/
 | 
| 
 | 
   151 temp = "."
 | 
| 
 | 
   152 
 | 
| 
 | 
   153 
 | 
| 
 | 
   154 cmd_list = [mira_convert]
 | 
| 
 | 
   155 if min_length:
 | 
| 
 | 
   156     cmd_list.extend(["-x", str(min_length)])
 | 
| 
 | 
   157 if min_cover:
 | 
| 
 | 
   158     cmd_list.extend(["-y", str(min_cover)])
 | 
| 
 | 
   159 if min_reads:
 | 
| 
 | 
   160     cmd_list.extend(["-z", str(min_reads)])
 | 
| 
 | 
   161 cmd_list.extend(["-f", "maf", input_maf, os.path.join(temp, "converted")])
 | 
| 
 | 
   162 if out_maf:
 | 
| 
 | 
   163     cmd_list.append("maf")
 | 
| 
 | 
   164 if out_bam:
 | 
| 
 | 
   165     cmd_list.append("samnbb")
 | 
| 
 | 
   166     if not out_fasta:
 | 
| 
 | 
   167         #Need this for samtools depad
 | 
| 
 | 
   168         out_fasta = os.path.join(temp, "depadded.fasta")
 | 
| 
 | 
   169 if out_fasta:
 | 
| 
 | 
   170     cmd_list.append("fasta")
 | 
| 
 | 
   171 if out_ace:
 | 
| 
 | 
   172     cmd_list.append("ace")
 | 
| 
 | 
   173 if out_cstats:
 | 
| 
 | 
   174     cmd_list.append("cstats")
 | 
| 
 | 
   175 run(cmd_list)
 | 
| 
 | 
   176 
 | 
| 
 | 
   177 def collect(old, new):
 | 
| 
 | 
   178     if not os.path.isfile(old):
 | 
| 
 | 
   179         sys_exit("Missing expected output file %s" % old)
 | 
| 
 | 
   180     shutil.move(old, new)
 | 
| 
 | 
   181 
 | 
| 
 | 
   182 if out_maf:
 | 
| 
 | 
   183     collect(os.path.join(temp, "converted.maf"), out_maf)
 | 
| 
 | 
   184 if out_fasta:
 | 
| 
 | 
   185     #Can we look at the MAF file to see if there are multiple strains?
 | 
| 
 | 
   186     old = os.path.join(temp, "converted_AllStrains.unpadded.fasta")
 | 
| 
 | 
   187     if os.path.isfile(old):
 | 
| 
 | 
   188         collect(old, out_fasta)
 | 
| 
 | 
   189     else:
 | 
| 
 | 
   190         #Might the output be filtered down to zero contigs?
 | 
| 
 | 
   191         old = os.path.join(temp, "converted.fasta")
 | 
| 
 | 
   192         if not os.path.isfile(old):
 | 
| 
 | 
   193             sys_exit("Missing expected output FASTA file")
 | 
| 
 | 
   194         elif os.path.getsize(old) == 0:
 | 
| 
 | 
   195             print("Warning - no contigs (harsh filters?)")
 | 
| 
 | 
   196             collect(old, out_fasta)
 | 
| 
 | 
   197         else:
 | 
| 
 | 
   198             sys_exit("Missing expected output FASTA file (only generic file present)")
 | 
| 
 | 
   199 if out_ace:
 | 
| 
 | 
   200     collect(os.path.join(temp, "converted.maf"), out_ace)
 | 
| 
 | 
   201 if out_cstats:
 | 
| 
 | 
   202     collect(os.path.join(temp, "converted_info_contigstats.txt"), out_cstats)
 | 
| 
 | 
   203 
 | 
| 
 | 
   204 if out_bam:
 | 
| 
 | 
   205     assert os.path.isfile(out_fasta)
 | 
| 
 | 
   206     old = os.path.join(temp, "converted.samnbb")
 | 
| 
 | 
   207     if not os.path.isfile(old):
 | 
| 
 | 
   208         old = os.path.join(temp, "converted.sam")
 | 
| 
 | 
   209     if not os.path.isfile(old):
 | 
| 
 | 
   210         sys_exit("Missing expected intermediate file %s" % old)
 | 
| 
 | 
   211     h = BytesIO()
 | 
| 
 | 
   212     msg = depad(out_fasta, old, out_bam, h)
 | 
| 
 | 
   213     if msg:
 | 
| 
 | 
   214         print(msg)
 | 
| 
 | 
   215         print(h.getvalue())
 | 
| 
 | 
   216         h.close()
 | 
| 
 | 
   217         sys.exit(1)
 | 
| 
 | 
   218     h.close()
 | 
| 
 | 
   219     if out_fasta == os.path.join(temp, "depadded.fasta"):
 | 
| 
 | 
   220         #Not asked for by Galaxy, no longer needed
 | 
| 
 | 
   221         os.remove(out_fasta)
 | 
| 
 | 
   222 
 | 
| 
 | 
   223 if min_length or min_cover or min_reads:
 | 
| 
 | 
   224     print("Filtered.")
 | 
| 
 | 
   225 else:
 | 
| 
 | 
   226     print("Converted.")
 |