Mercurial > repos > peterjc > mira4_assembler
changeset 39:bbf14bb9607b draft default tip
planemo upload for repository https://github.com/peterjc/galaxy_mira/tree/master/tools/mira4_0 commit 89578746a1c5b29c84a173d8b2709f086f69a7b6
| author | peterjc | 
|---|---|
| date | Mon, 03 Jun 2019 13:29:00 -0400 | 
| parents | cee8f9005e43 | 
| children | |
| files | tools/mira4_0/mira4.py tools/mira4_0/mira4_bait.py tools/mira4_0/mira4_convert.py tools/mira4_0/mira4_make_bam.py tools/mira4_0/mira4_validator.py tools/mira4_0/repository_dependencies.xml tools/mira4_0/tool_dependencies.xml | 
| diffstat | 7 files changed, 245 insertions(+), 119 deletions(-) [+] | 
line wrap: on
 line diff
--- a/tools/mira4_0/mira4.py Wed Jul 11 12:35:35 2018 -0400 +++ b/tools/mira4_0/mira4.py Mon Jun 03 13:29:00 2019 -0400 @@ -24,9 +24,12 @@ # however there is some pipe error when doing that here. cmd = [mira_binary, "-v"] try: - child = subprocess.Popen(cmd, universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + child = subprocess.Popen( + cmd, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) sys.exit(1) @@ -36,31 +39,56 @@ # Parse Command Line -usage = """Galaxy MIRA4 wrapper script v%s - use as follows: +usage = ( + """Galaxy MIRA4 wrapper script v%s - use as follows: $ python mira4.py ... This will run the MIRA binary and collect its output files as directed. -""" % WRAPPER_VER +""" + % WRAPPER_VER +) parser = OptionParser(usage=usage) -parser.add_option("-m", "--manifest", dest="manifest", - default=None, metavar="FILE", - help="MIRA manifest filename") -parser.add_option("--maf", dest="maf", - default="-", metavar="FILE", - help="MIRA MAF output filename") -parser.add_option("--bam", dest="bam", - default="-", metavar="FILE", - help="Unpadded BAM output filename") -parser.add_option("--fasta", dest="fasta", - default="-", metavar="FILE", - help="Unpadded FASTA output filename") -parser.add_option("--log", dest="log", - default="-", metavar="FILE", - help="MIRA logging output filename") -parser.add_option("-v", "--version", dest="version", - default=False, action="store_true", - help="Show version and quit") +parser.add_option( + "-m", + "--manifest", + dest="manifest", + default=None, + metavar="FILE", + help="MIRA manifest filename", +) +parser.add_option( + "--maf", dest="maf", default="-", metavar="FILE", help="MIRA MAF output filename" +) +parser.add_option( + "--bam", + dest="bam", + default="-", + metavar="FILE", + help="Unpadded BAM output filename", +) +parser.add_option( + "--fasta", + dest="fasta", + default="-", + metavar="FILE", + help="Unpadded FASTA output filename", +) +parser.add_option( + "--log", + dest="log", + default="-", + metavar="FILE", + help="MIRA logging output filename", +) +parser.add_option( + "-v", + "--version", + dest="version", + default=False, + action="store_true", + help="Show version and quit", +) options, args = parser.parse_args() manifest = options.manifest out_maf = options.maf @@ -72,14 +100,20 @@ mira_path = os.environ["MIRA4"] mira_binary = os.path.join(mira_path, "mira") if not os.path.isfile(mira_binary): - sys.exit("Missing mira under $MIRA4, %r\nFolder contained: %s" - % (mira_binary, ", ".join(os.listdir(mira_path)))) + sys.exit( + "Missing mira under $MIRA4, %r\nFolder contained: %s" + % (mira_binary, ", ".join(os.listdir(mira_path))) + ) mira_convert = os.path.join(mira_path, "miraconvert") if not os.path.isfile(mira_convert): - sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" - % (mira_convert, ", ".join(os.listdir(mira_path)))) + sys.exit( + "Missing miraconvert under $MIRA4, %r\nFolder contained: %s" + % (mira_convert, ", ".join(os.listdir(mira_path))) + ) else: - sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n") + sys.stderr.write( + "DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n" + ) mira_path = None mira_binary = "mira" mira_convert = "miraconvert" @@ -178,8 +212,7 @@ ref_fasta = "%s/%s_out_ReferenceStrain.padded.fasta" % (f, name) missing = False - for old, new in [(old_maf, out_maf), - (old_fasta, out_fasta)]: + for old, new in [(old_maf, out_maf), (old_fasta, out_fasta)]: if not os.path.isfile(old): missing = True elif not new or new == "-": @@ -245,20 +278,22 @@ handle = open(out_log, "w") else: handle = open(os.devnull, "w") -handle.write("======================== MIRA manifest (instructions) ========================\n") +handle.write( + "======================== MIRA manifest (instructions) ========================\n" +) m = open(manifest, "rU") for line in m: handle.write(line) m.close() del m handle.write("\n") -handle.write("============================ Starting MIRA now ===============================\n") +handle.write( + "============================ Starting MIRA now ===============================\n" +) handle.flush() try: # Run MIRA - child = subprocess.Popen(cmd_list, - stdout=handle, - stderr=subprocess.STDOUT) + child = subprocess.Popen(cmd_list, stdout=handle, stderr=subprocess.STDOUT) except Exception as err: log_manifest(manifest) sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) @@ -272,7 +307,9 @@ run_time = time.time() - start_time return_code = child.returncode handle.write("\n") -handle.write("============================ MIRA has finished ===============================\n") +handle.write( + "============================ MIRA has finished ===============================\n" +) handle.write("MIRA took %0.2f hours\n" % (run_time / 3600.0)) if return_code: print("MIRA took %0.2f hours" % (run_time / 3600.0)) @@ -288,22 +325,30 @@ if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): handle.write("\n") - handle.write("====================== Extract Large Contigs failed ==========================\n") + handle.write( + "====================== Extract Large Contigs failed ==========================\n" # noqa: E501 + ) e = open("MIRA_assembly/MIRA_d_results/ec.log", "rU") for line in e: handle.write(line) e.close() - handle.write("============================ (end of ec.log) =================================\n") + handle.write( + "============================ (end of ec.log) =================================\n" # noqa: E501 + ) handle.flush() # print("Collecting output...") start_time = time.time() collect_output(temp, name, handle) collect_time = time.time() - start_time -handle.write("MIRA took %0.2f hours; collecting output %0.2f minutes\n" - % (run_time / 3600.0, collect_time / 60.0)) -print("MIRA took %0.2f hours; collecting output %0.2f minutes\n" - % (run_time / 3600.0, collect_time / 60.0)) +handle.write( + "MIRA took %0.2f hours; collecting output %0.2f minutes\n" + % (run_time / 3600.0, collect_time / 60.0) +) +print( + "MIRA took %0.2f hours; collecting output %0.2f minutes\n" + % (run_time / 3600.0, collect_time / 60.0) +) if os.path.isfile("MIRA_assembly/MIRA_d_results/ec.log"): # Treat as an error, but doing this AFTER collect_output
--- a/tools/mira4_0/mira4_bait.py Wed Jul 11 12:35:35 2018 -0400 +++ b/tools/mira4_0/mira4_bait.py Mon Jun 03 13:29:00 2019 -0400 @@ -18,9 +18,12 @@ # however there is some pipe error when doing that here. cmd = [mira_binary, "-v"] try: - child = subprocess.Popen(cmd, universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + child = subprocess.Popen( + cmd, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) sys.exit(1) @@ -40,10 +43,14 @@ mira_path = os.environ["MIRA4"] mira_binary = os.path.join(mira_path, "mirabait") if not os.path.isfile(mira_binary): - sys.exit("Missing mirabait under $MIRA4, %r\nFolder contained: %s" - % (mira_binary, ", ".join(os.listdir(mira_path)))) + sys.exit( + "Missing mirabait under $MIRA4, %r\nFolder contained: %s" + % (mira_binary, ", ".join(os.listdir(mira_path))) + ) else: - sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n") + sys.stderr.write( + "DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n" + ) mira_path = None mira_binary = "mirabait" @@ -55,7 +62,16 @@ sys.exit(0) -format, output_choice, strand_choice, kmer_length, min_occurance, bait_file, in_file, out_file = sys.argv[1:] +( + format, + output_choice, + strand_choice, + kmer_length, + min_occurance, + bait_file, + in_file, + out_file, +) = sys.argv[1:] if format.startswith("fastq"): format = "fastq" @@ -67,9 +83,20 @@ assert out_file.endswith(".dat") out_file_stem = out_file[:-4] -cmd_list = [mira_binary, "-f", format, "-t", format, - "-k", kmer_length, "-n", min_occurance, - bait_file, in_file, out_file_stem] +cmd_list = [ + mira_binary, + "-f", + format, + "-t", + format, + "-k", + kmer_length, + "-n", + min_occurance, + bait_file, + in_file, + out_file_stem, +] if output_choice == "pos": pass elif output_choice == "neg": @@ -90,9 +117,12 @@ start_time = time.time() try: # Run MIRA - child = subprocess.Popen(cmd_list, universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + child = subprocess.Popen( + cmd_list, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) sys.exit(1) @@ -105,8 +135,7 @@ if return_code: sys.stderr.write(stdout) - sys.exit("Return error code %i from command:\n%s" % (return_code, cmd), - return_code) + sys.exit("Return error code %i from command:\n%s" % (return_code, cmd), return_code) # Capture output out_tmp = out_file_stem + "." + format
--- a/tools/mira4_0/mira4_convert.py Wed Jul 11 12:35:35 2018 -0400 +++ b/tools/mira4_0/mira4_convert.py Mon Jun 03 13:29:00 2019 -0400 @@ -29,8 +29,9 @@ # Avoid using shell=True when we call subprocess to ensure if the Python # script is killed, so too is the child process. try: - child = subprocess.Popen(cmd, universal_newlines=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) + child = subprocess.Popen( + cmd, universal_newlines=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) except Exception as err: sys.exit("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) # Use .communicate as can get deadlocks with .wait(), @@ -39,9 +40,14 @@ if return_code: cmd_str = " ".join(cmd) # doesn't quote spaces etc if stderr and stdout: - sys.exit("Return code %i from command:\n%s\n\n%s\n\n%s" % (return_code, cmd_str, stdout, stderr)) + sys.exit( + "Return code %i from command:\n%s\n\n%s\n\n%s" + % (return_code, cmd_str, stdout, stderr) + ) else: - sys.exit("Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr)) + sys.exit( + "Return code %i from command:\n%s\n%s" % (return_code, cmd_str, stderr) + ) def get_version(mira_binary): @@ -50,9 +56,7 @@ # however there is some pipe error when doing that here. cmd = [mira_binary, "-v"] try: - child = subprocess.Popen(cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + child = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (" ".join(cmd), err)) sys.exit(1) @@ -62,43 +66,63 @@ # Parse Command Line -usage = """Galaxy MIRA4 wrapper script v%s - use as follows: +usage = ( + """Galaxy MIRA4 wrapper script v%s - use as follows: $ python mira4_convert.py ... This will run the MIRA miraconvert binary and collect its output files as directed. -""" % WRAPPER_VER +""" + % WRAPPER_VER +) parser = OptionParser(usage=usage) -parser.add_option("--input", dest="input", - default=None, metavar="FILE", - help="MIRA input filename") -parser.add_option("-x", "--min_length", dest="min_length", - default="0", - help="Minimum contig length") -parser.add_option("-y", "--min_cover", dest="min_cover", - default="0", - help="Minimum average contig coverage") -parser.add_option("-z", "--min_reads", dest="min_reads", - default="0", - help="Minimum reads per contig") -parser.add_option("--maf", dest="maf", - default="", metavar="FILE", - help="MIRA MAF output filename") -parser.add_option("--ace", dest="ace", - default="", metavar="FILE", - help="ACE output filename") -parser.add_option("--bam", dest="bam", - default="", metavar="FILE", - help="Unpadded BAM output filename") -parser.add_option("--fasta", dest="fasta", - default="", metavar="FILE", - help="Unpadded FASTA output filename") -parser.add_option("--cstats", dest="cstats", - default="", metavar="FILE", - help="Contig statistics filename") -parser.add_option("-v", "--version", dest="version", - default=False, action="store_true", - help="Show version and quit") +parser.add_option( + "--input", dest="input", default=None, metavar="FILE", help="MIRA input filename" +) +parser.add_option( + "-x", "--min_length", dest="min_length", default="0", help="Minimum contig length" +) +parser.add_option( + "-y", + "--min_cover", + dest="min_cover", + default="0", + help="Minimum average contig coverage", +) +parser.add_option( + "-z", "--min_reads", dest="min_reads", default="0", help="Minimum reads per contig" +) +parser.add_option( + "--maf", dest="maf", default="", metavar="FILE", help="MIRA MAF output filename" +) +parser.add_option( + "--ace", dest="ace", default="", metavar="FILE", help="ACE output filename" +) +parser.add_option( + "--bam", dest="bam", default="", metavar="FILE", help="Unpadded BAM output filename" +) +parser.add_option( + "--fasta", + dest="fasta", + default="", + metavar="FILE", + help="Unpadded FASTA output filename", +) +parser.add_option( + "--cstats", + dest="cstats", + default="", + metavar="FILE", + help="Contig statistics filename", +) +parser.add_option( + "-v", + "--version", + dest="version", + default=False, + action="store_true", + help="Show version and quit", +) options, args = parser.parse_args() if args: sys.exit("Expected options (e.g. --input example.maf), not arguments") @@ -114,16 +138,22 @@ mira_path = os.environ["MIRA4"] mira_convert = os.path.join(mira_path, "miraconvert") if not os.path.isfile(mira_convert): - sys.exit("Missing miraconvert under $MIRA4, %r\nFolder contained: %s" - % (mira_convert, ", ".join(os.listdir(mira_path)))) + sys.exit( + "Missing miraconvert under $MIRA4, %r\nFolder contained: %s" + % (mira_convert, ", ".join(os.listdir(mira_path))) + ) else: - sys.stderr.write("DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n") + sys.stderr.write( + "DEBUG: Since $MIRA4 is not set, assuming mira binaries are on $PATH.\n" + ) mira_path = None mira_convert = "miraconvert" mira_convert_ver = get_version(mira_convert) if not mira_convert_ver.strip().startswith("4.0"): - sys.exit("This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert)) + sys.exit( + "This wrapper is for MIRA V4.0, not:\n%s\n%s" % (mira_convert_ver, mira_convert) + ) if options.version: print("%s, MIRA wrapper version %s" % (mira_convert_ver, WRAPPER_VER)) sys.exit(0)
--- a/tools/mira4_0/mira4_make_bam.py Wed Jul 11 12:35:35 2018 -0400 +++ b/tools/mira4_0/mira4_make_bam.py Mon Jun 03 13:29:00 2019 -0400 @@ -10,10 +10,13 @@ def run(cmd, log_handle): try: - child = subprocess.Popen(cmd, shell=True, - universal_newlines=True, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT) + child = subprocess.Popen( + cmd, + shell=True, + universal_newlines=True, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + ) except Exception as err: sys.stderr.write("Error invoking command:\n%s\n\n%s\n" % (cmd, err)) # TODO - call clean up? @@ -32,17 +35,27 @@ def depad(fasta_file, sam_file, bam_file, log_handle): - log_handle.write("\n================= Converting MIRA assembly from SAM to BAM ===================\n") + log_handle.write( + "\n================= Converting MIRA assembly from SAM to BAM ===================\n" # noqa: E501 + ) # Also doing SAM to (uncompressed) BAM during depad - bam_stem = bam_file + ".tmp" # Have write permissions and want final file in this folder - cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % (fasta_file, sam_file, bam_stem) + bam_stem = ( + bam_file + ".tmp" + ) # Have write permissions and want final file in this folder + cmd = 'samtools depad -S -u -T "%s" "%s" | samtools sort - "%s"' % ( + fasta_file, + sam_file, + bam_stem, + ) return_code = run(cmd, log_handle) if return_code: return "Error %i from command:\n%s" % (return_code, cmd) if not os.path.isfile(bam_stem + ".bam"): return "samtools depad or sort failed to produce BAM file" - log_handle.write("\n====================== Indexing MIRA assembly BAM file =======================\n") + log_handle.write( + "\n====================== Indexing MIRA assembly BAM file =======================\n" # noqa: E501 + ) cmd = 'samtools index "%s.bam"' % bam_stem return_code = run(cmd, log_handle) if return_code: @@ -60,7 +73,9 @@ if not os.path.isfile(fasta_file): return "Missing padded FASTA file: %r" % fasta_file - log_handle.write("\n====================== Converting MIRA assembly to SAM =======================\n") + log_handle.write( + "\n====================== Converting MIRA assembly to SAM =======================\n" # noqa: E501 + ) tmp_dir = tempfile.mkdtemp() sam_file = os.path.join(tmp_dir, "x.sam")
--- a/tools/mira4_0/mira4_validator.py Wed Jul 11 12:35:35 2018 -0400 +++ b/tools/mira4_0/mira4_validator.py Mon Jun 03 13:29:00 2019 -0400 @@ -36,7 +36,9 @@ try: min_size_int = int(min_size) if min_size_int < 0: - err["min_size"] = "Minumum size must not be negative (%i)" % min_size_int + err["min_size"] = ( + "Minumum size must not be negative (%i)" % min_size_int + ) min_size = None # Avoid doing comparison below except ValueError: err["min_size"] = "Minimum size is not an integer (%s)" % min_size @@ -46,14 +48,19 @@ try: max_size_int = int(max_size) if max_size_int < 0: - err["max_size"] = "Maximum size must not be negative (%i)" % max_size_int + err["max_size"] = ( + "Maximum size must not be negative (%i)" % max_size_int + ) max_size = None # Avoid doing comparison below except ValueError: err["max_size"] = "Maximum size is not an integer (%s)" % max_size max_size = None # Avoid doing comparison below if min_size and max_size and min_size_int > max_size_int: - msg = "Minimum size must be less than maximum size (%i vs %i)" % (min_size_int, max_size_int) + msg = "Minimum size must be less than maximum size (%i vs %i)" % ( + min_size_int, + max_size_int, + ) err["min_size"] = msg err["max_size"] = msg
--- a/tools/mira4_0/repository_dependencies.xml Wed Jul 11 12:35:35 2018 -0400 +++ b/tools/mira4_0/repository_dependencies.xml Mon Jun 03 13:29:00 2019 -0400 @@ -1,4 +1,4 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <repositories description="This requires the MIRA datatype definitions (e.g. the MIRA Assembly Format)."> - <repository changeset_revision="2bd1f1175fb2" name="mira_datatypes" owner="peterjc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> -</repositories> + <repository changeset_revision="65ea1ef0181d" name="mira_datatypes" owner="peterjc" toolshed="https://testtoolshed.g2.bx.psu.edu"/> +</repositories> \ No newline at end of file
--- a/tools/mira4_0/tool_dependencies.xml Wed Jul 11 12:35:35 2018 -0400 +++ b/tools/mira4_0/tool_dependencies.xml Mon Jun 03 13:29:00 2019 -0400 @@ -1,9 +1,9 @@ -<?xml version="1.0"?> +<?xml version="1.0" ?> <tool_dependency> <package name="samtools" version="0.1.19"> - <repository changeset_revision="a0ab0fae27e5" name="package_samtools_0_1_19" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="a0ab0fae27e5" name="package_samtools_0_1_19" owner="iuc" toolshed="https://testtoolshed.g2.bx.psu.edu"/> </package> <package name="MIRA" version="4.0.2"> - <repository changeset_revision="b4efe6c4acf7" name="package_mira_4_0_2" owner="peterjc" toolshed="https://testtoolshed.g2.bx.psu.edu" /> + <repository changeset_revision="b4efe6c4acf7" name="package_mira_4_0_2" owner="peterjc" toolshed="https://testtoolshed.g2.bx.psu.edu"/> </package> -</tool_dependency> +</tool_dependency> \ No newline at end of file
