tmhmm_and_signalp: tools/protein_analysis/seq_analysis

comparison tools/protein_analysis/seq_analysis_utils.py @ 30:6d9d7cdf00fc draft

v0.2.11 Job splitting fast-fail; RXLR tools support HMMER2 from BioConda; Capture more version information; misc internal changes

author	peterjc
date	Thu, 21 Sep 2017 11:15:55 -0400
parents	3cb02adf4326
children	20da7f48b56f

comparison

equal deleted inserted replaced

-:3cb02adf4326
+:6d9d7cdf00fc
 Given Galaxy currently supports Python 2.4+ this cannot use the Python module
 multiprocessing so the function run_jobs instead is a simple pool approach
 using just the subprocess library.
 """
-import sys
+from __future__ import print_function
 import os
 import subprocess
+import sys
 from time import sleep
 __version__ = "0.0.2"
 try:
 else:
 raise NotImplementedError('cannot determine number of cpus')
 def thread_count(command_line_arg, default=1):
+"""Determine number of threads to use from the command line args."""
 try:
 num = int(command_line_arg)
 except ValueError:
 num = default
 if num < 1:
 for i in range(0, len(seq), 60):
 handle.write(seq[i:i + 60] + "\n")
 handle.close()
 files.append(new_filename)
 # print "%i records in %s" % (len(records), new_filename)
-except ValueError, err:
+except ValueError as err:
 # Max length failure from parser - clean up
 try:
 handle.close()
 except Exception:
 pass
 for f in files:
 assert os.path.isfile(f), "Missing split file %r (!??)" % f
 return files
def run_jobs(jobs, threads, pause=10, verbose=False, fast_fail=True):
    """Run shell command strings in a simple process pool.

    Arguments:
     - jobs - list of command strings, each one is run via the shell.
     - threads - maximum number of commands to run at once (at least 1).
     - pause - seconds to wait between polls of the running commands.
     - verbose - if true, print progress messages to stdout.
     - fast_fail - if true, do not start any further commands once a
       command has finished with a non-zero return code.

    Returns a dict mapping each command string which was run to its
    return code. Commands never started because of fast_fail have no
    entry in the dict.

    Raises ValueError if threads is less than one (nothing could ever be
    started, so the pool would otherwise wait forever).
    """
    if threads < 1:
        raise ValueError("Need at least one thread, not %r" % threads)
    results = {}
    if threads == 1:
        # Special case this for speed, don't need the waits
        for index, cmd in enumerate(jobs):
            return_code = subprocess.call(cmd, shell=True)
            results[cmd] = return_code
            if return_code and fast_fail:
                # Honour fast_fail here too, matching the pooled code below
                remaining = len(jobs) - index - 1
                if verbose and remaining:
                    print("Failed, will not start remaining %i jobs"
                          % remaining)
                break
        return results
    pending = list(jobs)
    running = []
    skipped = []
    failed = False
    while pending or running:
        # See if any have finished. Decide per process (not per command
        # string) so that a duplicated command which is still running is
        # not dropped when its twin finishes first.
        still_running = []
        for cmd, process in running:
            return_code = process.poll()  # non-blocking
            if return_code is None:
                still_running.append((cmd, process))
            else:
                results[cmd] = return_code
                if return_code:
                    failed = True
        running = still_running
        if verbose:
            print("%i jobs pending, %i running, %i completed" %
                  (len(pending), len(running), len(results)))
        # See if we can start any new threads
        if pending and failed and fast_fail:
            # Don't start any more jobs
            if verbose:
                print("Failed, will not start remaining %i jobs"
                      % len(pending))
            skipped = pending
            pending = []
        while pending and len(running) < threads:
            cmd = pending.pop(0)
            if verbose:
                print(cmd)
            process = subprocess.Popen(cmd, shell=True)
            running.append((cmd, process))
        if not (pending or running):
            # Everything is accounted for, no point waiting again
            break
        # Loop...
        sleep(pause)
    if verbose:
        print("%i jobs completed" % len(results))
    # Internal sanity check: every job was either run or skipped
    assert set(jobs) == set(results).union(skipped)
    return results

Mercurial > repos > peterjc > tmhmm_and_signalp

comparison tools/protein_analysis/seq_analysis_utils.py @ 30:6d9d7cdf00fc draft