diff gene_family_classifier.py @ 131:656614635ebf draft

Uploaded
author greg
date Thu, 06 Apr 2017 14:21:39 -0400
parents 7789adec8707
children b2ae23e484e8
line wrap: on
line diff
--- a/gene_family_classifier.py	Wed Mar 22 08:45:31 2017 -0400
+++ b/gene_family_classifier.py	Thu Apr 06 14:21:39 2017 -0400
@@ -3,8 +3,9 @@
 import os
 import shutil
 import subprocess
-import sys
-import tempfile
+
+import utils
+
 
 BUFF_SIZE = 1048576
 OUTPUT_DIR = 'geneFamilyClassification_dir'
@@ -34,79 +35,6 @@
 args = parser.parse_args()
 
 
-def get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout, include_stdout=False):
-    tmp_stderr.close()
-    """
-    Return a stderr string of reasonable size.
-    """
-    # Get stderr, allowing for case where it's very large.
-    tmp_stderr = open(tmp_err, 'rb')
-    stderr_str = ''
-    buffsize = BUFF_SIZE
-    try:
-        while True:
-            stderr_str += tmp_stderr.read(buffsize)
-            if not stderr_str or len(stderr_str) % buffsize != 0:
-                break
-    except OverflowError:
-        pass
-    tmp_stderr.close()
-    if include_stdout:
-        tmp_stdout = open(tmp_out, 'rb')
-        stdout_str = ''
-        buffsize = BUFF_SIZE
-        try:
-            while True:
-                stdout_str += tmp_stdout.read(buffsize)
-                if not stdout_str or len(stdout_str) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-    tmp_stdout.close()
-    if include_stdout:
-        return 'STDOUT\n%s\n\nSTDERR\n%s\n' % (stdout_str, stderr_str)
-    return stderr_str
-
-
-def move_directory_files(source_dir, destination_dir):
-    source_directory = os.path.abspath(source_dir)
-    destination_directory = os.path.abspath(destination_dir)
-    if not os.path.isdir(destination_directory):
-        os.makedirs(destination_directory)
-    for dir_entry in os.listdir(source_directory):
-        source_entry = os.path.join(source_directory, dir_entry)
-        shutil.move(source_entry, destination_directory)
-
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit(1)
-
-
-def write_html_output(output, title, dir):
-    with open(output, 'w') as fh:
-        fh.write('<html><head><h3>%s</h3></head>\n' % title)
-        fh.write('<body><p/><table cellpadding="2">\n')
-        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
-        for index, fname in enumerate(sorted(os.listdir(dir))):
-            if index % 2 == 0:
-                bgcolor = '#D8D8D8'
-            else:
-                bgcolor = '#FFFFFF'
-            try:
-                size = str(os.path.getsize(os.path.join(dir, fname)))
-            except:
-                size = 'unknown'
-            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
-            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
-        fh.write('</table></body></html>\n')
-
-
-# Define command response buffers.
-tmp_out = tempfile.NamedTemporaryFile().name
-tmp_stdout = open(tmp_out, 'wb')
-tmp_err = tempfile.NamedTemporaryFile().name
-tmp_stderr = open(tmp_err, 'wb')
 # Build the command line.
 cmd = 'GeneFamilyClassifier'
 cmd += ' --proteins %s' % args.input
@@ -134,12 +62,9 @@
     create_corresponding_coding_sequences = True
     cmd += ' --coding_sequences %s' % args.coding_sequences
 # Run the command.
-proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, stdout=tmp_stdout, shell=True)
+proc = subprocess.Popen(args=cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
 rc = proc.wait()
-# Handle execution errors.
-if rc != 0:
-    error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout)
-    stop_err( error_message )
+utils.check_execution_errors(rc, proc.stderr)
 # Handle hmmscan.log output.
 if args.classifier in ['hmmscan', 'both']:
     src_hmmscan_log = os.path.join(OUTPUT_DIR, 'hmmscan.log')
@@ -159,12 +84,12 @@
         orthogroups_fasta_dest_dir = args.output_ptortho_dir
         title = 'Orthogroups files'
     orthogroups_fasta_src_dir = os.path.join(OUTPUT_DIR, 'orthogroups_fasta')
-    move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
-    write_html_output(out_file, title, orthogroups_fasta_dest_dir)
+    utils.move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
+    utils.write_html_output(out_file, title, orthogroups_fasta_dest_dir)
 # Handle single copy orthogroup outputs.
 if args.output_ptsco is not None:
     single_copy_fasta_src_dir = os.path.join(OUTPUT_DIR, 'single_copy_fasta')
     single_copy_fasta_dest_dir = args.output_ptsco_dir
     title = 'Single copy orthogroups files'
-    move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
-    write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)
+    utils.move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
+    utils.write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)