diff tools/protein_analysis/tmhmm2.py @ 7:5e62aefb2918 draft

Uploaded v0.1.2 to Test Tool Shed
author peterjc
date Tue, 26 Mar 2013 14:24:56 -0400
parents 747cec3192d3
children 391a142c1e60
line wrap: on
line diff
--- a/tools/protein_analysis/tmhmm2.py	Tue Jun 07 17:41:38 2011 -0400
+++ b/tools/protein_analysis/tmhmm2.py	Tue Mar 26 14:24:56 2013 -0400
@@ -1,10 +1,10 @@
 #!/usr/bin/env python
 """Wrapper for TMHMM v2.0 for use in Galaxy.
 
-This script takes exactly two command line arguments - an input protein FASTA
-filename and an output tabular filename. It then calls the standalone TMHMM
-v2.0 program (not the webservice) requesting the short output (one line per
-protein).
+This script takes exactly three command line arguments - number of threads,
+an input protein FASTA filename, and an output tabular filename. It then
+calls the standalone TMHMM v2.0 program (not the webservice) requesting
+the short output (one line per protein).
 
 The first major feature is cleaning up the tabular output. The short form raw
 output from TMHMM v2.0 looks like this (six columns tab separated):
@@ -33,27 +33,29 @@
 use Python's multiprocessing library in this situation but it requires at
 least Python 2.6 and at the time of writing Galaxy still supports Python 2.4.
 
+Note that this is somewhat redundant with job-splitting available in Galaxy
+itself (see the SignalP XML file for settings).
+
 Also tmhmm2 can fail without returning an error code, for example if run on a
 64 bit machine with only the 32 bit binaries installed. This script will spot
 when there is no output from tmhmm2, and raise an error.
 """
 import sys
 import os
-from seq_analysis_utils import stop_err, split_fasta, run_jobs
+import tempfile
+from seq_analysis_utils import stop_err, split_fasta, run_jobs, thread_count
 
 FASTA_CHUNK = 500
 
 if len(sys.argv) != 4:
    stop_err("Require three arguments, number of threads (int), input protein FASTA file & output tabular file")
-try:
-   num_threads = int(sys.argv[1])
-except:
-   num_threads = 0
-if num_threads < 1:
-   stop_err("Threads argument %s is not a positive integer" % sys.argv[1])
+
+num_threads = thread_count(sys.argv[1], default=4)
 fasta_file = sys.argv[2]
 tabular_file = sys.argv[3]
 
+tmp_dir = tempfile.mkdtemp()
+
 def clean_tabular(raw_handle, out_handle):
     """Clean up tabular TMHMM output, returns output line count."""
     count = 0
@@ -84,7 +86,7 @@
 
 #Note that if the input FASTA file contains no sequences,
 #split_fasta returns an empty list (i.e. zero temp files).
-fasta_files = split_fasta(fasta_file, tabular_file, FASTA_CHUNK)
+fasta_files = split_fasta(fasta_file, os.path.join(tmp_dir, "tmhmm"), FASTA_CHUNK)
 temp_files = [f+".out" for f in fasta_files]
 jobs = ["tmhmm -short %s > %s" % (fasta, temp)
         for fasta, temp in zip(fasta_files, temp_files)]
@@ -93,6 +95,10 @@
     for f in file_list:
         if os.path.isfile(f):
             os.remove(f)
+    try:
+        os.rmdir(tmp_dir)
+    except:
+        pass
 
 if len(jobs) > 1 and num_threads > 1:
     #A small "info" message for Galaxy to show the user.
@@ -105,8 +111,7 @@
             output = open(temp).readline()
         except IOError:
             output = ""
-        clean_up(fasta_files)
-        clean_up(temp_files)
+        clean_up(fasta_files + temp_files)
         stop_err("One or more tasks failed, e.g. %i from %r gave:\n%s" % (error_level, cmd, output),
                  error_level)
 del results
@@ -119,10 +124,8 @@
     count = clean_tabular(data_handle, out_handle)
     data_handle.close()
     if not count:
-        clean_up(fasta_files)
-        clean_up(temp_files)
+        clean_up(fasta_files + temp_files)
         stop_err("No output from tmhmm2")
 out_handle.close()
 
-clean_up(fasta_files)
-clean_up(temp_files)
+clean_up(fasta_files + temp_files)