changeset 131:656614635ebf draft

Uploaded
author greg
date Thu, 06 Apr 2017 14:21:39 -0400
parents fb3feee2638d
children 169504db8e43
files gene_family_classifier.py gene_family_classifier.xml macros.xml utils.py
diffstat 4 files changed, 143 insertions(+), 131 deletions(-) [+]
line wrap: on
line diff
--- a/gene_family_classifier.py	Wed Mar 22 08:45:31 2017 -0400
+++ b/gene_family_classifier.py	Thu Apr 06 14:21:39 2017 -0400
@@ -3,8 +3,9 @@
 import os
 import shutil
 import subprocess
-import sys
-import tempfile
+
+import utils
+
 
 BUFF_SIZE = 1048576
 OUTPUT_DIR = 'geneFamilyClassification_dir'
@@ -34,79 +35,6 @@
 args = parser.parse_args()
 
 
-def get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout, include_stdout=False):
-    tmp_stderr.close()
-    """
-    Return a stderr string of reasonable size.
-    """
-    # Get stderr, allowing for case where it's very large.
-    tmp_stderr = open(tmp_err, 'rb')
-    stderr_str = ''
-    buffsize = BUFF_SIZE
-    try:
-        while True:
-            stderr_str += tmp_stderr.read(buffsize)
-            if not stderr_str or len(stderr_str) % buffsize != 0:
-                break
-    except OverflowError:
-        pass
-    tmp_stderr.close()
-    if include_stdout:
-        tmp_stdout = open(tmp_out, 'rb')
-        stdout_str = ''
-        buffsize = BUFF_SIZE
-        try:
-            while True:
-                stdout_str += tmp_stdout.read(buffsize)
-                if not stdout_str or len(stdout_str) % buffsize != 0:
-                    break
-        except OverflowError:
-            pass
-    tmp_stdout.close()
-    if include_stdout:
-        return 'STDOUT\n%s\n\nSTDERR\n%s\n' % (stdout_str, stderr_str)
-    return stderr_str
-
-
-def move_directory_files(source_dir, destination_dir):
-    source_directory = os.path.abspath(source_dir)
-    destination_directory = os.path.abspath(destination_dir)
-    if not os.path.isdir(destination_directory):
-        os.makedirs(destination_directory)
-    for dir_entry in os.listdir(source_directory):
-        source_entry = os.path.join(source_directory, dir_entry)
-        shutil.move(source_entry, destination_directory)
-
-
-def stop_err(msg):
-    sys.stderr.write(msg)
-    sys.exit(1)
-
-
-def write_html_output(output, title, dir):
-    with open(output, 'w') as fh:
-        fh.write('<html><head><h3>%s</h3></head>\n' % title)
-        fh.write('<body><p/><table cellpadding="2">\n')
-        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
-        for index, fname in enumerate(sorted(os.listdir(dir))):
-            if index % 2 == 0:
-                bgcolor = '#D8D8D8'
-            else:
-                bgcolor = '#FFFFFF'
-            try:
-                size = str(os.path.getsize(os.path.join(dir, fname)))
-            except:
-                size = 'unknown'
-            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
-            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
-        fh.write('</table></body></html>\n')
-
-
-# Define command response buffers.
-tmp_out = tempfile.NamedTemporaryFile().name
-tmp_stdout = open(tmp_out, 'wb')
-tmp_err = tempfile.NamedTemporaryFile().name
-tmp_stderr = open(tmp_err, 'wb')
 # Build the command line.
 cmd = 'GeneFamilyClassifier'
 cmd += ' --proteins %s' % args.input
@@ -134,12 +62,9 @@
     create_corresponding_coding_sequences = True
     cmd += ' --coding_sequences %s' % args.coding_sequences
 # Run the command.
-proc = subprocess.Popen(args=cmd, stderr=tmp_stderr, stdout=tmp_stdout, shell=True)
+proc = subprocess.Popen(args=cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
 rc = proc.wait()
-# Handle execution errors.
-if rc != 0:
-    error_message = get_stderr_exception(tmp_err, tmp_stderr, tmp_out, tmp_stdout)
-    stop_err( error_message )
+utils.check_execution_errors(rc, proc.stderr)
 # Handle hmmscan.log output.
 if args.classifier in ['hmmscan', 'both']:
     src_hmmscan_log = os.path.join(OUTPUT_DIR, 'hmmscan.log')
@@ -159,12 +84,12 @@
         orthogroups_fasta_dest_dir = args.output_ptortho_dir
         title = 'Orthogroups files'
     orthogroups_fasta_src_dir = os.path.join(OUTPUT_DIR, 'orthogroups_fasta')
-    move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
-    write_html_output(out_file, title, orthogroups_fasta_dest_dir)
+    utils.move_directory_files(orthogroups_fasta_src_dir, orthogroups_fasta_dest_dir)
+    utils.write_html_output(out_file, title, orthogroups_fasta_dest_dir)
 # Handle single copy orthogroup outputs.
 if args.output_ptsco is not None:
     single_copy_fasta_src_dir = os.path.join(OUTPUT_DIR, 'single_copy_fasta')
     single_copy_fasta_dest_dir = args.output_ptsco_dir
     title = 'Single copy orthogroups files'
-    move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
-    write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)
+    utils.move_directory_files(single_copy_fasta_src_dir, single_copy_fasta_dest_dir)
+    utils.write_html_output(args.output_ptsco, title, single_copy_fasta_dest_dir)
--- a/gene_family_classifier.xml	Wed Mar 22 08:45:31 2017 -0400
+++ b/gene_family_classifier.xml	Thu Apr 06 14:21:39 2017 -0400
@@ -1,16 +1,10 @@
 <tool id="plant_tribes_gene_family_classifier" name="GeneFamilyClassifier" version="0.4.0">
     <description>classifies gene sequences into pre-computed orthologous plant gene family clusters</description>
-    <requirements>
-        <requirement type="package" version="0.4">plant_tribes_gene_family_classifier</requirement>
-    </requirements>
-    <stdio>
-        <!-- Anything other than zero is an error -->
-        <exit_code range="1:" />
-        <exit_code range=":-1" />
-        <!-- In case the return code has not been set propery check stderr too -->
-        <regex match="Error:" />
-        <regex match="Exception:" />
-    </stdio>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements_gene_family_classifier" />
+    <expand macro="stdio" />
     <command>
         <![CDATA[
             #if str($options_type.options_type_selector) == 'advanced':
@@ -295,42 +289,8 @@
 
     </help>
     <citations>
-        <citation type="bibtex">
-            @misc{None,
-            journal = {None},
-            author = {1. Wafula EK},
-            title = {Manuscript in preparation},
-            year = {None},
-            url = {https://github.com/dePamphilis/PlantTribes},}
-        </citation>
-        <citation type="bibtex">
-            @article{Sasidharan2012,
-            journal = {Nucleic Acids Research},
-            author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
-            title = {GFam: a platform for automatic annotation of gene families},
-            year = {2012},
-            pages = {gks631},}
-        </citation>
-        <citation type="bibtex">
-            @article{Li2003,
-            journal = {Genome Research}
-            author = {3. Li L, Stoeckert CJ, Roos DS},
-            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
-            year = {2003},
-            volume = {13},
-            number = {9},
-            pages = {2178-2189},}
-        </citation>
-        <citation type="bibtex">
-            @article{Emms2015,
-            journal = {Genome Biology}
-            author = {4. Emms DM, Kelly S},
-            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
-            year = {2015},
-            volume = {16},
-            number = {1},
-            pages = {157},}
-        </citation>
+        <expand macro="citation1" />
+        <expand macro="citations2to4" />
         <citation type="bibtex">
             @article{Altschul1990,
             journal = {Journal of molecular biology}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Apr 06 14:21:39 2017 -0400
@@ -0,0 +1,85 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <xml name="requirements_assembly_post_processor">
+        <requirements>
+            <requirement type="package" version="0.4">plant_tribes_assembly_post_processor</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_classifier">
+        <requirements>
+            <requirement type="package" version="0.8">plant_tribes_gene_family_classifier</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_integrator">
+        <requirements>
+            <requirement type="package" version="0.8">plant_tribes_gene_family_integrator</requirement>
+        </requirements>
+    </xml>
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:"/>
+            <exit_code range=":-1"/>
+            <regex match="Error:"/>
+            <regex match="Exception:"/>
+        </stdio>
+    </xml>
+    <xml name="param_scaffold">
+        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
+        </param>
+    </xml>
+    <xml name="param_method">
+        <param name="method" type="select" label="Protein clustering method">
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+    </xml>
+    <xml name="param_orthogroup_fna">
+        <param name="orthogroup_fna" type="select" display="radio" label="Process corresponding gene family classification orthogroups CDS fasta files?">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <xml name="citation1">
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Wafula EK},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/dePamphilis/PlantTribes},}
+        </citation>
+    </xml>
+    <xml name="citations2to4">
+        <citation type="bibtex">
+            @article{Sasidharan2012,
+            journal = {Nucleic Acids Research},
+            author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
+            title = {GFam: a platform for automatic annotation of gene families},
+            year = {2012},
+            pages = {gks631},}
+        </citation>
+        <citation type="bibtex">
+            @article{Li2003,
+            journal = {Genome Research},
+            author = {3. Li L, Stoeckert CJ, Roos DS},
+            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
+            year = {2003},
+            volume = {13},
+            number = {9},
+            pages = {2178-2189},}
+        </citation>
+        <citation type="bibtex">
+            @article{Emms2015,
+            journal = {Genome Biology},
+            author = {4. Emms DM, Kelly S},
+            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
+            year = {2015},
+            volume = {16},
+            number = {1},
+            pages = {157},}
+        </citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py	Thu Apr 06 14:21:39 2017 -0400
@@ -0,0 +1,42 @@
+import os
+import shutil
+import sys
+
+
+def check_execution_errors(rc, stderr):
+    if rc != 0:
+        stop_err(stderr.read())
+
+
+def move_directory_files(source_dir, destination_dir):
+    source_directory = os.path.abspath(source_dir)
+    destination_directory = os.path.abspath(destination_dir)
+    if not os.path.isdir(destination_directory):
+        os.makedirs(destination_directory)
+    for dir_entry in os.listdir(source_directory):
+        source_entry = os.path.join(source_directory, dir_entry)
+        shutil.move(source_entry, destination_directory)
+
+
+def stop_err(msg):
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+
+def write_html_output(output, title, dir):
+    with open(output, 'w') as fh:
+        fh.write('<html><head><h3>%s</h3></head>\n' % title)
+        fh.write('<body><p/><table cellpadding="2">\n')
+        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
+        for index, fname in enumerate(sorted(os.listdir(dir))):
+            if index % 2 == 0:
+                bgcolor = '#D8D8D8'
+            else:
+                bgcolor = '#FFFFFF'
+            try:
+                size = str(os.path.getsize(os.path.join(dir, fname)))
+            except:
+                size = 'unknown'
+            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
+            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
+        fh.write('</table></body></html>\n')