changeset 0:109a0eb7791f draft

Uploaded
author greg
date Thu, 06 Apr 2017 13:34:02 -0400
parents
children f53cafa72117
files gene_family_integrator.py gene_family_integrator.xml macros.xml plant_tribes_scaffolds.loc.sample tool_data_table_conf.xml.sample utils.py
diffstat 6 files changed, 246 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_integrator.py	Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,31 @@
+#!/usr/bin/env python
+import argparse
+import subprocess
+
+import utils.py
+
+OUTPUT_DIR = 'integratedGeneFamilies_dir'
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', help="Directory of input fasta datasets")
+parser.add_argument('--scaffold', dest='scaffold', default='mode', help='Orthogroups or gene families proteins scaffold')
+parser.add_argument('--method', dest='method', help='Protein clustering method')
+parser.add_argument('--orthogroup_fna', dest='orthogroup_fna', default=None, help='Use correspong coding sequences')
+parser.add_argument('--output', dest='output', help="Output dataset")
+parser.add_argument('--output_dir', dest='output_dir', help="Output dataset file_path directory")
+
+args = parser.parse_args()
+
+# Build the command line.
+cmd = 'GeneFamilyIntegrator'
+cmd += ' --orthogroup_faa %s' % args.orthogroup_fasta
+cmd += ' --scaffold %s' % args.scaffold
+cmd += ' --method %s' % args.method
+if args.orthogroup_fna is not None:
+    cmd += ' --orthogroup_fna'
+# Run the command.
+proc = subprocess.Popen(args=cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+rc = proc.wait()
+utils.check_execution_errors(rc, proc.stderr)
+utils.move_directory_files(OUTPUT_DIR, args.output_dir)
+utils.write_html_output(args.output, 'Integrated gene family sequences', args.output_dir)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_integrator.xml	Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,78 @@
+<tool id="plant_tribes_gene_family_integrator" name="GeneFamilyIntegrator" version="0.8.0">
+    <description>integrates de novo assembly sequences with scaffold gene family sequences</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements_gene_family_integrator" />
+    <expand macro="stdio" />
+    <command>
+        <![CDATA[
+            ## Every parameter below is declared inside the input_format_cond
+            ## conditional, so each one is read through that conditional.
+            #set input_format = $input_format_cond.input_format
+            #set scaffold = $input_format_cond.scaffold
+            #set method = $input_format_cond.method
+
+            python $__tool_directory__/gene_family_integrator.py
+            --scaffold '$scaffold.fields.path'
+            --method $method
+            #if str($input_format) == 'ptortho':
+                --orthogroup_faa '$input_format_cond.input_ptortho.extra_files_path'
+            #else:
+                ## str($input_format) == 'ptorthocs'
+                --orthogroup_faa '$input_format_cond.input_ptorthocs.extra_files_path'
+                ## orthogroup_fna is only declared in the ptorthocs branch.
+                #if str($input_format_cond.orthogroup_fna) == 'yes':
+                    --orthogroup_fna 'true'
+                #end if
+            #end if
+            --output '$output_html'
+            --output_dir '$output_html.files_path'
+        ]]>
+    </command>
+    <inputs>
+        <!-- The selected input format decides which dataset parameter is shown.
+             Both branches offer the scaffold and clustering method; only the
+             ptorthocs branch adds the orthogroup_fna choice. -->
+        <conditional name="input_format_cond">
+            <param name="input_format" type="select" label="Select type of data to sub sample">
+                <option value="ptortho">Gene family clusters</option>
+                <option value="ptorthocs">Gene family clusters with corresponding coding sequences</option>
+            </param>
+            <when value="ptortho">
+                <param name="input_ptortho" format="ptortho" type="data" label="Gene family clusters" />
+                <expand macro="param_scaffold" />
+                <expand macro="param_method" />
+            </when>
+            <when value="ptorthocs">
+                <param name="input_ptorthocs" format="ptorthocs" type="data" label="Gene family clusters with corresponding coding sequences" />
+                <expand macro="param_scaffold" />
+                <expand macro="param_method" />
+                <expand macro="param_orthogroup_fna" />
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="output_html" format="html" />
+    </outputs>
+    <tests>
+        <test>
+        </test>
+    </tests>
+    <help>
+This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
+analyses of genome-scale gene families and transcriptomes. This tool integrates classified post processed de novo transcriptome
+assembly sequence(s) with the scaffold gene family sequences.
+
+-----
+
+**Required options**
+
+ * **Gene family clusters** - sequences classified into gene family clusters, optionally including corresponding coding sequences.
+ * **Gene family scaffold** - one of the PlantTribes gene family scaffolds [2-4] installed into Galaxy by the PlantTribes Scaffold Data Manager tool.
+ * **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
+
+**Other options**
+
+ * Process corresponding gene family classification orthogroups CDS fasta files? - Select 'Yes' to process corresponding gene family classification orthogroups CDS fasta files.
+
+    </help>
+    <citations>
+        <expand macro="citation1" />
+        <expand macro="citations2to4" />
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,85 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <!-- Package requirement macros, one per tool package.  In this changeset
+         gene_family_integrator.xml expands only requirements_gene_family_integrator. -->
+    <xml name="requirements_assembly_post_processor">
+        <requirements>
+            <requirement type="package" version="0.4">plant_tribes_assembly_post_processor</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_classifier">
+        <requirements>
+            <requirement type="package" version="0.4">plant_tribes_gene_family_classifier</requirement>
+        </requirements>
+    </xml>
+    <xml name="requirements_gene_family_integrator">
+        <requirements>
+            <requirement type="package" version="0.8">plant_tribes_gene_family_integrator</requirement>
+        </requirements>
+    </xml>
+    <!-- Job outcome checks: exit codes in the ranges "1:" and ":-1" (i.e. any
+         non-zero code) and output matching "Error:" or "Exception:".
+         NOTE(review): no level attribute is given, so the severity is whatever
+         Galaxy applies by default - confirm against the tool XML schema. -->
+    <xml name="stdio">
+        <stdio>
+            <exit_code range="1:"/>
+            <exit_code range=":-1"/>
+            <regex match="Error:"/>
+            <regex match="Exception:"/>
+        </stdio>
+    </xml>
+    <!-- Scaffold selector populated from the plant_tribes_scaffolds data table;
+         the validator reports an error when the table offers no options. -->
+    <xml name="param_scaffold">
+        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
+            <options from_data_table="plant_tribes_scaffolds" />
+            <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
+        </param>
+    </xml>
+    <!-- Protein clustering method selector; GFam is preselected. -->
+    <xml name="param_method">
+        <param name="method" type="select" label="Protein clustering method">
+            <option value="gfam" selected="true">GFam</option>
+            <option value="orthofinder">OrthoFinder</option>
+            <option value="orthomcl">OrthoMCL</option>
+        </param>
+    </xml>
+    <!-- Yes/no radio choice for processing the CDS fasta files; "yes" is preselected. -->
+    <xml name="param_orthogroup_fna">
+        <param name="orthogroup_fna" type="select" display="radio" label="Process corresponding gene family classification orthogroups CDS fasta files?">
+            <option value="yes" selected="true">Yes</option>
+            <option value="no">No</option>
+        </param>
+    </xml>
+    <!-- Citation for the PlantTribes project itself.  The journal and year are
+         the literal placeholder "None" and the title reads "Manuscript in
+         preparation"; presumably to be replaced once a publication exists. -->
+    <xml name="citation1">
+        <citation type="bibtex">
+            @misc{None,
+            journal = {None},
+            author = {1. Wafula EK},
+            title = {Manuscript in preparation},
+            year = {None},
+            url = {https://github.com/dePamphilis/PlantTribes},}
+        </citation>
+    </xml>
+    <!-- Citations for the three supported clustering methods (GFam, OrthoMCL,
+         OrthoFinder).  Every BibTeX field must be followed by a comma. -->
+    <xml name="citations2to4">
+        <citation type="bibtex">
+            @article{Sasidharan2012,
+            journal = {Nucleic Acids Research},
+            author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
+            title = {GFam: a platform for automatic annotation of gene families},
+            year = {2012},
+            pages = {gks631},}
+        </citation>
+        <citation type="bibtex">
+            @article{Li2003,
+            journal = {Genome Research},
+            author = {3. Li L, Stoeckert CJ, Roos DS},
+            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
+            year = {2003},
+            volume = {13},
+            number = {9},
+            pages = {2178-2189},}
+        </citation>
+        <citation type="bibtex">
+            @article{Emms2015,
+            journal = {Genome Biology},
+            author = {4. Emms DM, Kelly S},
+            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
+            year = {2015},
+            volume = {16},
+            number = {1},
+            pages = {157},}
+        </citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample	Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,4 @@
+## Plant Tribes scaffolds
+#Value	Name	Path	Description
+#22Gv1.0	22Gv1.0	/plant_tribes/scaffolds/22Gv1.0	22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1	22Gv1.1	/plant_tribes/scaffolds/22Gv1.1	22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,6 @@
+<tables>
+    <!-- Data table behind the scaffold selector.  The four columns correspond
+         to the Value, Name, Path and Description fields shown in the header
+         of plant_tribes_scaffolds.loc.sample; lines starting with # are comments. -->
+    <table name="plant_tribes_scaffolds" comment_char="#">
+        <columns>value, name, path, description</columns>
+        <file path="tool-data/plant_tribes_scaffolds.loc" />
+    </table>
+</tables>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py	Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,42 @@
+import os
+import shutil
+import sys
+
+
+def check_execution_errors(rc, stderr):
+    if rc != 0:
+        stop_err(stderr.read())
+
+
+def move_directory_files(source_dir, destination_dir):
+    source_directory = os.path.abspath(source_dir)
+    destination_directory = os.path.abspath(destination_dir)
+    if not os.path.isdir(destination_directory):
+        os.makedirs(destination_directory)
+    for dir_entry in os.listdir(source_directory):
+        source_entry = os.path.join(source_directory, dir_entry)
+        shutil.move(source_entry, destination_directory)
+
+
+def stop_err(msg):
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+
+def write_html_output(output, title, dir):
+    with open(output, 'w') as fh:
+        fh.write('<html><head><h3>%s</h3></head>\n' % title)
+        fh.write('<body><p/><table cellpadding="2">\n')
+        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
+        for index, fname in enumerate(sorted(os.listdir(dir))):
+            if index % 2 == 0:
+                bgcolor = '#D8D8D8'
+            else:
+                bgcolor = '#FFFFFF'
+            try:
+                size = str(os.path.getsize(os.path.join(dir, fname)))
+            except:
+                size = 'unknown'
+            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
+            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
+        fh.write('</table></body></html>\n')