# HG changeset patch
# User greg
# Date 1491500042 14400
# Node ID 109a0eb7791f31e9a121bfd7880040a931138f37
Uploaded
diff -r 000000000000 -r 109a0eb7791f gene_family_integrator.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_integrator.py Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+"""Command line wrapper that runs the GeneFamilyIntegrator program.
+
+Builds the GeneFamilyIntegrator command from the parsed arguments, runs it,
+moves the entries found in OUTPUT_DIR into --output_dir and writes an HTML
+listing of that directory to --output.
+"""
+import argparse
+import subprocess
+import tempfile
+
+import utils
+
+# Directory (relative to the working directory) whose entries are moved to
+# --output_dir after the command finishes.
+OUTPUT_DIR = 'integratedGeneFamilies_dir'
+
+parser = argparse.ArgumentParser()
+parser.add_argument('--orthogroup_faa', dest='orthogroup_faa', help="Directory of input fasta datasets")
+parser.add_argument('--scaffold', dest='scaffold', default='mode', help='Orthogroups or gene families proteins scaffold')
+parser.add_argument('--method', dest='method', help='Protein clustering method')
+parser.add_argument('--orthogroup_fna', dest='orthogroup_fna', default=None, help='Use corresponding coding sequences')
+parser.add_argument('--output', dest='output', help="Output dataset")
+parser.add_argument('--output_dir', dest='output_dir', help="Output dataset file_path directory")
+
+args = parser.parse_args()
+
+# Build the command line.
+# NOTE(review): the values are interpolated unquoted into a shell command
+# (shell=True below), so they must come from a trusted caller.
+cmd = 'GeneFamilyIntegrator'
+cmd += ' --orthogroup_faa %s' % args.orthogroup_faa
+cmd += ' --scaffold %s' % args.scaffold
+cmd += ' --method %s' % args.method
+if args.orthogroup_fna is not None:
+    # Passed through as a bare switch; the argument's value is not forwarded.
+    cmd += ' --orthogroup_fna'
+# Run the command. Output goes to temporary files instead of pipes because
+# wait() on a child whose pipes are never drained can block forever once a
+# pipe buffer fills up.
+with tempfile.TemporaryFile() as stdout_fh, tempfile.TemporaryFile() as stderr_fh:
+    proc = subprocess.Popen(args=cmd, stderr=stderr_fh, stdout=stdout_fh, shell=True)
+    rc = proc.wait()
+    # Rewind so the error check reads the captured output from the start.
+    stderr_fh.seek(0)
+    utils.check_execution_errors(rc, stderr_fh)
+utils.move_directory_files(OUTPUT_DIR, args.output_dir)
+utils.write_html_output(args.output, 'Integrated gene family sequences', args.output_dir)
diff -r 000000000000 -r 109a0eb7791f gene_family_integrator.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_integrator.xml Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,78 @@
+
+ integrates de novo assembly sequences with scaffold gene family sequences
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
+analyses of genome-scale gene families and transcriptomes. This tool integrates classified post-processed de novo transcriptome
+assembly sequence(s) with the scaffold gene family sequences.
+
+-----
+
+**Required options**
+
+ * **Gene family clusters** - sequences classified into gene family clusters, optionally including corresponding coding sequences.
+ * **Gene family scaffold** - one of the PlantTribes gene family scaffolds [2-4] installed into Galaxy by the PlantTribes Scaffold Data Manager tool.
+ * **Protein clustering method** - gene family scaffold protein clustering method as described in the AssemblyPostProcessor tool.
+
+**Other options**
+
+ * Process corresponding gene family classification orthogroups CDS fasta files? - Select 'Yes' to process corresponding gene family classification orthogroups CDS fasta files.
+
+
+
+
+
+
+
diff -r 000000000000 -r 109a0eb7791f macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,85 @@
+
+
+
+
+ plant_tribes_assembly_post_processor
+
+
+
+
+ plant_tribes_gene_family_classifier
+
+
+
+
+ plant_tribes_gene_family_integrator
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @misc{None,
+ journal = {None},
+ author = {1. Wafula EK},
+ title = {Manuscript in preparation},
+ year = {None},
+ url = {https://github.com/dePamphilis/PlantTribes},}
+
+
+
+
+ @article{Sasidharan2012,
+ journal = {Nucleic Acids Research},
+ author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
+ title = {GFam: a platform for automatic annotation of gene families},
+ year = {2012},
+ pages = {gks631},}
+
+
+ @article{Li2003,
+ journal = {Genome Research},
+ author = {3. Li L, Stoeckert CJ, Roos DS},
+ title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
+ year = {2003},
+ volume = {13},
+ number = {9},
+ pages = {2178-2189},}
+
+
+ @article{Emms2015,
+ journal = {Genome Biology},
+ author = {4. Emms DM, Kelly S},
+ title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
+ year = {2015},
+ volume = {16},
+ number = {1},
+ pages = {157},}
+
+
+
diff -r 000000000000 -r 109a0eb7791f plant_tribes_scaffolds.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,4 @@
+## Plant Tribes scaffolds
+#Value Name Path Description
+#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
diff -r 000000000000 -r 109a0eb7791f tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,6 @@
+
+
+ value, name, path, description
+
+
+
diff -r 000000000000 -r 109a0eb7791f utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py Thu Apr 06 13:34:02 2017 -0400
@@ -0,0 +1,68 @@
+"""Helper functions used by gene_family_integrator.py."""
+import os
+import shutil
+import sys
+
+
+def check_execution_errors(rc, stderr):
+    """Abort via stop_err() when a command finished with a non-zero status.
+
+    rc is the command's exit status and stderr is a readable file-like
+    object holding its error output; it is read only when rc is non-zero.
+    """
+    if rc != 0:
+        stop_err(stderr.read())
+
+
+def move_directory_files(source_dir, destination_dir):
+    """Move every entry of source_dir into destination_dir.
+
+    destination_dir is created (including missing parents) when it does not
+    exist yet.
+    """
+    source_directory = os.path.abspath(source_dir)
+    destination_directory = os.path.abspath(destination_dir)
+    if not os.path.isdir(destination_directory):
+        os.makedirs(destination_directory)
+    for dir_entry in os.listdir(source_directory):
+        source_entry = os.path.join(source_directory, dir_entry)
+        shutil.move(source_entry, destination_directory)
+
+
+def stop_err(msg):
+    """Write msg to standard error and exit with status 1."""
+    # Output read from a subprocess pipe is bytes on Python 3, which the text
+    # stream sys.stderr cannot write; on Python 2 bytes is str, so this is a
+    # no-op there.
+    if isinstance(msg, bytes) and not isinstance(msg, str):
+        msg = msg.decode('utf-8', 'replace')
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+
+def write_html_output(output, title, dir):
+    """Write an HTML table listing the size and name of each file in dir.
+
+    output is the path of the HTML file to write, title its heading and dir
+    the directory whose entries are listed in sorted order.
+    """
+    # NOTE(review): the HTML tags were missing from the string literals and
+    # have been reconstructed; confirm them against the deployed tool.
+    with open(output, 'w') as fh:
+        fh.write('<html><head><h3>%s</h3></head>\n' % title)
+        fh.write('<body><p/><table cellpadding="2">\n')
+        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
+        for index, fname in enumerate(sorted(os.listdir(dir))):
+            # Alternate the row background colour for readability.
+            if index % 2 == 0:
+                bgcolor = '#D8D8D8'
+            else:
+                bgcolor = '#FFFFFF'
+            try:
+                size = str(os.path.getsize(os.path.join(dir, fname)))
+            except OSError:
+                # Best effort: still list entries whose size cannot be read.
+                size = 'unknown'
+            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
+            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
+        fh.write('</table></body></html>\n')