# HG changeset patch
# User greg
# Date 1491923124 14400
# Node ID f4361d941aa3812075930a314aa31bdcfb30100e
Uploaded
diff -r 000000000000 -r f4361d941aa3 gene_family_phylogeny_builder.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_phylogeny_builder.py Tue Apr 11 11:05:24 2017 -0400
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+import argparse
+import subprocess
+
+import utils
+
+# Directory whose contents are moved into --output_dir once the command has
+# run (presumably where GeneFamilyPhylogenyBuilder writes its results).
+OUTPUT_DIR = 'phylogenomicsAnalysis_dir'
+
+# Command-line interface. argparse derives each destination attribute from the
+# long option name, and options that are not supplied come back as None.
+parser = argparse.ArgumentParser()
+
+parser.add_argument('--alignments_type', help='Input alignments type produced by the GeneFamilyAligner')
+parser.add_argument('--bootstrap_replicates', type=int, help='Number of replicates for rapid bootstrap analysis')
+parser.add_argument('--config_dir', help='Directory containing default configuration files')
+parser.add_argument('--max_orthogroup_size', type=int, help='Maximum number of sequences in orthogroup alignments')
+parser.add_argument('--method', help='Protein clustering method')
+parser.add_argument('--min_orthogroup_size', type=int, help='Minimum number of sequences in orthogroup alignments')
+parser.add_argument('--num_threads', type=int, help='Number of threads to use for execution')
+parser.add_argument('--orthogroup_aln', help="Input dataset files_path")
+parser.add_argument('--output', help='Output for phylogenetic trees')
+parser.add_argument('--output_dir', help='output.files_path')
+parser.add_argument('--rooting_order', help='Rooting order configuration for rooting trees')
+parser.add_argument('--scaffold', help='Orthogroups or gene families proteins scaffold')
+parser.add_argument('--sequence_type', help="Sequence type used in the phylogenetic inference")
+parser.add_argument('--tree_inference', help='Phylogenetic trees inference method')
+
+args = parser.parse_args()
+
+# Build the command line for the GeneFamilyPhylogenyBuilder executable.
+# NOTE(review): option values are interpolated unquoted into a string that is
+# run through the shell, so this relies on the caller passing shell-safe values.
+cmd = 'GeneFamilyPhylogenyBuilder'
+# Forward the --alignments_type value here; the tree inference method has its
+# own option further down and must not be sent in its place.
+cmd += ' --alignment_type %s' % args.alignments_type
+# Optional settings are only forwarded when they were supplied.
+if args.bootstrap_replicates is not None:
+    cmd += ' --bootstrap_replicates %d' % args.bootstrap_replicates
+cmd += ' --config_dir %s' % args.config_dir
+cmd += ' --max_orthogroup_size %d' % args.max_orthogroup_size
+cmd += ' --method %s' % args.method
+cmd += ' --min_orthogroup_size %d' % args.min_orthogroup_size
+cmd += ' --num_threads %d' % args.num_threads
+cmd += ' --orthogroup_aln %s' % args.orthogroup_aln
+if args.rooting_order is not None:
+    cmd += ' --rooting_order %s' % args.rooting_order
+cmd += ' --scaffold %s' % args.scaffold
+cmd += ' --sequence_type %s' % args.sequence_type
+cmd += ' --tree_inference %s' % args.tree_inference
+# Run the command.
+# NOTE(review): wait() with both streams piped can block if the child fills a
+# pipe buffer before exiting; stdout is captured here but never read.
+proc = subprocess.Popen(args=cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE, shell=True)
+rc = proc.wait()
+# Hand the exit status and the captured stderr stream to the error check.
+utils.check_execution_errors(rc, proc.stderr)
+# Move the produced files into the output directory, then write the HTML
+# listing of that directory to the primary output.
+utils.move_directory_files(OUTPUT_DIR, args.output_dir)
+utils.write_html_output(args.output, 'Phylogenetic trees', args.output_dir)
diff -r 000000000000 -r f4361d941aa3 gene_family_phylogeny_builder.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/gene_family_phylogeny_builder.xml Tue Apr 11 11:05:24 2017 -0400
@@ -0,0 +1,157 @@
+
+ builds gene family phylogenetic trees
+
+ macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This tool is one of the PlantTribes collection of automated modular analysis pipelines that utilize objective classifications of
+complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs phylogenomic
+analyses by creating multiple sequence alignments and inferring maximum likelihood phylogenies for orthogroups produced by the
+**GeneFamilyAligner** tool.
+
+-----
+
+**Required options**
+
+**Other options**
+
+
+
+
+
+
+
diff -r 000000000000 -r f4361d941aa3 macros.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml Tue Apr 11 11:05:24 2017 -0400
@@ -0,0 +1,163 @@
+
+
+ 0.8
+
+
+ plant_tribes_assembly_post_processor
+
+
+
+
+ plant_tribes_gene_family_aligner
+
+
+
+
+ plant_tribes_gene_family_classifier
+
+
+
+
+ plant_tribes_gene_family_integrator
+
+
+
+
+ plant_tribes_kaks_analysis
+
+
+
+
+ plant_tribes_gene_family_phylogeny_builder
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @misc{None,
+ journal = {None},
+ author = {1. Wafula EK},
+ title = {Manuscript in preparation},
+ year = {None},
+ url = {https://github.com/dePamphilis/PlantTribes},}
+
+
+
+
+ @article{Sasidharan2012,
+ journal = {Nucleic Acids Research},
+ author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
+ title = {GFam: a platform for automatic annotation of gene families},
+ year = {2012},
+ pages = {gks631},}
+
+
+ @article{Li2003,
+ journal = {Genome Research},
+ author = {3. Li L, Stoeckert CJ, Roos DS},
+ title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
+ year = {2003},
+ volume = {13},
+ number = {9},
+ pages = {2178-2189},}
+
+
+ @article{Emms2015,
+ journal = {Genome Biology},
+ author = {4. Emms DM, Kelly S},
+ title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
+ year = {2015},
+ volume = {16},
+ number = {1},
+ pages = {157},}
+
+
+
diff -r 000000000000 -r f4361d941aa3 plant_tribes_scaffolds.loc.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/plant_tribes_scaffolds.loc.sample Tue Apr 11 11:05:24 2017 -0400
@@ -0,0 +1,4 @@
+## Plant Tribes scaffolds
+#Value Name Path Description
+#22Gv1.0 22Gv1.0 /plant_tribes/scaffolds/22Gv1.0 22 plant genomes (Angiosperms clusters, version 1.0; 22Gv1.0)
+#22Gv1.1 22Gv1.1 /plant_tribes/scaffolds/22Gv1.1 22 plant genomes (Angiosperms clusters, version 1.1; 22Gv1.1)
diff -r 000000000000 -r f4361d941aa3 tool_data_table_conf.xml.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample Tue Apr 11 11:05:24 2017 -0400
@@ -0,0 +1,6 @@
+
+
+ value, name, path, description
+
+
+
diff -r 000000000000 -r f4361d941aa3 utils.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/utils.py Tue Apr 11 11:05:24 2017 -0400
@@ -0,0 +1,42 @@
+import os
+import shutil
+import sys
+
+
+def check_execution_errors(rc, stderr):
+    """Abort through stop_err when rc signals failure.
+
+    rc is the exit status to inspect and stderr is a readable stream; when rc
+    is non-zero the stream's full contents are passed to stop_err as the
+    message. Returns None when rc is zero.
+    """
+    if rc == 0:
+        return
+    stop_err(stderr.read())
+
+
+def move_directory_files(source_dir, destination_dir):
+    """Move every entry found in source_dir into destination_dir.
+
+    Both paths are made absolute first, and destination_dir is created
+    (including parents) when it is not already a directory.
+    """
+    src_root = os.path.abspath(source_dir)
+    dst_root = os.path.abspath(destination_dir)
+    if not os.path.isdir(dst_root):
+        os.makedirs(dst_root)
+    for entry_name in os.listdir(src_root):
+        shutil.move(os.path.join(src_root, entry_name), dst_root)
+
+
+def stop_err(msg):
+    """Write msg to standard error and exit the process with status 1.
+
+    msg may be text or bytes; bytes are decoded as UTF-8 (undecodable bytes
+    replaced) before being written.
+    """
+    # Pipe reads yield bytes on Python 3, which the text-mode stderr stream
+    # rejects. On Python 2 bytes is str, so this branch is skipped there.
+    if isinstance(msg, bytes) and not isinstance(msg, str):
+        msg = msg.decode('utf-8', 'replace')
+    sys.stderr.write(msg)
+    sys.exit(1)
+
+
+def write_html_output(output, title, dir):
+    """Write an HTML page to output that lists the files found in dir.
+
+    output is the path of the page to create, title is rendered as the page
+    heading, and dir is the directory whose entries are listed (sorted by
+    name) in a two-column Size / Name table with alternating row colours.
+    """
+    # NOTE(review): the HTML tags inside these literals were missing from the
+    # copy under review and have been reconstructed from the surviving text;
+    # confirm the exact tags and attributes against the upstream utils.py.
+    with open(output, 'w') as fh:
+        fh.write('<html><head><h3>%s</h3></head>\n' % title)
+        fh.write('<body><p/><table cellpadding="2">\n')
+        fh.write('<tr><th>Size</th><th>Name</th></tr>\n')
+        for index, fname in enumerate(sorted(os.listdir(dir))):
+            # Alternate the row background for readability.
+            if index % 2 == 0:
+                bgcolor = '#D8D8D8'
+            else:
+                bgcolor = '#FFFFFF'
+            try:
+                size = str(os.path.getsize(os.path.join(dir, fname)))
+            except OSError:
+                # The entry could not be stat'ed; still list it.
+                size = 'unknown'
+            link = '<a href="%s" type="text/plain">%s</a>\n' % (fname, fname)
+            fh.write('<tr bgcolor="%s"><td>%s</td><td>%s</td></tr>\n' % (bgcolor, size, link))
+        fh.write('</table></body></html>\n')