changeset 28:a63b610ec5bd draft

Uploaded
author greg
date Fri, 27 Jan 2017 15:03:15 -0500
parents f041f7778540
children 59d1ec149c64
files gene_family_classifier.xml
diffstat 1 files changed, 19 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/gene_family_classifier.xml	Tue Jan 24 10:32:56 2017 -0500
+++ b/gene_family_classifier.xml	Fri Jan 27 15:03:15 2017 -0500
@@ -1,7 +1,7 @@
-<tool id="plant_tribes_gene_family_classifier" name="GeneFamilyClassifier" version="1.0.0">
-    <description>pipeline</description>
+<tool id="plant_tribes_gene_family_classifier" name="Classify gene sequences" version="0.3">
+    <description>into precomputed orthologous gene family clusters</description>
     <requirements>
-        <requirement type="package" version="0.2">plant_tribes_gene_family_classifier</requirement>
+        <requirement type="package" version="0.3">plant_tribes_gene_family_classifier</requirement>
     </requirements>
     <stdio>
         <!-- Anything other than zero is an error -->
@@ -71,9 +71,9 @@
 
             #if $create_ortho_sequences:
                 #if $create_corresponding_coding_sequences:
-                    && echo "Sequences classified into pre-computed orthologous plant gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
+                    && echo "Sequences classified into precomputed orthologous plant gene family clusters with corresponding coding sequences: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
                 #else:
-                    && echo "Sequences classified into pre-computed orthologous plant gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
+                    && echo "Sequences classified into precomputed orthologous plant gene family clusters: `ls $orthogroups_fasta_src_dir | grep f | wc -l` files" > $output
                 #end if
                 && ls -al $orthogroups_fasta_src_dir | grep f >> $output
                 && mv $orthogroups_fasta_src_dir/* $dest_dir || true
@@ -119,7 +119,7 @@
             </param>
             <when value="basic" />
             <when value="advanced">
-                <param name="super_orthogroups" type="select" label="SuperOrthogroups MCL clustering" help="blastp e-value matrix between all pairs of orthogroups">
+                <param name="super_orthogroups" type="select" label="Super Orthogroups" help="Secondary MCL clusters of orthogroups">
                     <option value="min_evalue" selected="true">Minimum e-value</option>
                     <option value="avg_evalue">Average e-value</option>
                 </param>
@@ -182,18 +182,24 @@
     </tests>
     <help>
 This tool is one of the PlantTribes' collection of automated modular analysis pipelines that utilize objective classifications of
-complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies. It performs gene family
-classification of the post processed de novo transcripts using either blastp (faster), HMMScan (slower but more sensitive to remote
-homologs) or both (more exhaustive).
+complete protein sequences from sequenced plant genomes to perform comparative evolutionary studies.  This tool classifies gene
+sequences into precomputed orthologous gene family clusters using either blastp (faster), HMMScan (slower but more sensitive
+to remote homologs) or both (more exhaustive).
+
+This tool accepts any of the following as input.
+
+* the postprocessed assemblies produced by the **Postprocess de novo assembly transcripts into putative coding sequences** tool
+* externally predicted coding sequences and their corresponding amino acid translations derived from a transcriptome assembly
+* gene predictions from a sequenced genome
 
 -----
 
 **Options**
 
  * **Orthogroups or gene families proteins scaffold** - PlantTribes scaffolds data.
- * **Protein clustering method** - One of GFam, OrthoFinder or OrthoMCL.
- * **Protein classification method** - One of blastp, HMMScan or both.
- * **SuperOrthogroups MCL clustering** - blastp e-value matrix between all pairs of orthogroups.
+ * **Protein clustering method** - One of GFam (domain architecture based clustering), OrthoFinder (broadly defined clusters) or OrthoMCL (narrowly defined clusters).
+ * **Protein classification method** - blastp (faster), HMMScan (slower but more sensitive to remote homologs) or both (more exhaustive).
+ * **Super Orthogroups** - Secondary MCL clusters of orthogroups.
  * **Minimum single copy taxa required in orthogroup** - Used with "Minimum single copy taxa required in orthogroup" configuration only.
  * **Minimum taxa required in single copy orthogroup** - Used with "Minimum single copy taxa required in orthogroup" configuration only.
  * **Corresponding coding sequences (CDS) fasta file** - Used only when selecting "Create orthogroup fasta files?".
@@ -202,7 +208,7 @@
     <citations>
             <citation type="bibtex">
                 @unpublished{None,
-                author = {None},
+                author = {Eric Wafula},
                 title = {None},
                 year = {None},
                 eprint = {None},