changeset 16:4a0837f2b995 draft

Uploaded
author greg
date Fri, 28 Apr 2017 09:20:56 -0400
parents 5a5f80ea6306
children f3b6ddb952eb
files gene_family_aligner.xml macros.xml
diffstat 2 files changed, 79 insertions(+), 63 deletions(-) [+]
line wrap: on
line diff
--- a/gene_family_aligner.xml	Tue Apr 11 14:22:26 2017 -0400
+++ b/gene_family_aligner.xml	Fri Apr 28 09:20:56 2017 -0400
@@ -1,5 +1,5 @@
 <tool id="plant_tribes_gene_family_aligner" name="GeneFamilyAligner" version="@WRAPPER_VERSION@.0">
-    <description>aligns gene family sequences</description>
+    <description>aligns integrated orthologous gene family clusters</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -82,18 +82,18 @@
     </command>
     <inputs>
         <conditional name="input_format_cond">
-            <param name="input_format" type="select" label="Select type of data to sub sample">
-                <option value="ptortho">Gene family clusters</option>
-                <option value="ptorthocs">Gene family clusters with corresponding coding sequences</option>
+            <param name="input_format" type="select" label="Classified orthogroup fasta files">
+                <option value="ptortho">Proteins orthogroup fasta files</option>
+                <option value="ptorthocs">Protein and coding sequences orthogroup fasta files</option>
             </param>
             <when value="ptortho">
-                <param name="input_ptortho" format="ptortho" type="data" label="Gene family clusters">
+                <param name="input_ptortho" format="ptortho" type="data" label="Proteins orthogroup fasta files">
                     <!-- <validator type="empty_files_path" /> -->
                 </param>
                 <expand macro="cond_alignment_method" />
             </when>
             <when value="ptorthocs">
-                <param name="input_ptorthocs" format="ptorthocs" type="data" label="Gene family clusters with corresponding coding sequences">
+                <param name="input_ptorthocs" format="ptorthocs" type="data" label="Protein and coding sequences orthogroup fasta files">
                     <!-- <validator type="empty_files_path" /> -->
                 </param>
                 <expand macro="cond_alignment_method" />
@@ -130,40 +130,86 @@
     </tests>
     <help>
 This tool is one of the PlantTribes collection of automated modular analysis pipelines for comparative and evolutionary
-analyses of genome-scale gene families and transcriptomes. This tool aligns gene family sequences.
+analyses of genome-scale gene families and transcriptomes. This tool estimates protein and codon multiple sequence alignments
+of integrated orthologous gene family fasta files produced by the GeneFamilyIntegrator tool.
 
 -----
 
 **Required options**
 
- * **Select type of data to sub sample**
+ * **Classified orthogroup fasta files** - orthogroup fasta files produced by the GeneFamilyClassifier tool selected from your history.  Depending on how the GeneFamilyClassifier tool was executed, these could either be proteins or proteins and their corresponding coding sequences.
 
-  - **Gene family clusters** - sequences classified into gene family clusters.
-  - **Gene family clusters with corresponding coding sequences** - sequences classified into gene family clusters including corresponding coding sequences.
+  - **Proteins orthogroup fasta files** - proteins fasta files.
+  - **Protein and coding sequences orthogroup fasta files** - proteins and their corresponding coding sequences fasta files.
 
     - **Construct orthogroup multiple codon alignments** - construct orthogroup multiple codon alignments.
 
- * **Select method for multiple sequence alignments**
+ * **Multiple sequence alignment method** - method for estimating orthogroup multiple sequence alignments.  PlantTribes estimates alignments using either MAFFT's L-INS-i algorithm or the divide and conquer approach implemented in the PASTA pipeline for large alignments.
 
-  - **MAFFT algorithm** - mafft algorithm.
-  - **Pasta algorithm** - pasta algorithm.
+  - **MAFFT** - MAFFT algorithm.
+  - **PASTA** - PASTA algorithm.
 
-    - **Maximum number of iterations that the PASTA algorithm will execute** - maximum number of iterations that the PASTA algorithm will execute.
+    - **PASTA iteration limit** - number of PASTA iterations. By default, PASTA performs 3 iterations.
 
 **Other options**
 
- * **Remove gappy sequences in alignments**
+ * **Alignment post-processing configuration** - select 'Yes' to enable multiple sequence alignment post-processing configuration options.
 
-  - **Select process used for gap trimming** - either nucleotide based or using trimAl's ML heuristic trimming approach
+  - **Trimming method** - multiple sequence alignment trimming method.  PlantTribes trims alignments using two automated approaches implemented in trimAl.  Gap score based trimming removes alignment sites that do not achieve a user specified gap score.  For example, a setting of 0.1 removes sites that have gaps in 90% or more of the sequences in the multiple sequence alignment.  The automated heuristic trimming approach determines the best automated trimAl method to trim a given alignment as described in the trimAl tutorial `trimAl`_.
+
+.. _trimAl: http://trimal.cgenomics.org
 
     - **Nucleotide based**
 
-      - **Remove sites in alignments with gaps of**
-      - **Maximum number of iterations** - maximum number of iterations for iterative orthogroups realignment, trimming and fitering
+      - **Gap score** - 1.0 - (the fraction of sequences with gaps allowed in an alignment site).  The score is restricted to the range 0.0 - 1.0.  Zero value has no effect.
+
+  - **Remove sequences** - select 'Yes' to remove sequences in multiple sequence alignments that do not achieve a user specified alignment coverage score.  For example, a setting of 0.7 removes sequences with more than 30% gaps in the alignment.  This option requires one of the trimming methods to be set.
+
+      - **Coverage score** - minimum fraction of sites without gaps for a sequence in a multiple sequence alignment.  The score is restricted to the range 0.0 - 1.0.  Zero value has no effect.
+
+      - **Realignment iteration limit** - number of iterations to perform trimming, removal of sequences, and realignment of orthogroup sequences.  Zero value has no effect.
 
     </help>
     <citations>
         <expand macro="citation1" />
-        <expand macro="citations2to4" />
+        <citation type="bibtex">
+            @article{Wall2008,
+            journal = {Nucleic Acids Research},
+            author = {2. Wall PK, Leebens-Mack J, Muller KF, Field D, Altman NS},
+            title = {PlantTribes: a gene and gene family resource for comparative genomics in plants},
+            year = {2008},
+            volume = {36},
+            number = {suppl 1},
+            pages = {D970-D976},}
+        </citation>
+        <citation type="bibtex">
+            @article{Katoh2013,
+            journal = {Molecular biology and evolution},
+            author = {3. Katoh K, Standley DM},
+            title = {MAFFT multiple sequence alignment software version 7: improvements in performance and usability},
+            year = {2013},
+            volume = {30},
+            number = {4},
+            pages = {772-780},}
+        </citation>
+        <citation type="bibtex">
+            @article{Mirarab2014,
+            journal = {Research in Computational Molecular Biology (RECOMB)},
+            author = {4. Mirarab S, Nguyen N, Warnow T},
+            title = {PASTA: Ultra-Large Multiple Sequence Alignment. In R. Sharan (Ed.)},
+            year = {2014},
+            pages = {177-191},
+            url = {https://github.com/smirarab/pasta},}
+        </citation>
+        <citation type="bibtex">
+            @article{Capella-Gutierrez2009,
+            journal = {Bioinformatics},
+            author = {5. Capella-Gutierrez S, Silla-Martínez JM, Gabaldón T},
+            title = {trimAl: a tool for automated alignment trimming in large-scale phylogenetic analyses},
+            year = {2009},
+            volume = {25},
+            number = {15},
+            pages = {1972-1973},}
+        </citation>
     </citations>
 </tool>
--- a/macros.xml	Tue Apr 11 14:22:26 2017 -0400
+++ b/macros.xml	Fri Apr 28 09:20:56 2017 -0400
@@ -3,7 +3,7 @@
     <token name="@WRAPPER_VERSION@">0.8</token>
     <xml name="requirements_assembly_post_processor">
         <requirements>
-            <requirement type="package" version="0.4">plant_tribes_assembly_post_processor</requirement>
+            <requirement type="package" version="0.8">plant_tribes_assembly_post_processor</requirement>
         </requirements>
     </xml>
     <xml name="requirements_gene_family_aligner">
@@ -59,13 +59,13 @@
         </param>
     </xml>
     <xml name="param_orthogroup_fna">
-        <param name="orthogroup_fna" type="select" label="Process corresponding gene family classification orthogroups CDS fasta files?">
+        <param name="orthogroup_fna" type="select" label="Orthogroups coding sequences?">
             <option value="yes" selected="true">Yes</option>
             <option value="no">No</option>
         </param>
     </xml>
     <xml name="param_scaffold">
-        <param name="scaffold" type="select" label="Orthogroups or gene families proteins scaffold">
+        <param name="scaffold" type="select" label="Gene family scaffold">
             <options from_data_table="plant_tribes_scaffolds" />
             <validator type="no_options" message="No PlantTribes scaffolds are available.  Use the PlantTribes Scaffolds Download Data Manager tool in Galaxy to install and populate the PlantTribes scaffolds data table." />
         </param>
@@ -78,9 +78,9 @@
     </xml>
     <xml name="cond_alignment_method">
         <conditional name="alignment_method_cond">
-            <param name="alignment_method" type="select" force_select="true" label="Select method for multiple sequence alignments">
-                <option value="mafft" selected="true">MAFFT algorithm</option>
-                <option value="pasta">PASTA algorithm</option>
+            <param name="alignment_method" type="select" force_select="true" label="Multiple sequence alignment method">
+                <option value="mafft" selected="true">MAFFT</option>
+                <option value="pasta">PASTA</option>
             </param>
             <when value="mafft" />
             <when value="pasta">
@@ -90,31 +90,31 @@
     </xml>
     <xml name="cond_remove_gappy_sequences">
         <conditional name="remove_gappy_sequences_cond">
-            <param name="remove_gappy_sequences" type="select" label="Remove gappy sequences in alignments?">
+            <param name="remove_gappy_sequences" type="select" label="Alignment post-processing configuration?">
                 <option value="no" selected="true">No</option>
                 <option value="yes">Yes</option>
             </param>
             <when value="no" />
             <when value="yes">
                 <conditional name="trim_type_cond">
-                    <param name="trim_type" type="select" label="Select process used for trimming">
-                        <option value="gap_trimming" selected="true">Remove gappy sites in alignments (gap trimming)</option>
-                        <option value="automated_trimming">Trim alignments using trimAl's ML heuristic trimming approach (automated trimming)</option>
+                    <param name="trim_type" type="select" label="Trimming method">
+                        <option value="gap_trimming" selected="true">Gap score based trimming</option>
+                        <option value="automated_trimming">Automated heuristic trimming</option>
                     </param>
                     <when value="gap_trimming">
-                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Remove sites in alignments with gaps of" />
+                        <param name="gap_trimming" type="float" optional="true" min="0" max="1.0" label="Gap score" />
                     </when>
                     <when value="automated_trimming" />
                 </conditional>
                 <conditional name="remove_sequences_with_gaps_cond">
-                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences with specified gaps?">
+                    <param name="remove_sequences_with_gaps" type="select" label="Remove sequences?">
                         <option value="no" selected="true">No</option>
                         <option value="yes">Yes</option>
                     </param>
                     <when value="no" />
                     <when value="yes">
-                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Remove sequences with gaps of" />
-                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Maximum number of iterations" />
+                        <param name="remove_sequences_with_gaps_of" type="float" optional="true" min="0" max="1" label="Coverage score" />
+                        <param name="iterative_realignment" type="integer" optional="true" min="0" label="Realignment iteration limit" />
                     </when>
                 </conditional>
             </when>
@@ -130,34 +130,4 @@
             url = {https://github.com/dePamphilis/PlantTribes},}
         </citation>
     </xml>
-    <xml name="citations2to4">
-        <citation type="bibtex">
-            @article{Sasidharan2012,
-            journal = {Nucleic Acids Research},
-            author = {2. Sasidharan R, Nepusz T, Swarbreck D, Huala E, Paccanaro A},
-            title = {GFam: a platform for automatic annotation of gene families},
-            year = {2012},
-            pages = {gks631},}
-        </citation>
-        <citation type="bibtex">
-            @article{Li2003,
-            journal = {Genome Research}
-            author = {3. Li L, Stoeckert CJ, Roos DS},
-            title = {OrthoMCL: identification of ortholog groups for eukaryotic genomes},
-            year = {2003},
-            volume = {13},
-            number = {9},
-            pages = {2178-2189},}
-        </citation>
-        <citation type="bibtex">
-            @article{Emms2015,
-            journal = {Genome Biology}
-            author = {4. Emms DM, Kelly S},
-            title = {OrthoFinder: solving fundamental biases in whole genome comparisons dramatically improves orthogroup inference accuracy},
-            year = {2015},
-            volume = {16},
-            number = {1},
-            pages = {157},}
-        </citation>
-    </xml>
 </macros>