diff compare_categories.xml @ 0:c1bd0c560018 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/qiime commit bcbe76277f3e60303faf826f8ce7f018bc663a9a-dirty
author bebatut
date Tue, 02 Feb 2016 05:50:37 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/compare_categories.xml	Tue Feb 02 05:50:37 2016 -0500
@@ -0,0 +1,177 @@
+<tool id="qiime_compare_categories" name="compare categories" version="1.9.1galaxy1">
+
+    <description>Analyzes statistical significance of sample groupings using 
+        distance matrices</description>
+
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <expand macro="requirements" />
+
+    <command>
+<![CDATA[     
+    compare_categories.py 
+        --method=$method 
+        -i $input_dm 
+        -m $mapping_file 
+        -c $categories 
+        -o compare_categories_output
+        #if $num_permutations:
+            -n $num_permutations
+        #end if
+]]>
+    </command>
+
+    <inputs>
+        <param label="--method: the statistical method to use. Valid options: 
+            adonis, anosim, bioenv, morans_i, mrpp, permanova, permdisp, 
+            dbrda" name="method" optional="False" type="select">
+            <option value="adonis">adonis</option>
+            <option value="anosim">anosim</option>
+            <option value="bioenv">bioenv</option>
+            <option value="morans_i">morans_i</option>
+            <option value="mrpp">mrpp</option>
+            <option value="permanova">permanova</option>
+            <option value="permdisp">permdisp</option>
+            <option value="dbrda">dbrda</option>
+        </param>
+        <param label="-i/--input_dm: the input distance matrix. WARNING: Only 
+            symmetric, hollow distance matrices may be used as input. Asymmetric 
+            distance matrices, such as those obtained by the UniFrac Gain metric 
+            (i.e. beta_diversity.py -m unifrac_g), should not be used as input" 
+            name="input_dm" optional="False" type="data"/>
+        <param label="-m/--mapping_file: the metadata mapping file" 
+            name="mapping_file" optional="False" type="data"/>
+        <param label="-c/--categories: a comma-delimited list of categories from 
+            the mapping file. Note: all methods except for BIO-ENV accept just a 
+            single category. If multiple categories are provided, only the first 
+            will be used" name="categories" optional="False" type="text"/>
+        <param default="999" label="-n/--num_permutations: the number of permutations 
+            to use when calculating statistical significance. Only applies to 
+            adonis, ANOSIM, MRPP, PERMANOVA, PERMDISP, and db-RDA. Must be greater 
+            than or equal to zero [default: 999]" name="num_permutations" 
+            optional="True" type="integer"/>
+    </inputs>
+
+    <outputs>
+        <data format="txt" from_work_dir="compare_categories_output/*.txt" 
+            name="output_dir" label="Compare_categories.txt"/>
+    </outputs>
+
+    <tests>
+        <test>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+**What it does**
+
+This script allows for the analysis of the strength and statistical
+significance of sample groupings using a distance matrix as the primary input.
+Several statistical methods are available: adonis, ANOSIM, BIO-ENV, Moran's I,
+MRPP, PERMANOVA, PERMDISP, and db-RDA.
+
+Note: R's vegan and ape packages are used to compute many of these methods, and
+for the ones that are not, their implementations are based on the
+implementations found in those packages. It is recommended to read through the
+detailed descriptions provided by the authors (they are not reproduced here)
+and to refer to the primary literature for complete details, including the
+methods' assumptions. To view the documentation of a method in R, prepend a
+question mark before the method name. For example:
+
+?vegan::adonis
+
+The following are brief descriptions of the available methods:
+
+adonis - Partitions a distance matrix among sources of variation in order to
+describe the strength and significance that a categorical or continuous
+variable has in determining variation of distances. This is a nonparametric
+method and is nearly equivalent to db-RDA (see below) except when distance
+matrices constructed with semi-metric or non-metric dissimilarities are
+provided, which may result in negative eigenvalues. adonis is very similar to
+PERMANOVA, though it is more robust in that it can accept either categorical or
+continuous variables in the metadata mapping file, while PERMANOVA can only
+accept categorical variables. See vegan::adonis for more details.
+
+ANOSIM - Tests whether two or more groups of samples are significantly
+different based on a categorical variable found in the metadata mapping file.
+You can specify a category in the metadata mapping file to separate
+samples into groups and then test whether there are significant differences
+between those groups. For example, you might test whether 'Control' samples are
+significantly different from 'Fast' samples. Since ANOSIM is nonparametric,
+significance is determined through permutations. See vegan::anosim for more
+details.
+
+BIO-ENV - Finds subsets of variables whose Euclidean distances (after scaling
+the variables) are maximally rank-correlated with the distance matrix. For
+example, the distance matrix might contain UniFrac distances between
+communities, and the variables might be numeric environmental variables (e.g.,
+pH and latitude). Correlation between the community distance matrix and
+Euclidean environmental distance matrix is computed using Spearman's rank
+correlation coefficient (rho). This method will only accept categories that are
+numerical (continuous or discrete). This is currently the only method in the
+script that accepts more than one category (via -c). See vegan::bioenv for more
+details. This method is also known as BEST (previously called BIO-ENV) in the
+PRIMER-E software package.
+
+Moran's I - This method uses the numerical (e.g. geographical) data supplied to
+identify what type of spatial configuration occurs in the samples. For example,
+are they dispersed, clustered, or of no distinctly noticeable configuration
+when compared to each other? This method will only accept a category that is
+numerical. See ape::Moran.I for more details.
+
+MRPP - This method tests whether two or more groups of samples are
+significantly different based on a categorical variable found in the metadata
+mapping file. You can specify a category in the metadata mapping file to
+separate samples into groups and then test whether there are significant
+differences between those groups. For example, you might test whether 'Control'
+samples are significantly different from 'Fast' samples. Since MRPP is
+nonparametric, significance is determined through permutations. See
+vegan::mrpp for more details.
+
+PERMANOVA - This method is very similar to adonis except that it only accepts a
+categorical variable in the metadata mapping file. It uses an ANOVA
+experimental design and returns a pseudo-F value and a p-value. Since PERMANOVA
+is nonparametric, significance is determined through permutations.
+
+PERMDISP - This method analyzes the multivariate homogeneity of group
+dispersions (variances). In essence, it determines whether the variances of
+groups of samples are significantly different. The results of both parametric
+and nonparametric significance tests are provided in the output. This method is
+generally used as a companion to PERMANOVA. See vegan::betadisper for more
+details.
+
+db-RDA - This method is very similar to adonis and will only differ if certain
+non-Euclidean semi- or non-metrics are used to generate the input distance
+matrix, and negative eigenvalues are encountered. The only difference then will
+be in the p-values, not the R^2 values. As part of the output, an ordination
+plot is also generated that shows grouping/clustering of samples based on a
+category in the metadata mapping file. This category is used to explain the
+variability between samples. Thus, the ordination output of db-RDA is similar
+to PCoA except that it is constrained, while PCoA is unconstrained (i.e. with
+db-RDA, you must specify which category should be used to explain the
+variability in your data). See vegan::capscale for more details.
+
+For more information and examples pertaining to this script, please refer to
+the accompanying tutorial, which can be found at
+http://qiime.org/tutorials/category_comparison.html.
+
+
+At least one file will be created in the output directory specified by -o. For
+most methods, a single output file containing the results of the test (e.g. the
+effect size statistic and p-value) will be created. The format of the output
+files will vary between methods as some are generated by native QIIME code,
+while others are generated by R's vegan or ape packages. Please refer to the
+script description for details on how to access additional information for
+these methods, including what information is included in the output files.
+
+db-RDA is the only exception in that two output files are created: a results
+text file and a PDF of the ordination plot.
+    ]]>
+    </help>
+
+    <citations>
+        <expand macro="citations" />
+    </citations>    
+</tool>