comparison w4mcorcov.xml @ 3:61935618f92c draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 7682e8e7ae2bfb926d94b414b9a1649389f33582
author eschen42
date Sun, 12 Nov 2017 16:40:57 -0500
parents a06344808ffc
children d3bb34e764fe
comparison
equal deleted inserted replaced
2:a06344808ffc 3:61935618f92c
1 <tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.4"> 1 <tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.5">
2 2
3 <description>OPLS-DA Contrasts of Univariate Results</description> 3 <description>OPLS-DA Contrasts of Univariate Results</description>
4 4
5 <requirements> 5 <requirements>
6 <requirement type="package">r-batch</requirement> 6 <requirement type="package">r-batch</requirement>
32 <inputs> 32 <inputs>
33 <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="Features x samples (tabular data - decimal: '.'; missing: NA; mode: numerical; separator: tab character)" /> 33 <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="Features x samples (tabular data - decimal: '.'; missing: NA; mode: numerical; separator: tab character)" />
34 <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="Samples x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" /> 34 <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="Samples x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" />
35 <param name="variableMetadata_in" label="Variable metadata file (ideally from Univariate)" type="data" format="tabular" help="Features x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" /> 35 <param name="variableMetadata_in" label="Variable metadata file (ideally from Univariate)" type="data" format="tabular" help="Features x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" />
36 <param name="facC" label="Factor of interest" type="text" help="REQUIRED - The name of the column of sampleMetadata corresponding to the qualitative variable used to define the contrasts. Except when the 'Univariate Significance-test' is set to 'none', this also must be a portion of the column names in the variableMetadata file."/> 36 <param name="facC" label="Factor of interest" type="text" help="REQUIRED - The name of the column of sampleMetadata corresponding to the qualitative variable used to define the contrasts. Except when the 'Univariate Significance-test' is set to 'none', this also must be a portion of the column names in the variableMetadata file."/>
37 <param name="tesC" label="Univariate Significance-Test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file; that name must also be a portion of the column names in that file."> 37 <param name="tesC" label="Univariate significance-test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file; that name must also be a portion of the column names in that file.">
38 <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option> 38 <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option>
39 <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option> 39 <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option>
40 <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option> 40 <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option>
41 <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option> 41 <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option>
42 <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option> 42 <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option>
46 type="boolean" 46 type="boolean"
47 checked="true" 47 checked="true"
48 truevalue="TRUE" 48 truevalue="TRUE"
49 falsevalue="FALSE" 49 falsevalue="FALSE"
50 label="Retain only pairwise-significant features" 50 label="Retain only pairwise-significant features"
51 help="When 'none' is chosen, all features are included in the analysis. Otherwise, when this option is set to 'Yes', analysis will be performed including only features that differ significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude any feature that is not significantly different across all levels). See examples below."/> 51 help="When 'none' is chosen as the test, all features are included in the analysis (i.e., this parameter is ignored). Otherwise, when this option is set to 'Yes', analysis will be performed including only features that differ significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude any feature that is not significantly different across all levels). See examples below."/>
52 <param name="levCSV" label="Levels of interest" type="text" value = "*" help="Comma-separated level-names (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; levels must be non-numeric; may include wild cards or regular expressions. Note that extra space characters will affect results - 'a,b' is correct, but 'a , b' is not and may fail or give different results."> 52 <param name="levCSV" label="Levels of interest" type="text" value = "*" help="Comma-separated level-names (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; levels must be non-numeric; may include wild cards or regular expressions. Note that extra space characters will affect results - 'a,b' is correct, but 'a , b' is not and may fail or give different results.">
53 <sanitizer> 53 <sanitizer>
54 <valid initial="string.letters"> 54 <valid initial="string.letters">
55 <add preset="string.digits"/> 55 <add preset="string.digits"/>
56 <add value="&#36;" /> <!-- $ dollar, dollar-sign --> 56 <add value="&#36;" /> <!-- $ dollar, dollar-sign -->
73 <add value="&#125;" /> <!-- } r-cube, right-curly-bracket --> 73 <add value="&#125;" /> <!-- } r-cube, right-curly-bracket -->
74 <!-- IMPORTANT - Note that single and double quotes are not part of this list; they have the potential to make the 'command' section insecure or broken. --> 74 <!-- IMPORTANT - Note that single and double quotes are not part of this list; they have the potential to make the 'command' section insecure or broken. -->
75 </valid> 75 </valid>
76 </sanitizer> 76 </sanitizer>
77 </param> 77 </param>
78 <param name="matchingC" label="Level-name matching" type="select" help="How to specify level-names generically (if at all)."> 78 <param name="matchingC" label="Level-name matching" type="select" help="How to specify level-names generically. (See help below for details on using wild cards or regular expressions.)">
79 <option value="none">do no generic matching (default)</option> 79 <option value="none">do no generic matching (default)</option>
80 <option value="wildcard" selected="true">use wild-cards for matching level-names</option> 80 <option value="wildcard" selected="true">use wild-cards for matching level-names</option>
81 <option value="regex">use regular expressions for matching level-names</option> 81 <option value="regex">use regular expressions for matching level-names</option>
82 </param> 82 </param>
83 <param name="labelFeatures" type="text" value="3" label="Number of features having extreme loadings to label on cov-vs.-cor plot" help="Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features; this choice has no effect on the OPLS-DA loadings plot."/> 83 <param name="labelFeatures" type="text" value="3" label="How many features having extreme loadings should be labelled on cov-vs.-cor plot" help="Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features or '0' to label no features; this choice has no effect on the OPLS-DA loadings plot."/>
84 <param 84 <param
85 name="labelOrthoFeatures" 85 name="labelOrthoFeatures"
86 type="boolean" 86 type="boolean"
87 checked="false" 87 checked="false"
88 truevalue="TRUE" 88 truevalue="TRUE"
89 falsevalue="FALSE" 89 falsevalue="FALSE"
90 label="label orthogonal features" 90 label="Label features having extreme orthogonal loadings"
91 help="When labeling only extreme features in the cor-vs.-cov plot, use 'yes' to label extreme orthogonal features (both loado and loadp); the default value 'no' labels only extreme features along the parallel projection (loadp). Choosing 'yes' may clutter the plot."/> 91 help="When using the preceding parameter to label only features at the loading-extremes in the cor-vs.-cov plot, use 'no' here to label only features having extreme parallel loadings (loadp); this is the default. Choose 'yes' to add labels also to features having extreme orthogonal loadings (both loado and loadp); this may clutter the plot."/>
92 </inputs> 92 </inputs>
93 93
94 <outputs> 94 <outputs>
95 <!-- 95 <!--
96 pdf1: summaries of each contrast, clearly labeled by level=pair name 96 pdf1: summaries of each contrast, clearly labelled by level=pair name
97 * first PCA score-plot 97 * first PCA score-plot
98 * then PLS score-plot 98 * then PLS score-plot
99 * then PLS S-PLOT; color in red features with VIP > 1; color in grey any non-pairwise-significant features, if these are included 99 * then PLS S-PLOT; color in red features with VIP > 1; color in grey any non-pairwise-significant features, if these are included
100 --> 100 -->
101 <data name="contrast_detail" label="${tool.name}_${variableMetadata_in.name}_detail" format="pdf" /> 101 <data name="contrast_detail" label="${tool.name}_${variableMetadata_in.name}_detail" format="pdf" />
343 The purpose of the 'PLS-DA Contrasts' tool is to visualize GC-MS or LC-MS features that are possible biomarkers. 343 The purpose of the 'PLS-DA Contrasts' tool is to visualize GC-MS or LC-MS features that are possible biomarkers.
344 344
345 The W4M 'Univariate' tool (Thévenot *et al.*, 2015) adds the results of family-wise corrected pairwise significance-tests as columns of the **variableMetadata** dataset. 345 The W4M 'Univariate' tool (Thévenot *et al.*, 2015) adds the results of family-wise corrected pairwise significance-tests as columns of the **variableMetadata** dataset.
346 For instance, suppose that you ran Kruskal-Wallis testing for a column named 'cluster' in sampleMetadata that has values 'k1' and 'k2' and at least one other value. 346 For instance, suppose that you ran Kruskal-Wallis testing for a column named 'cluster' in sampleMetadata that has values 'k1' and 'k2' and at least one other value.
347 347
348 - A column of variableMetadata would be labeled 'cluster_kruskal_sig' and would have values '1' and '0'; when the samples are grouped by 'cluster', '1' means that there is strong evidence against the hypothesis that there is no difference among the intensities for the feature across all sample-groups. 348 - A column of variableMetadata would be labelled 'cluster_kruskal_sig' and would have values '1' and '0'; when the samples are grouped by 'cluster', '1' means that there is strong evidence against the hypothesis that there is no difference among the intensities for the feature across all sample-groups.
349 - A column of variableMetadata would be labeled 'cluster_kruskal_k1.k2_sig' and would have values '1' and '0', where '1' means that there is significant evidence against the hypothesis that samples from sampleMetadata whose 'cluster' column contains 'k1' or 'k2' have the same intensity for that feature. 349 - A column of variableMetadata would be labelled 'cluster_kruskal_k1.k2_sig' and would have values '1' and '0', where '1' means that there is significant evidence against the hypothesis that samples from sampleMetadata whose 'cluster' column contains 'k1' or 'k2' have the same intensity for that feature.
350 350
351 The 'PLS-DA Contrasts' tool produces graphics and data for OPLS-DA contrasts of feature-intensities between significantly different pairs of factor-levels. For each factor-level, the tool performs a contrast with all other factor-levels combined and then separately with each other factor-level. 351 The 'PLS-DA Contrasts' tool produces graphics and data for OPLS-DA contrasts of feature-intensities between significantly different pairs of factor-levels. For each factor-level, the tool performs a contrast with all other factor-levels combined and then separately with each other factor-level.
352 352
353 **Along the left-to-right axis, the plots show the supervised projection of the variation explained by the predictor** (i.e., the factor specified when invoking the tool); **the top-to-bottom axis displays the variation that is orthogonal to the predictor level** (i.e., independent of it). 353 **Along the left-to-right axis, the plots show the supervised projection of the variation explained by the predictor** (i.e., the factor specified when invoking the tool); **the top-to-bottom axis displays the variation that is orthogonal to the predictor level** (i.e., independent of it).
354 354
355 Although this tool can be used in a purely exploratory manner by supplying the variableMetadata file without the columns added by the W4M 'Univariate' tool, **the preferred workflow is to use univariate testing to exclude features that are not significantly different and use OPLS-DA to visualize the differences identified in univariate testing** (Thévenot *et al.*, 2015); an appropriate exception would be to visualize contrasts of a specific list of metabolites. 355 Although this tool can be used in a purely exploratory manner by supplying the variableMetadata file without the columns added by the W4M 'Univariate' tool, **the preferred workflow is to use univariate testing to exclude features that are not significantly different and use OPLS-DA to visualize the differences identified in univariate testing** (Thévenot *et al.*, 2015); an appropriate exception would be to visualize contrasts of a specific list of metabolites.
356 356
357 It must be stressed that there may be no *single* definitive computational approach to select features that are reliable biomarkers, especially from a small number of samples or experiments. A few possible choices are examining extreme values on S-PLOTs, examining "variable importance in projection VIP for OPLS-DA" (Galindo-Prieto *et al.* 2014), and examining a feature's "selectivity ratio" (Rajalahti *et al.*, 2009). In this spirit, this tool reports the S-PLOT covariance and correlation (Wiklund *op. cit.*) and VIP metrics, and it introduces an informal "salience" metric to flag features that may merit attention without dimensional reduction; future versions may add selectivity ratio. 357 It must be stressed that there may be no *single* definitive computational approach to select features that are reliable biomarkers, especially from a small number of samples or experiments. A few possible choices are:
358
359 - picking features with maximum loadings along the projection parallel to the predictor (loadp),
360 - examining extreme values on S-PLOTs (for which covariance is linearly related to loadp),
361 - examining "variable importance in projection VIP for OPLS-DA" (Galindo-Prieto *et al.* 2014), and
362 - examining a feature's "selectivity ratio" (Rajalahti *et al.*, 2009).
363
364 In this spirit, this tool reports the S-PLOT covariance and correlation (Wiklund *op. cit.*) and VIP metrics, and it introduces an informal "salience" metric to flag features that may merit attention without dimensional reduction; future versions may add selectivity ratio.
358 365
359 For a more systematic approach to biomarker identification, please consider the W4M 'biosigner' tool (Rinaudo *et al.* 2016), which applies three different identification metrics to the selection process. 366 For a more systematic approach to biomarker identification, please consider the W4M 'biosigner' tool (Rinaudo *et al.* 2016), which applies three different identification metrics to the selection process.
360 367
361 Regardless of how any potential biomarker is identified, further validation analysis (e.g., independent confirmatory experiments) is needed before it is recommended for general application. 368 Regardless of how any potential biomarker is identified, further validation analysis (e.g., independent confirmatory experiments) is needed before it is recommended for general application.
362 369
434 [IN] Label how many extreme features 441 [IN] Label how many extreme features
435 | Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features; this choice has no effect on the OPLS-DA loadings plot. 442 | Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features; this choice has no effect on the OPLS-DA loadings plot.
436 | 443 |
437 444
438 [IN] Label features with extreme loado 445 [IN] Label features with extreme loado
439 | If the previous parameter has limited the number of features to be labeled at each of the loading-extremes, then the extreme values for both loado and loadp will be labeled when this parameter is set to 'yes'; otherwise (in the default case) only extreme values for loadp will be labeled. The default was chosen to make the plot less cluttered. 446 | If the previous parameter has limited the number of features to be labelled at each of the loading-extremes, then the extreme values for both loado and loadp will be labelled when this parameter is set to 'yes'; otherwise (in the default case) only extreme values for loadp will be labelled. The default was chosen to make the plot less cluttered.
440 | 447 |
441 448
442 [OUT] Contrast-detail output PDF 449 [OUT] Contrast-detail output PDF
443 | Several plots for each two-projection OPLS-DA analysis: 450 | Several plots for each two-projection OPLS-DA analysis:
444 451
642 649
643 650
644 Release notes 651 Release notes
645 ------------- 652 -------------
646 653
647 0.98.4 654 0.98.5
648 655
649 - bug fix: fit feature-labels within clipping region of cor-vs.cov plot 656 - bug fix: fit feature-labels within clipping region of cor-vs.cov plot
650 - new feature: optionally (and by default) suppress labels for features with extreme orthogonal loadings 657 - new feature: optionally (and by default) suppress labels for features with extreme orthogonal loadings
651 658
652 0.98.3 659 0.98.3
653 660
654 - add support for two-level factors 661 - add support for two-level factors
655 - add adjusted mz and rt to output tables 662 - add adjusted mz and rt to output tables
656 - allow explicitly setting the number of features with extreme loadings to be labeled on the correlation vs. covariance plot 663 - allow explicitly setting the number of features with extreme loadings to be labelled on the correlation vs. covariance plot
657 - add loadings to corcov table 664 - add loadings to corcov table
658 665
659 0.98.2 666 0.98.2
660 667
661 - first release 668 - first release
662 669
663 670
664 ]]></help> 671 ]]></help>
665 <citations> 672 <citations>
673 <citation type="doi">10.5281/zenodo.1034784</citation>
666 <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS --> 674 <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS -->
667 <citation type="doi">10.1002/cem.2627</citation> 675 <citation type="doi">10.1002/cem.2627</citation>
668 <!-- Giacomoni_2014 W4M 2.5 --> 676 <!-- Giacomoni_2014 W4M 2.5 -->
669 <citation type="doi">10.1093/bioinformatics/btu813</citation> 677 <citation type="doi">10.1093/bioinformatics/btu813</citation>
670 <!-- Guitton_2017 W4M 3.0 --> 678 <!-- Guitton_2017 W4M 3.0 -->