Mercurial > repos > eschen42 > w4mcorcov
changeset 6:0b49916c5c52 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 4428e3252d54c8a8e0e5d85e8eaaeb13e9b21de7
author | eschen42 |
---|---|
date | Wed, 05 Sep 2018 19:24:47 -0400 |
parents | 1d046f648b47 |
children | ca9938f2eb6a |
files | w4mcorcov.xml w4mcorcov_calc.R w4mcorcov_input.R w4mcorcov_lib.R w4mcorcov_output.R w4mcorcov_salience.R w4mcorcov_util.R w4mcorcov_wrapper.R |
diffstat | 8 files changed, 1246 insertions(+), 601 deletions(-) [+] |
line wrap: on
line diff
--- a/w4mcorcov.xml Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov.xml Wed Sep 05 19:24:47 2018 -0400 @@ -1,104 +1,160 @@ -<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.7"> - - <description>OPLS-DA Contrasts of Univariate Results</description> - - <requirements> - <requirement type="package">r-batch</requirement> - <requirement type="package">bioconductor-ropls</requirement> - <!-- <requirement type="package">r-foreach</requirement> --> - </requirements> - - <stdio> - <exit_code range="1:" level="fatal" /> - </stdio> - - <command><![CDATA[ - cd $__tool_directory__; Rscript w4mcorcov_wrapper.R - dataMatrix_in "$dataMatrix_in" - sampleMetadata_in "$sampleMetadata_in" - variableMetadata_in "$variableMetadata_in" - tesC "$tesC" - facC "$facC" - pairSigFeatOnly "$pairSigFeatOnly" - levCSV '$levCSV' - matchingC '$matchingC' - labelFeatures '$labelFeatures' - labelOrthoFeatures '$labelOrthoFeatures' - contrast_detail '$contrast_detail' - contrast_corcov '$contrast_corcov' - contrast_salience '$contrast_salience' +<tool id="w4mcorcov" name="OPLS-DA_Contrasts" version="0.98.15"> + <description>OPLS-DA Contrasts of Univariate Results</description> + <macros> + <xml name="paramPairSigFeatOnly"> + <param name="pairSigFeatOnly" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" + label="Retain only pairwise-significant features" + help="When this option is set to 'Yes', analysis will be performed including only features that differ significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude any feature that is not significantly different across all levels). See examples below." /> + </xml> + <xml name="cplots"> + <param name="cplot_y" label="C-plot Y-axis" type="select" help="Choose the Y-axis for C-plots."> + <option value="correlation">Plot VIP versus correlation</option> + <option value="covariance">Plot VIP versus covariance</option> + </param> + <param name="cplot_p" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" + label="Produce predictor C-plot" + help="When this option is set to 'Yes', correlation will be plotted against vip4 for predictor loadings." /> + <param name="cplot_o" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE" + label="Produce orthogonal C-plot" + help="When this option is set to 'Yes', correlation will be plotted against vip4 for orthogonal loadings." /> + </xml> + </macros> + <requirements> + <!-- + <requirement type="package" version="3.4.1">r-base</requirement> + <requirement type="package" version="1.1_4">r-batch</requirement> + <requirement type="package" version="1.2.14">bioconductor-ropls</requirement> + --> + <requirement type="package">r-base</requirement> + <requirement type="package">r-batch</requirement> + <requirement type="package" version="1.10.0">bioconductor-ropls</requirement> + </requirements> + <command detect_errors="aggressive"><![CDATA[ + Rscript '$__tool_directory__/w4mcorcov_wrapper.R' + dataMatrix_in '$dataMatrix_in' + sampleMetadata_in '$sampleMetadata_in' + variableMetadata_in '$variableMetadata_in' + facC '$facC' + #if str( $signif_test.tesC ) == "none": + tesC "none" + pairSigFeatOnly "FALSE" + #else: + tesC '$signif_test.tesC' + pairSigFeatOnly '$signif_test.pairSigFeatOnly' + #end if + levCSV '$levCSV' + matchingC '$matchingC' + labelFeatures '$labelFeatures' + #if str( $xplots.expPlot ) == "none": + cplot_p "FALSE" + cplot_o "FALSE" + cplot_y "correlation" + #else if str( $xplots.expPlot ) == "cplot": + cplot_p '$xplots.cplot_p' + cplot_o '$xplots.cplot_o' + cplot_y '$xplots.cplot_y' + #end if + contrast_detail '$contrast_detail' + contrast_corcov '$contrast_corcov' + contrast_salience '$contrast_salience' ]]></command> - <inputs> - <param name="dataMatrix_in" label="Data matrix file" type="data" format="tabular" help="Features x samples (tabular data - decimal: '.'; missing: NA; mode: numerical; separator: tab character)" /> - <param name="sampleMetadata_in" label="Sample metadata file" type="data" format="tabular" help="Samples x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" /> - <param name="variableMetadata_in" label="Variable metadata file (ideally from Univariate)" type="data" format="tabular" help="Features x metadata (tabular data - decimal: '.'; missing: NA; mode: character or numerical; separator: tab character)" /> - <param name="facC" label="Factor of interest" type="text" help="REQUIRED - The name of the column of sampleMetadata corresponding to the qualitative variable used to define the contrasts. Except when the 'Univariate Significance-test' is set to 'none', this also must be a portion of the column names in the variableMetadata file."/> - <param name="tesC" label="Univariate significance-test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file; that name must also be a portion of the column names in that file."> - <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option> - <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option> - <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option> - <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option> - <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option> - </param> - <param - name="pairSigFeatOnly" - type="boolean" - checked="true" - truevalue="TRUE" - falsevalue="FALSE" - label="Retain only pairwise-significant features" - help="When 'none' is chosen as the test, all features are included in the analysis (i. e., this parameter is ignored). Otherwise, when this option is set to 'Yes', analysis will be performed including only features that differ significantly for the pair of levels being contrasted; when set to 'No', any feature that varies significantly across all levels will be included (i.e., exclude any feature that is not significantly different across all levels). See examples below."/> - <param name="levCSV" label="Levels of interest" type="text" value = "*" help="Comma-separated level-names (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; levels must be non-numeric; may include wild cards or regular expressions. Note that extra space characters will affect results - 'a,b' is correct, but 'a , b' is not and may fail or give different results."> - <sanitizer> - <valid initial="string.letters"> - <add preset="string.digits"/> - <add value="$" /> <!-- $ dollar, dollar-sign --> - <add value="(" /> <!-- ( left-paren --> - <add value=")" /> <!-- ) right-paren --> - <add value="*" /> <!-- * splat, asterisk --> - <add value="+" /> <!-- + plus --> - <add value="," /> <!-- , comma --> - <add value="-" /> <!-- - dash, minus-sign --> - <add value="." /> <!-- . dot, period --> - <add value=":" /> <!-- : colon --> - <add value=";" /> <!-- ; semi, semicolon --> - <add value="?" /> <!-- ? what, question mark --> - <add value="[" /> <!-- [ l-squib, left-squre-bracket --> - <add value="\" /> <!-- \ whack, backslash --> - <add value="]" /> <!-- ] r-squib, right-squre-bracket --> - <add value="^" /> <!-- ^ hat, caret --> - <add value="{" /> <!-- { l-cube, left-curly-bracket --> - <add value="|" /> <!-- | pipe --> - <add value="}" /> <!-- } r-cube, right-curly-bracket --> - <!-- IMPORTANT - Note that single and double quotes are not part of this list; they have the potential to make the 'command' section insecure or broken. --> - </valid> - </sanitizer> - </param> - <param name="matchingC" label="Level-name matching" type="select" help="How to specify level-names generically. (See help below for details on using wild cards or regular expressions.)"> - <option value="none">do no generic matching (default)</option> - <option value="wildcard" selected="true">use wild-cards for matching level-names</option> - <option value="regex">use regular expressions for matching level-names</option> - </param> - <param name="labelFeatures" type="text" value="3" label="How many features having extreme loadings should be labelled on cov-vs.-cor plot" help="Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features or '0' to label no features; this choice has no effect on the OPLS-DA loadings plot."/> - <param - name="labelOrthoFeatures" - type="boolean" - checked="false" - truevalue="TRUE" - falsevalue="FALSE" - label="Label features having extreme orthogonal loadings" - help="When using the preceding parameter to label only features at the loading-extremess in the cor-vs.-cov plot, use 'no' here to label only features having extreme parallel loadings (loadp); this is the default. Choose 'yes' to add labels also to features having extreme orthogonal loadings (both loado and loadp); this may clutter the plot."/> - </inputs> - - <outputs> + <inputs> + <param name="dataMatrix_in" format="tabular" label="Data matrix file" type="data" + help="variables ✖ samples" /> + <param name="sampleMetadata_in" format="tabular" label="Sample metadata file" type="data" + help="sample metadata, one row per sample" /> + <param name="variableMetadata_in" format="tabular" label="Variable metadata file (ideally from Univariate)" + type="data" help="variable metadata, one row per variable" /> + <param name="facC" type="text" + label="Factor of interest" + help="REQUIRED - The name of the column of sampleMetadata corresponding to the qualitative variable used to define the contrasts. Except when the 'Univariate Significance-test' is set to 'none', this also must be a portion of the column names in the variableMetadata file."> + <sanitizer> + <valid initial="string.letters"> + <add preset="string.digits"/> + <add value="." /> <!-- dot, period --> + <add value="_" /> <!-- underscore --> + <!-- R does not permit dashes in column names; neither does SQL --> + </valid> + </sanitizer> + </param> + <conditional name="signif_test"> + <param name="tesC" label="Univariate significance-test" type="select" help="Either 'none' or the name of the statistical test that was run by the 'Univariate' tool to produce the variableMetadata file; that name must also be a portion of the column names in that file."> + <option value="none">none - Display all features from variableMetadata (rather than choosing a subset based on significance in univariate testing)</option> + <option value="ttest">ttest - Student's t-test (parametric test, qualitative factor with exactly 2 levels)</option> + <option value="anova">anova - Analysis of variance (parametric test, qualitative factor with more than 2 levels)</option> + <option value="wilcoxon">wilcoxon - Wilcoxon rank test (nonparametric test, qualitative factor with exactly 2 levels)</option> + <option value="kruskal">kruskal - Kruskal-Wallis rank test (nonparametric test, qualitative factor with more than 2 levels)</option> + </param> + <when value="none" /> + <when value="ttest"> + <expand macro="paramPairSigFeatOnly" /> + </when> + <when value="anova"> + <expand macro="paramPairSigFeatOnly" /> + </when> + <when value="wilcoxon"> + <expand macro="paramPairSigFeatOnly" /> + </when> + <when value="kruskal"> + <expand macro="paramPairSigFeatOnly" /> + </when> + </conditional> + <param name="levCSV" type="text" value="*" label="Levels of interest" + help="Comma-separated level-names (or comma-less regular expressions to match level-names) to consider in analysis; must match at least two levels; levels must be non-numeric; may include wild cards or regular expressions. Note that extra space characters will affect results - 'a,b' is correct, but 'a , b' is not and may fail or give different results."> + <sanitizer> + <valid initial="string.letters"> + <add preset="string.digits"/> + <add value="$" /> <!-- $ dollar, dollar-sign --> + <add value="(" /> <!-- ( left-paren --> + <add value=")" /> <!-- ) right-paren --> + <add value="*" /> <!-- * splat, asterisk --> + <add value="+" /> <!-- + plus --> + <add value="," /> <!-- , comma --> + <add value="-" /> <!-- - dash, minus-sign --> + <add value="." /> <!-- . dot, period --> + <add value=":" /> <!-- : colon --> + <add value=";" /> <!-- ; semi, semicolon --> + <add value="?" /> <!-- ? what, question mark --> + <add value="[" /> <!-- [ l-squib, left-squre-bracket --> + <add value="\" /> <!-- \ whack, backslash --> + <add value="]" /> <!-- ] r-squib, right-squre-bracket --> + <add value="^" /> <!-- ^ hat, caret --> + <add value="_" /> <!-- underscore --> + <add value="{" /> <!-- { l-cube, left-curly-bracket --> + <add value="|" /> <!-- | pipe --> + <add value="}" /> <!-- } r-cube, right-curly-bracket --> + <!-- IMPORTANT - Note that single and double quotes are not part of this list; they have the potential to make the 'command' section insecure or broken. --> + </valid> + </sanitizer> + </param> + <param name="matchingC" label="Level-name matching" type="select" help="How to specify level-names generically. (See help below for details on using wild cards or regular expressions.)"> + <option value="none">do no generic matching</option> + <option value="wildcard" selected="true">use wild-cards for matching level-names (default)</option> + <option value="regex">use regular expressions for matching level-names</option> + </param> + <param name="labelFeatures" type="text" value="3" + label="How many features having extreme loadings should be labelled on cov-vs.-cor plot" + help="Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features or '0' to label no features; this choice has no effect on the OPLS-DA loadings plot."/> + <conditional name="xplots"> + <param name="expPlot" label="Extra plots to include" type="select" help="Choosing 'none' hides further choices."> + <option value="none">Do not include additonal extra plots.</option> + <option value="cplot">Include C-plots (predictor-loading vs. 'vip4p' and orthogonal-loading versus 'vip4o')</option> + </param> + <when value="none" /> + <when value="cplot"> + <expand macro="cplots" /> + </when> + </conditional> + </inputs> + <outputs> <!-- pdf1: summaries of each contrasts, clearly labelled by level=pair name * first PCA score-plot * then PLS score-plot * then PLS S-PLOT; color in red features with VIP > 1; color in grey any non-pairwise-significant features, if these are included --> - <data name="contrast_detail" label="${tool.name}_${variableMetadata_in.name}_detail" format="pdf" /> + <data name="contrast_detail" format="pdf" label="${tool.name}_${variableMetadata_in.name}_detail" /> <!-- tsv1: cor and cov table with columns: * feature-ID @@ -108,9 +164,9 @@ * Wiklund_2008 covariance * Galindo_Prieto_2014 VIP for predictive components, VIP[4,p] * Galindo_Prieto_2014 VIP for orthogonal components, VIP[4,o] - * (When filtering on significance of univariate tests) Significance of test of null hypothesis that there is no difference between the two classes, i.e, the pair-wise test. + * When filtering on significance of univariate tests,significance of test of null hypothesis that there is no difference between the two classes, i.e, the pair-wise test. --> - <data name="contrast_corcov" label="${tool.name}_${variableMetadata_in.name}_corcov" format="tabular" /> + <data name="contrast_corcov" format="tabular" label="${tool.name}_${variableMetadata_in.name}_corcov" /> <!-- tsv2: salience table with columns (experimental feature): * feature-ID @@ -118,10 +174,10 @@ * Salient robust coefficient of variation, i.e., for the feature, the mean absolute deviation of the intensity for the salient level divided by the median intensity for the salient level * Salience, i.e., for the feature, the median of the class-level having the greatest intensity divided by the mean of the medians for all class-levels. --> - <data name="contrast_salience" label="${tool.name}_${variableMetadata_in.name}_salience" format="tabular" /> + <data name="contrast_salience" format="tabular" label="${tool.name}_${variableMetadata_in.name}_salience" /> </outputs> - <tests> + <!-- test #1 --> <test> <param name="dataMatrix_in" value="input_dataMatrix.tsv"/> <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/> @@ -130,7 +186,6 @@ <param name="facC" value="k10"/> <param name="pairSigFeatOnly" value="FALSE"/> <param name="labelFeatures" value="3"/> - <param name="labelOrthogonalFeatures" value="FALSE"/> <param name="levCSV" value="k[12],k[3-4]"/> <param name="matchingC" value="regex"/> <output name="contrast_corcov"> @@ -146,22 +201,16 @@ <has_text text="level1Level2Sig" /> <!-- first matched line --> <has_text text="M349.2383T700" /> - <has_text text="-0.05007" /> - <has_text text="-5.8455" /> - <has_text text="0.0961269" /> - <has_text text="0.1848301" /> + <has_text text="-0.3704185" /> + <has_text text="-36.6668927" /> + <has_text text="0.4914638" /> + <has_text text="0.01302117" /> <!-- second matched line --> <has_text text="M207.9308T206" /> - <has_text text="-0.2967565" /> - <has_text text="-19.56942" /> - <has_text text="1.6023" /> - <has_text text="1.35368" /> - <!-- third matched line --> - <has_text text="M211.0607T263" /> - <has_text text="0.47052" /> - <has_text text="15.910087" /> - <has_text text="0.89838" /> - <has_text text="0.125372" /> + <has_text text="0.3235022" /> + <has_text text="5.97529097" /> + <has_text text="0.207196379" /> + <has_text text="0.04438632" /> </assert_contents> </output> <output name="contrast_salience"> @@ -186,6 +235,7 @@ </assert_contents> </output> </test> + <!-- test #2 --> <test> <param name="dataMatrix_in" value="input_dataMatrix.tsv"/> <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/> @@ -194,7 +244,6 @@ <param name="facC" value="k10"/> <param name="pairSigFeatOnly" value="TRUE"/> <param name="labelFeatures" value="3"/> - <param name="labelOrthogonalFeatures" value="TRUE"/> <param name="levCSV" value="k[12],k[3-4]"/> <param name="matchingC" value="regex"/> <output name="contrast_corcov"> @@ -209,21 +258,11 @@ <has_text text="vip4o" /> <has_text text="level1Level2Sig" /> <!-- first matched line --> - <has_text text="M349.2383T700" /> - <has_text text="-0.99601577" /> - <has_text text="-947.55795176" /> - <!-- second matched line --> - <has_text text="M207.9308T206" /> - <has_text text="0.688549" /> - <has_text text="58.22352" /> - <has_text text="1.394687" /> - <has_text text="0.06049885" /> - <!-- third matched line --> - <has_text text="M211.0607T263" /> - <has_text text="-0.572018" /> - <has_text text="-14.57769" /> - <has_text text="0.7780899" /> - <has_text text="0.3678166776" /> + <has_text text="M200.005T296" /> + <has_text text="-0.24533821" /> + <has_text text="-3.3573953" /> + <has_text text="0.1157346" /> + <has_text text="0.0647860" /> </assert_contents> </output> <output name="contrast_salience"> @@ -248,15 +287,14 @@ </assert_contents> </output> </test> + <!-- test #3 --> <test> <param name="dataMatrix_in" value="input_dataMatrix.tsv"/> <param name="sampleMetadata_in" value="input_sampleMetadata.tsv"/> <param name="variableMetadata_in" value="input_variableMetadata.tsv"/> <param name="tesC" value="none"/> <param name="facC" value="k10"/> - <param name="pairSigFeatOnly" value="TRUE"/> <param name="labelFeatures" value="3"/> - <param name="labelOrthogonalFeatures" value="FALSE"/> <param name="levCSV" value="k[12],k[3-4]"/> <param name="matchingC" value="regex"/> <output name="contrast_corcov"> @@ -271,24 +309,19 @@ <has_text text="vip4o" /> <!-- first matched line --> <has_text text="M349.2383T700" /> - <has_text text="-0.64331257" /> - <has_text text="-161.82220" /> - <has_text text="1.862455" /> - <has_text text="0.2105143" /> + <has_text text="-0.37867079" /> + <has_text text="-37.71066" /> + <has_text text="0.5246766" /> + <has_text text="0.0103341" /> <!-- second matched line --> <has_text text="M207.9308T206" /> - <has_text text="-0.313507" /> - <has_text text="-20.0476" /> - <has_text text="1.6956987" /> - <has_text text="1.19247" /> - <!-- third matched line --> - <has_text text="M211.0607T263" /> - <has_text text="-0.38986114" /> - <has_text text="-23.747718" /> - <has_text text="1.064296856" /> - <has_text text="1.16507455" /> + <has_text text="0.31570433" /> + <has_text text="5.86655640" /> + <has_text text="0.2111623" /> + <has_text text="0.0488654" /> </assert_contents> </output> + <!-- test #4 --> <output name="contrast_salience"> <assert_contents> <!-- column-labels line --> @@ -311,10 +344,150 @@ </assert_contents> </output> </test> + <!-- test #4 --> + <test> + <param name="dataMatrix_in" value="issue1_input_dataMatrix.tsv"/> + <param name="sampleMetadata_in" value="issue1_input_sampleMetadata.tsv"/> + <param name="variableMetadata_in" value="issue1_input_variableMetadata.tsv"/> + <param name="tesC" value="none"/> + <param name="facC" value="tissue_flowering"/> + <param name="labelFeatures" value="3"/> + <param name="levCSV" value="*"/> + <param name="matchingC" value="wildcard"/> + <output name="contrast_corcov"> + <assert_contents> + <!-- column-labels line --> + <has_text text="featureID" /> + <has_text text="factorLevel1" /> + <has_text text="factorLevel2" /> + <has_text text="correlation" /> + <has_text text="covariance" /> + <has_text text="vip4p" /> + <has_text text="vip4o" /> + <!-- first matched line --> + <has_text text="NM516T251" /> + <has_text text="flower_yes" /> + <has_text text="other" /> + <has_text text="0.03402807" /> + <has_text text="0.03526926" /> + <has_text text="0.43664386" /> + <has_text text="0.587701897" /> + <has_text text="0.026082688" /> + <has_text text="0.0437742145" /> + <has_text text="516.0845" /> + <has_text text="250.8762" /> + </assert_contents> + </output> + <output name="contrast_salience"> + <assert_contents> + <!-- column-labels line --> + <has_text text="featureID" /> + <has_text text="salientLevel" /> + <has_text text="salientRCV" /> + <has_text text="salience" /> + <has_text text="mz" /> + <has_text text="rt" /> + <!-- first matched line --> + <has_text text="PM518T369" /> + <has_text text="flower_yes" /> + <has_text text="0.58655260" /> + <has_text text="4.414469" /> + <has_text text="518.1656" /> + <has_text text="368.59817" /> + </assert_contents> + </output> + </test> + <!-- test #5 --> + <test> + <param name="dataMatrix_in" value="input_dataMatrix.tsv"/> + <param name="sampleMetadata_in" value="issue6_input_sampleMetadata.tsv"/> + <param name="variableMetadata_in" value="input_variableMetadata.tsv"/> + <param name="tesC" value="none"/> + <param name="facC" value="k._10"/> + <param name="labelFeatures" value="3"/> + <param name="levCSV" value="k1,k.2"/> + <param name="matchingC" value="none"/> + <output name="contrast_corcov"> + <assert_contents> + <!-- column-labels line --> + <has_text text="featureID" /> + <has_text text="factorLevel1" /> + <has_text text="factorLevel2" /> + <has_text text="correlation" /> + <has_text text="covariance" /> + <has_text text="vip4p" /> + <has_text text="vip4o" /> + <!-- first matched line --> + <has_text text="M349.2383T700" /> + <has_text text="0.43361563" /> + <has_text text="37.76875778" /> + <has_text text="0.54672558" /> + <has_text text="0.3920409" /> + <!-- second matched line --> + <has_text text="M207.9308T206" /> + <has_text text="-0.3365475" /> + <has_text text="-6.337903" /> + <has_text text="0.270297" /> + <has_text text="0.037661" /> + </assert_contents> + </output> + </test> + <!-- test #6 - issue 6 --> + <test> + <param name="dataMatrix_in" value="input_dataMatrix.tsv"/> + <param name="sampleMetadata_in" value="issue6_input_sampleMetadata.tsv"/> + <param name="variableMetadata_in" value="input_variableMetadata.tsv"/> + <param name="tesC" value="none"/> + <param name="facC" value="k._10"/> + <param name="labelFeatures" value="3"/> + <param name="levCSV" value="k_3,k-4"/> + <param name="matchingC" value="none"/> + <output name="contrast_corcov"> + <assert_contents> + <!-- column-labels line --> + <has_text text="featureID" /> + <has_text text="factorLevel1" /> + <has_text text="factorLevel2" /> + <has_text text="correlation" /> + <has_text text="covariance" /> + <has_text text="vip4p" /> + <has_text text="vip4o" /> + <!-- first matched line --> + <has_text text="M349.2383T700" /> + <has_text text="-0.0435663" /> + <has_text text="-1.9068114" /> + <has_text text="0.0304592" /> + <has_text text="0.104748883" /> + </assert_contents> + </output> + </test> + <!-- test #6 - issue 8 --> + <test> + <param name="dataMatrix_in" value="input_dataMatrix.tsv"/> + <param name="sampleMetadata_in" value="issue8_input_sampleMetadata.tsv"/> + <param name="variableMetadata_in" value="input_variableMetadata.tsv"/> + <param name="tesC" value="none"/> + <param name="facC" value="k._10"/> + <param name="labelFeatures" value="3"/> + <param name="levCSV" value="k_3,k-4"/> + <param name="matchingC" value="none"/> + <output name="contrast_corcov"> + <assert_contents> + <!-- column-labels line --> + <has_text text="featureID" /> + <has_text text="factorLevel1" /> + <has_text text="factorLevel2" /> + <has_text text="correlation" /> + <has_text text="covariance" /> + <has_text text="vip4p" /> + <has_text text="vip4o" /> + <!-- k1 rejected by levCSV, leaving only k_3 and k-4 --> + <not_has_text text="k1" /> + <not_has_text text="other" /> + </assert_contents> + </output> + </test> </tests> - <!-- - .. |reg| unicode:: U+000AE .. REGISTERED SIGN - see http://docutils.sourceforge.net/docutils/parsers/rst/include/isonum.txt or /usr/share/docutils/parsers/rst/include/isonum.txt - --> <help><![CDATA[ **Run PLS-DA Contrasts of Univariate Results** @@ -348,20 +521,20 @@ - A column of variableMetadata would be labelled 'cluster_kruskal_sig' and would have values '1' and '0'; when the samples are grouped by 'cluster', '1' means that there is strong evidence against the hypothesis that there is no difference among the intensities for the feature across all sample-groups. - A column of variableMetadata would be labelled 'cluster_kruskal_k1.k2_sig' and would have values '1' and '0', where '1' means that there is significant evidence against the hypothesis that samples from sampleMetadata whose 'cluster' column contains 'k1' or 'k2' have the same intensity for that feature. -The 'PLS-DA Contrasts' tool produces graphics and data for OPLS-DA contrasts of feature-intensities between significantly different pairs of factor-levels. For each factor-level, the tool performs a contrast with all other factor-levels combined and then separately with each other factor-level. +The 'PLS-DA Contrasts' tool produces graphics and data for OPLS-DA contrasts of feature-intensities between significantly different pairs of factor-levels. For each factor-level, the tool performs a contrast with all other factor-levels combined and then separately with each other factor-level. **Along the left-to-right axis, the plots show the supervised projection of the variation explained by the predictor** (i.e., the factor specified when invoking the tool); **the top-to-bottom axis displays the variation that is orthogonal to the predictor level** (i.e., independent of it). -Although this tool can be used in a purely exploratory manner by supplying the variableMetadata file without the columns added by the W4M 'Univariate' tool, **the preferred workflow is to use univariate testing to exclude features that are not significantly different and use OPLS-DA to visualize the differences identified in univariate testing** (Th]]>é<![CDATA[venot *et al.*, 2015); an appropriate exception would be to visualize contrasts of a specific list of metabolites. +Although this tool can be used in a purely exploratory manner by supplying the variableMetadata file without the columns added by the W4M 'Univariate' tool, **the preferred workflow may be to use univariate testing to exclude features that are not significantly different and then to use OPLS-DA to visualize the differences identified in univariate testing** (Th]]>é<![CDATA[venot *et al.*, 2015); an appropriate exception would be to visualize contrasts of a specific list of metabolites. It must be stressed that there may be no *single* definitive computational approach to select features that are reliable biomarkers, especially from a small number of samples or experiments. A few possible choices are: - picking features with maximum loadings along the projection parallel to the predictor (loadp), -- examining extreme values on S-PLOTs (for which covariance is linearly related to loadp), +- examining extreme values on S-PLOTs - examining "variable importance in projection VIP for OPLS-DA" (Galindo-Prieto *et al.* 2014), and - examining a feature's "selectivity ratio" (Rajalahti *et al.*, 2009). -In this spirit, this tool reports the S-PLOT covariance and correlation (Wiklund *op. cit.*) and VIP metrics, and it introduces an informal "salience" metric to flag features that may merit attention without dimensional reduction; future versions may add selectivity ratio. +In this spirit, this tool reports the S-PLOT covariance and correlation (Wiklund *op. cit.*) and VIP metrics, and it introduces an informal "salience" metric to flag features that may merit attention without dimensional reduction; future versions may add selectivity ratio. For a more systematic approach to biomarker identification, please consider the W4M 'biosigner' tool (Rinuardo *et al.* 2016), which applies three different identification metrics to the selection process. @@ -425,9 +598,9 @@ | [IN] Retain only pairwise-significant features + | *Note that when 'Test' is 'none', all features are included in the analysis and this parameter is not settable.* | When **true**, for each contrast of two levels, include only those features which pass the significance threshold for that contrast. Choosing true results in an OPLS-DA model that better reflects and visualizes the difference detected by univariate analysis, with somewhat increased reliability of prediction (as assessed by cross-validation). | When **false**, include all features that pass the significance threshold when testing for difference across all factor-levels. This choice produces a plot that displays more features but is not necessarily more informative. - | *Note that when 'Test' is 'none', all features are included in the analysis and this parameter has no effect.* | [IN] Levels of interest @@ -442,17 +615,15 @@ | Specify the number of features at each of the loading-extremes that should be labelled (with the name of the feature) on the covariance-vs.-correlation plot; specify 'ALL' to label all features; this choice has no effect on the OPLS-DA loadings plot. | -[IN] Label features with extreme loado - | If the previous parameter has limited the the number of features to be labelled at each of the loading-extremes, then the extreme values for both loado and loadp will be labelled when this parameter is set to 'yes'; otherwise (in the default case) only extreme values for loadp will be lableld. The default was chosen to make the plot less cluttered. - | - [OUT] Contrast-detail output PDF | Several plots for each two-projection OPLS-DA analysis: -- (top-left) **correlation-versus-covariance plot** of OPLS-DA results (a work-alike for the S-PLOT, computed using formula in Supplement to Wiklund, *op. cit.*); point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *et al.* 2014) ranges from 0.83 and 1.21 (Mehmood *et al.* 2012) -- (bottom-left) **model-overview plot** for the two projections; grey bars are the correlation coefficient for the fitted data; black bars indicate performance in cross-validation tests (Th]]>é<![CDATA[venot, 2017) -- (top-right) OPLS-DA **scores-plot** for the two projections (Th]]>é<![CDATA[venot *et al.*, 2015) -- (bottom-right) OPLS-DA **loadings-plot** for the two projections (*ibid.*) +- (first row, left) **correlation-versus-covariance plot** of OPLS-DA results (a work-alike for the S-PLOT, computed using formula in Supplement to Wiklund, *op. cit.*); point-color becomes saturated as the "variable importance in projection to the predictive components" (VIP\ :subscript:`4,p` from Galindo-Prieto *et al.* 2014) ranges from 0.83 and 1.21 (Mehmood *et al.* 2012), for use to identify features for consideration as biomarkers. +- (second row, left) **model-overview plot** for the two projections; grey bars are the correlation coefficient for the fitted data; black bars indicate performance in cross-validation tests (Th]]>é<![CDATA[venot, 2017) +- (first row, right) OPLS-DA **scores-plot** for the two projections (Th]]>é<![CDATA[venot *et al.*, 2015) +- (second row, right) **correlation-versus-covariance plot** of OPLS-DA results **orthogonal to the predictor** (see section "S-Plot of Orthogonal Component" in Wiklund, *op. cit.*, pp. 120-121; this characterizes features with the greatest variation independent of the predictor). +- (third row, left, when "**predictor C-plot**" is chosen under "Extra plots to include") plot of the covariance or correlation vs. the VIP for the projection *parallel to the predictor*, for use to identify features for consideration as biomarkers. +- (third row, right, when "**orthogonal C-plot**" is chosen under "Extra plots to include") plotof the covariance or correlation vs. the VIP for the projection *orthogonal to the predictor*, for use to identify features varying considerably without regard to the predictor. [OUT] Contrast Correlation-Covarinace data TABULAR | A tab-separated values file of metadata for each feature for each contrast in which it was included. @@ -471,7 +642,7 @@ - **level1Level2Sig** - (Only present when a test other than "none" is chosen) '1' when feature varies significantly across all classes (i.e., not pair-wise); '0' otherwise [OUT] Feature "Salience" data TABULAR - | Metrics for the "salient level" for each feature, i.e., the level at which the feature is more prominent than any other level. This is *not* at all related to the SIMCA OPLS-DA S-PLOT; rather, it is intended as a potential (and unproven) way to identify features that may suggest potential biomarkers without dimensional reduction of data. This is a tab-separated values file having the following columns: + | Metrics for the "salient level" for each feature, i.e., the level at which the feature is more prominent than any other level. This is *not* at all related to the SIMCA OPLS-DA S-PLOT; rather, it is intended as a potential way to discover features for consideration as potential biomarkers without dimensionally reducting the data. This is a tab-separated values file having the following columns: - **featureID** - feature identifier - **salientLevel** - salient level, i.e., for the feature, the class-level having the greatest median intensity @@ -535,15 +706,15 @@ **Input files** - +-------------------+-------------------------------------------------------------------------------------------------------------------+ - | Input File | Download from URL | - +===================+===================================================================================================================+ - | Data matrix | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_dataMatrix.tsv | - +-------------------+-------------------------------------------------------------------------------------------------------------------+ - | Sample metadata | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_sampleMetadata.tsv | - +-------------------+-------------------------------------------------------------------------------------------------------------------+ - | Variable metadata | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/input_variableMetadata.tsv | - +-------------------+-------------------------------------------------------------------------------------------------------------------+ + +-----------------------------------------------------------------------------------------------------------------------------------------------+ + | Download from URL | + +===============================================================================================================================================+ + | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/input_dataMatrix.tsv | + +-----------------------------------------------------------------------------------------------------------------------------------------------+ + | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/input_sampleMetadata.tsv | + +-----------------------------------------------------------------------------------------------------------------------------------------------+ + | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/input_variableMetadata.tsv | + +-----------------------------------------------------------------------------------------------------------------------------------------------+ **Example 1:** Include in the analysis only features identified as pair-wise significant in the Univariate test. @@ -562,84 +733,80 @@ +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ | Number of features having extreme loadings | ALL | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov.tsv | + | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_corcov.tsv | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience.tsv | + | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_salience.tsv | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_detail.pdf | + | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_detail.pdf | +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ **Example 2:** Include in the analysis only features identified as overall-significant in the Univariate test. Note that this even includes these features in contrasts where they were not determined to be pair-wise significant in the Univariate test. Thus, more features are included than in Example 1. - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Input Parameter or Result | Value | - +============================================+========================================================================================================================================+ - | Factor of interest | k10 | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Univariate Significance-Test | kruskal | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Retain only pairwise-significant features | No | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Levels of interest | ``*`` | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Level-name matching | use wild cards for matching level-names | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Number of features having extreme loadings | 5 | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_all.tsv | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_all.tsv | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_detail_all.pdf | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Input Parameter or Result | Value | + +============================================+============================================================================================================================================+ + | Factor of interest | k10 | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Univariate Significance-Test | kruskal | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Retain only pairwise-significant features | No | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Levels of interest | ``*`` | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Level-name matching | use wild cards for matching level-names | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Number of features having extreme loadings | 5 | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_corcov_all.tsv | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_salience_all.tsv | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ + | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_detail_all.pdf | + +--------------------------------------------+--------------------------------------------------------------------------------------------------------------------------------------------+ **Example 3:** Include all features in the analysis without regard to Univariate testing. Univariate testing is not even a pre-requisite to using the tool when 'none' is selected for the test. Thus, more features are included than in Example 2. - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Input Parameter or Result | Value | - +============================================+========================================================================================================================================+ - | Factor of interest | k10 | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Univariate Significance-Test | none | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Retain only pairwise-significant features | No | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Levels of interest | k[12],k[3-4] | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Level-name matching | use regular expressions for matching level-names | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Number of features having extreme loadings | 0 | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_global.tsv | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_global.tsv | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_detail_global.pdf | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Input Parameter or Result | Value | + +============================================+==============================================================================================================================================+ + | Factor of interest | k10 | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Univariate Significance-Test | none | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Levels of interest | k[12],k[3-4] | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Level-name matching | use regular expressions for matching level-names | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Number of features having extreme loadings | 0 | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_corcov_global.tsv | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_salience_global.tsv | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_detail_global.pdf | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ **Example 4:** Analysis of a two-level factor (including all features). This suppresses the contrasts of "each factor vs. the aggregate of all the others". - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Input Parameter or Result | Value | - +============================================+========================================================================================================================================+ - | Factor of interest | lohi | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Univariate Significance-Test | none | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Retain only pairwise-significant features | No | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Levels of interest | low,high | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Level-name matching | use regular expressions for matching level-names | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Number of features having extreme loadings | 3 | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_corcov_lohi.tsv | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_salience_lohi.tsv | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ - | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/test-data/expected_contrast_detail_lohi.pdf | - +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------+ + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Input Parameter or Result | Value | + +============================================+==============================================================================================================================================+ + | Factor of interest | lohi | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Univariate Significance-Test | none | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Levels of interest | low,high | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Level-name matching | use regular expressions for matching level-names | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Number of features having extreme loadings | 3 | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Output primary table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_corcov_lohi.tsv | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Output salience table | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_salience_lohi.tsv | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ + | Output figures PDF | https://raw.githubusercontent.com/HegemanLab/w4mcorcov_galaxy_wrapper/master/tools/w4mcorcov/test-data/expected_contrast_detail_lohi.pdf | + +--------------------------------------------+----------------------------------------------------------------------------------------------------------------------------------------------+ Trademarks @@ -648,36 +815,9 @@ OPLS-DA, SIMCA, and S-PLOT are registered trademarks of the Umetrics company. http://umetrics.com/about-us/trademarks -Release notes -------------- - -0.98.7 - -- bug fix: handle case of a treatment level with only one sample. - -0.98.6 - -- bug fix: set 'crossvalI' param (of R function 'ropls::opls') to the number of samples when the there are fewer than seven samples. - -0.98.5 - -- bug fix: fit feature-labels within clipping region of cor-vs.cov plot -- new feature: optionally (and by default) suppress labels for features with extreme orthogonal loadings - -0.98.3 - -- add support for two-level factors -- add adjusted mz and rt to output tables -- allow explicitly setting the number of features with extreme loadings to be labelled on the correlation vs. covariance plot -- add loadings to corcov table - -0.98.2 - -- first release - - ]]></help> <citations> + <!-- this tool --> <citation type="doi">10.5281/zenodo.1034784</citation> <!-- Galindo_Prieto_2014 Variable influence on projection (VIP) for OPLS --> <citation type="doi">10.1002/cem.2627</citation> @@ -711,6 +851,6 @@ <citation type="doi">10.1021/ac0713510</citation> </citations> <!-- - vim:noet:sw=4:ts=4 + vim:et:sw=4:ts=4 --> </tool>
--- a/w4mcorcov_calc.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_calc.R Wed Sep 05 19:24:47 2018 -0400 @@ -7,9 +7,23 @@ #### OPLS-DA algoC <- "nipals" -do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) { +do_detail_plot <- function( + x_dataMatrix +, x_predictor +, x_is_match +, x_algorithm +, x_prefix +, x_show_labels +, x_progress = print +, x_env +, x_crossval_i +) { off <- function(x) if (x_show_labels == "0") 0 else x - if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { + if ( x_is_match + && ncol(x_dataMatrix) > 0 + && length(unique(x_predictor))> 1 + && x_crossval_i < nrow(x_dataMatrix) + ) { my_oplsda <- opls( x = x_dataMatrix , y = x_predictor @@ -19,86 +33,270 @@ , printL = FALSE , plotL = FALSE , crossvalI = x_crossval_i + , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. ) + # strip out variables having negligible variance + x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) + # x_progress(strF(x_dataMatrix)) + # x_progress(strF(my_oplsda)) + #x_progress(head(my_oplsda_suppLs_y_levels)) + #x_progress(unique(my_oplsda_suppLs_y_levels)) fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] - my_cor_vs_cov <- cor_vs_cov( - matrix_x = x_dataMatrix - , ropls_x = my_oplsda + do_s_plot <- function( + x_env + , predictor_projection_x = TRUE + , cplot_x = FALSE + , cor_vs_cov_x = NULL ) - with( - my_cor_vs_cov - , { - min_x <- min(covariance) - max_x <- max(covariance) - lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) - covariance <- covariance / lim_x - lim_x <- 1.2 - main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) - main_cex <- min(1.0, 46.0/nchar(main_label)) - # "It is generally accepted that a variable should be selected if vj>1, [27–29], - # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." - # (Mehmood 2012 doi:10.1186/1748-7188-6-27) - vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) - alpha <- 0.1 + 0.4 * vipco - red <- as.numeric(correlation > 0) * vipco - blue <- as.numeric(correlation < 0) * vipco - plus_cor <- correlation - plus_cov <- covariance - cex <- 0.75 - plot( - y = plus_cor - , x = plus_cov - , type="p" - , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) ) - , ylim=c( -1.0 - off(0.2), 1.0 + off(0.2) ) - , xlab = sprintf("relative covariance(feature,t1)") - , ylab = sprintf("correlation(feature,t1)") - , main = main_label - , cex.main = main_cex - , cex = cex - , pch = 16 - , col = rgb(blue = blue, red = red, green = 0, alpha = alpha) - ) - low_x <- -0.7 * lim_x - high_x <- 0.7 * lim_x - text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") - text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") - if ( x_show_labels != "0" ) { - my_loadp <- loadp - my_loado <- loado - names(my_loadp) <- tsv1$featureID - names(my_loado) <- tsv1$featureID - if ( x_show_labels == "ALL" ) { - n_labels <- length(loadp) - } else { - n_labels <- as.numeric(x_show_labels) + { + #print(ls(x_env)) # "cplot_y" etc + #print(str(x_env$cplot_y)) # chr "covariance" + if (cplot_x) { + #print(x_env$cplot_y) # "covariance" + cplot_y_correlation <- (x_env$cplot_y == "correlation") + #print(cplot_y_correlation) # FALSE + } + if (is.null(cor_vs_cov_x)) { + my_cor_vs_cov <- cor_vs_cov( + matrix_x = x_dataMatrix + , ropls_x = my_oplsda + , predictor_projection_x = predictor_projection_x + , x_progress + ) + } else { + my_cor_vs_cov <- cor_vs_cov_x + } + # print("str(my_cor_vs_cov)") + # str(my_cor_vs_cov) + if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { + if (is.null(cor_vs_cov_x)) { + x_progress("No cor_vs_cov data produced") + } + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="too few covariance data") + return(my_cor_vs_cov) + } + with( + my_cor_vs_cov + , { + min_x <- min(covariance, na.rm = TRUE) + max_x <- max(covariance, na.rm = TRUE) + lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) + covariance <- covariance / lim_x + lim_x <- 1.2 + # "It is generally accepted that a variable should be selected if vj>1, [27–29], + # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." + # (Mehmood 2012 doi:10.1186/1748-7188-6-27) + plus_cor <- correlation + plus_cov <- covariance + cex <- 0.65 + which_projection <- if (projection == 1) "t1" else "o1" + which_loading <- if (projection == 1) "parallel" else "orthogonal" + if (projection == 1) { # predictor-projection + vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) + if (!cplot_x) { # S-plot predictor-projection + my_xlab <- "relative covariance(feature,t1)" + my_x <- plus_cov + my_ylab <- "correlation(feature,t1)" + my_y <- plus_cor + } else { # C-plot predictor-projection + my_xlab <- "variable importance in predictor-projection" + my_x <- vip4p + if (cplot_y_correlation) { + my_ylab <- "correlation(feature,t1)" + my_y <- plus_cor + } else { + my_ylab <- "relative covariance(feature,t1)" + my_y <- plus_cov + } + } + if (cplot_x) { + lim_x <- max(my_x, na.rm = TRUE) * 1.1 + my_xlim <- c( 0, lim_x + off(0.2) ) + } else { + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + } + my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) + my_load_distal <- loadp + my_load_proximal <- loado + red <- as.numeric(correlation > 0) * vipcp + blue <- as.numeric(correlation < 0) * vipcp + alpha <- 0.1 + 0.4 * vipcp + red[is.na(red)] <- 0 + blue[is.na(blue)] <- 0 + alpha[is.na(alpha)] <- 0 + my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) + main_label <- sprintf("%s for level %s versus %s" + , x_prefix, fctr_lvl_1, fctr_lvl_2) + } else { # orthogonal projection + vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) + if (!cplot_x) { + my_xlab <- "relative covariance(feature,to1)" + my_x <- -plus_cov + } else { + my_xlab <- "variable importance in orthogonal projection" + my_x <- vip4o + } + if (!cplot_x) { # S-plot orthogonal projection + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + my_ylab <- "correlation(feature,to1)" + my_y <- plus_cor + } else { # C-plot orthogonal projection + lim_x <- max(my_x, na.rm = TRUE) * 1.1 + my_xlim <- c( 0, lim_x + off(0.2) ) + if (cplot_y_correlation) { + my_ylab <- "correlation(feature,to1)" + my_y <- plus_cor + } else { + my_ylab <- "relative covariance(feature,to1)" + my_y <- plus_cov + } + } + my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) + my_load_distal <- loado + my_load_proximal <- loadp + alpha <- 0.1 + 0.4 * vipco + alpha[is.na(alpha)] <- 0 + my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) + main_label <- sprintf( + "Features influencing orthogonal projection for %s versus %s" + , fctr_lvl_1, fctr_lvl_2) } - n_labels <- min( n_labels, (1 + length(loadp)) / 2 ) - labels_to_show <- c( - names(head(sort(my_loadp),n = n_labels)) - , names(tail(sort(my_loadp),n = n_labels)) + main_cex <- min(1.0, 46.0/nchar(main_label)) + my_feature_label_slant <- -30 # slant feature labels 30 degrees downward + plot( + y = my_y + , x = my_x + , type = "p" + , xlim = my_xlim + , ylim = my_ylim + , xlab = my_xlab + , ylab = my_ylab + , main = main_label + , cex.main = main_cex + , cex = cex + , pch = 16 + , col = my_col ) - if ( x_show_loado_labels ) { - labels_to_show <- c( - labels_to_show - , names(head(sort(my_loado),n = n_labels)) - , names(tail(sort(my_loado),n = n_labels)) - ) + low_x <- -0.7 * lim_x + high_x <- 0.7 * lim_x + if (projection == 1 && !cplot_x) { + text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") + text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") } - labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) - text( - y = plus_cor - 0.013 - , x = plus_cov + 0.020 - , cex = 0.4 - , labels = labels - , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha) - , srt = -30 # slant 30 degrees downward - , adj = 0 # left-justified - ) + if ( x_show_labels != "0" ) { + names(my_load_distal) <- tsv1$featureID + names(my_load_proximal) <- tsv1$featureID + if ( x_show_labels == "ALL" ) { + n_labels <- length(my_load_distal) + } else { + n_labels <- as.numeric(x_show_labels) + } + n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) + labels_to_show <- c( + names(head(sort(my_load_distal),n = n_labels)) + , names(tail(sort(my_load_distal),n = n_labels)) + ) + labels <- unname( + sapply( + X = tsv1$featureID + , FUN = function(x) if( x %in% labels_to_show ) x else "" + ) + ) + x_text_offset <- 0.024 + y_text_off <- 0.017 + if (!cplot_x) { # S-plot + y_text_offset <- if (projection == 1) -y_text_off else y_text_off + } else { # C-plot + y_text_offset <- + sapply( + X = (my_y > 0) + , FUN = function(x) { if (x) y_text_off else -y_text_off } + ) + } + label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { + # print("str(x_arg)") + # print(str(x_arg)) + # print("str(y_arg)") + # print(str(y_arg)) + # print("str(labels_arg)") + # print(str(labels_arg)) + if (length(labels_arg) > 0) { + unique_slant <- unique(slant_arg) + if (length(unique_slant) == 1) { + text( + y = y_arg + , x = x_arg + x_text_offset + , cex = 0.4 + , labels = labels_arg + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant_arg + , adj = 0 # left-justified + ) + } else { + for (slant in unique_slant) { + text( + y = y_arg[slant_arg == slant] + , x = x_arg[slant_arg == slant] + x_text_offset + , cex = 0.4 + , labels = labels_arg[slant_arg == slant] + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant + , adj = 0 # left-justified + ) + } + } + } + } + if (!cplot_x) { + my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant + } else { + my_slant <- sapply( + X = (my_y > 0) + , FUN = function(x) if (x) 2 else -2 + ) * my_feature_label_slant + } + if (length(my_x) > 1) { + label_features( + x_arg = my_x [my_x > 0] + , y_arg = my_y [my_x > 0] - y_text_offset + , labels_arg = labels[my_x > 0] + , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) + ) + if (!cplot_x) { + label_features( + x_arg = my_x [my_x < 0] + , y_arg = my_y [my_x < 0] + y_text_offset + , labels_arg = labels[my_x < 0] + , slant_arg = my_slant + ) + } + } else { + if (!cplot_x) { + my_slant <- (if (my_x > 1) -1 else 1) * my_slant + my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset + } else { + my_slant <- (if (my_y > 1) -1 else 1) * my_slant + my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset + } + label_features( + x_arg = my_x + , y_arg = my_y_arg + , labels_arg = labels + , slant_arg = my_slant + ) + } + } } - } + ) + return (my_cor_vs_cov) + } + my_cor_vs_cov <- do_s_plot( + x_env = x_env + , predictor_projection_x = TRUE + , cplot_x = FALSE ) typeVc <- c("correlation", # 1 "outlier", # 2 @@ -118,36 +316,93 @@ } else { my_typevc <- c("(dummy)","overview","(dummy)") } + my_ortho_cor_vs_cov_exists <- FALSE for (my_type in my_typevc) { if (my_type %in% typeVc) { - # print(sprintf("plotting type %s", my_type)) tryCatch({ - plot( - x = my_oplsda - , typeVc = my_type - , parCexN = 0.4 - , parDevNewL = FALSE - , parLayL = TRUE - , parEllipsesL = TRUE - ) + if ( my_type != "x-loading" ) { + plot( + x = my_oplsda + , typeVc = my_type + , parCexN = 0.4 + , parDevNewL = FALSE + , parLayL = TRUE + , parEllipsesL = TRUE + ) + if (my_type == "overview") { + sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) + title(sub = sub_label) + } + } else { + my_ortho_cor_vs_cov <- do_s_plot( + x_env = x_env + , predictor_projection_x = FALSE + , cplot_x = FALSE + ) + my_ortho_cor_vs_cov_exists <- TRUE + } }, error = function(e) { - x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2)) + x_progress( + sprintf( + "factor level %s or %s may have only one sample - %s" + , fctr_lvl_1 + , fctr_lvl_2 + , e$message + ) + ) }) } else { - # print("plotting dummy graph") plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") text(x=1, y=1, labels="no orthogonal projection is possible") } } + cplot_p <- x_env$cplot_p + cplot_o <- x_env$cplot_o + if (cplot_p || cplot_o) { + if (cplot_p) { + do_s_plot( + x_env = x_env + , predictor_projection_x = TRUE + , cplot_x = TRUE + , cor_vs_cov_x = my_cor_vs_cov + ) + did_plots <- 1 + } else { + did_plots <- 0 + } + if (cplot_o) { + if (my_ortho_cor_vs_cov_exists) { + do_s_plot( + x_env = x_env + , predictor_projection_x = FALSE + , cplot_x = TRUE + , cor_vs_cov_x = my_ortho_cor_vs_cov + ) + } else { + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="no orthogonal projection is possible") + } + did_plots <- 1 + did_plots + } + if (did_plots == 1) { + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white") + } + } return (my_cor_vs_cov) } else { - # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor)))) return (NULL) } } # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 -corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) { +corcov_calc <- function( + calc_env +, failure_action = stop +, progress_action = function(x) { } +, corcov_tsv_action = function(t) { } +, salience_tsv_action = function(t) { } +, extra_plots = c() +) { if ( ! is.environment(calc_env) ) { failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") return ( FALSE ) @@ -174,22 +429,23 @@ # matchingC is one of { "none", "wildcard", "regex" } matchingC <- calc_env$matchingC labelFeatures <- calc_env$labelFeatures - labelOrthoFeatures <- calc_env$labelOrthoFeatures # arg/env checking if (!(facC %in% names(smpl_metadata))) { - failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) + failure_action( + sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" + , facC)) return ( FALSE ) } mz <- vrbl_metadata$mz names(mz) <- vrbl_metadata$variableMetadata mz_lookup <- function(feature) unname(mz[feature]) - + rt <- vrbl_metadata$rt names(rt) <- vrbl_metadata$variableMetadata rt_lookup <- function(feature) unname(rt[feature]) - + # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) salience_df <- calc_env$salience_df <- w4msalience( data_matrix = data_matrix @@ -236,12 +492,9 @@ } # transpose matrix because ropls matrix is the transpose of XCMS matrix + tdm <- t(data_matrix) # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot - # center - cdm <- center_colmeans(t(data_matrix)) - # pareto-scale - my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE)) - scdm <- sweep(cdm, 2, my_scale, "/") + # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2. # pattern to match column names like k10_kruskal_k4.k3_sig col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) @@ -265,7 +518,10 @@ # for each column name, extract the parts of the name matched by 'col_pattern', if any the_colnames <- colnames(vrbl_metadata) if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { - failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) + failure_action( + sprintf( + "bad parameter! variableMetadata must contain results of W4M Univariate test '%s'." + , tesC)) return ( FALSE ) } col_matches <- lapply( @@ -292,34 +548,52 @@ } } level_union <- unique(sort(level_union)) - overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) + overall_significant <- 1 == ( + if (intersample_sig_col %in% colnames(vrbl_metadata)) { + vrbl_metadata[,intersample_sig_col] + } else { + 1 + } + ) if ( length(level_union) > 2 ) { chosen_samples <- smpl_metadata_facC %in% level_union chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) col_selector <- vrbl_metadata_names[ overall_significant ] - my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] + my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] plot_action <- function(fctr_lvl_1, fctr_lvl_2) { - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) - predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) + progress_action( + sprintf("calculating/plotting contrast of %s vs. %s" + , fctr_lvl_1, fctr_lvl_2) + ) + predictor <- sapply( + X = chosen_facC + , FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 + ) my_cor_cov <- do_detail_plot( x_dataMatrix = my_matrix , x_predictor = predictor - , x_is_match = is_match + , x_is_match = TRUE , x_algorithm = algoC - , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" + , x_prefix = if (pairSigFeatOnly) { + "Significantly contrasting features" + } else { + "Significant features" + } , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env ) if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT.") + progress_action("NOTHING TO PLOT") } else { my_tsv <- my_cor_cov$tsv1 my_tsv$mz <- mz_lookup(my_tsv$featureID) my_tsv$rt <- rt_lookup(my_tsv$featureID) - my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] + my_tsv["level1Level2Sig"] <- vrbl_metadata[ + match(my_tsv$featureID, vrbl_metadata_names) + , vrbl_metadata_col + ] tsv <<- my_tsv corcov_tsv_action(tsv) did_plot <<- TRUE @@ -348,50 +622,79 @@ fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) - # TODO delete next line displaying currently-processed column - # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match)) - # choose only samples with one of the two factors for this column - chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) - predictor <- smpl_metadata_facC[chosen_samples] - # extract only the significantly-varying features and the chosen samples - fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) - col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] - my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] - my_cor_cov <- do_detail_plot( - x_dataMatrix = my_matrix - , x_predictor = predictor - , x_is_match = is_match - , x_algorithm = algoC - , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" - , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures - , x_progress = progress_action - , x_crossval_i = min(7, length(chosen_samples)) - , x_env = calc_env - ) - if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT.") + if (is_match) { + progress_action( + sprintf("calculating/plotting contrast of %s vs. %s." + , fctr_lvl_1, fctr_lvl_2 + ) + ) + # choose only samples with one of the two factors for this column + chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) + predictor <- smpl_metadata_facC[chosen_samples] + # extract only the significantly-varying features and the chosen samples + fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * + ( if (intersample_sig_col %in% colnames(vrbl_metadata)) { + vrbl_metadata[,intersample_sig_col] + } else { + 1 + } + ) + col_selector <- vrbl_metadata_names[ + if ( pairSigFeatOnly ) fully_significant else overall_significant + ] + my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = if (pairSigFeatOnly) { + "Significantly contrasting features" + } else { + "Significant features" + } + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_crossval_i = min(7, length(chosen_samples)) + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT.") + } else { + tsv <- my_cor_cov$tsv1 + tsv$mz <- mz_lookup(tsv$featureID) + tsv$rt <- rt_lookup(tsv$featureID) + tsv["level1Level2Sig"] <- vrbl_metadata[ + match(tsv$featureID, vrbl_metadata_names) + , vrbl_metadata_col + ] + corcov_tsv_action(tsv) + did_plot <- TRUE + } } else { - tsv <- my_cor_cov$tsv1 - tsv$mz <- mz_lookup(tsv$featureID) - tsv$rt <- rt_lookup(tsv$featureID) - tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] - corcov_tsv_action(tsv) - did_plot <- TRUE + progress_action( + sprintf("skipping contrast of %s vs. %s." + , fctr_lvl_1, fctr_lvl_2 + ) + ) } } } } } else { # tesC == "none" + # find all the levels for factor facC in sampleMetadata level_union <- unique(sort(smpl_metadata_facC)) + # identify the selected levels for factor facC from sampleMetadata + level_include <- sapply(X = level_union, FUN = isLevelSelected) + # discard the non-selected levels for factor facC + level_union <- level_union[level_include] if ( length(level_union) > 1 ) { if ( length(level_union) > 2 ) { ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ## completed <- c() lapply( X = level_union - , FUN = function(x) { + , FUN = function(x) { fctr_lvl_1 <- x[1] fctr_lvl_2 <- { if ( fctr_lvl_1 %in% completed ) @@ -402,39 +705,50 @@ } chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) fctr_lvl_2 <- "other" - # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { - progress_action("NOTHING TO PLOT...") + progress_action( + sprintf("Skipping contrast of %s vs. %s; there are no chosen samples." + , fctr_lvl_1, fctr_lvl_2) + ) } else { chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) - predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) - my_matrix <- scdm[ chosen_samples, , drop = FALSE ] + predictor <- sapply( + X = chosen_facC + , FUN = function(fac) { + if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 + } + ) + my_matrix <- tdm[ chosen_samples, , drop = FALSE ] # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) - my_cor_cov <- do_detail_plot( - x_dataMatrix = my_matrix - , x_predictor = predictor - , x_is_match = is_match - , x_algorithm = algoC - , x_prefix = "Features" - , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures - , x_progress = progress_action - , x_crossval_i = min(7, length(chosen_samples)) - , x_env = calc_env - ) - if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT") + if (is_match) { + progress_action( + sprintf("Calculating/plotting contrast of %s vs. %s" + , fctr_lvl_1, fctr_lvl_2) + ) + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = "Features" + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_crossval_i = min(7, length(chosen_samples)) + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT...") + } else { + tsv <- my_cor_cov$tsv1 + tsv$mz <- mz_lookup(tsv$featureID) + tsv$rt <- rt_lookup(tsv$featureID) + corcov_tsv_action(tsv) + did_plot <<- TRUE + } } else { - tsv <- my_cor_cov$tsv1 - tsv$mz <- mz_lookup(tsv$featureID) - tsv$rt <- rt_lookup(tsv$featureID) - corcov_tsv_action(tsv) - did_plot <<- TRUE } } - #print("baz") "dummy" # need to return a value; otherwise combn fails with an error } ) @@ -444,52 +758,67 @@ utils::combn( x = level_union , m = 2 - , FUN = function(x) { + , FUN = function(x) { fctr_lvl_1 <- x[1] fctr_lvl_2 <- x[2] chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) - # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { - progress_action("NOTHING TO PLOT...") + progress_action( + sprintf("Skipping contrast of %s vs. %s. - There are no chosen samples." + , fctr_lvl_1, fctr_lvl_2 + ) + ) } else { chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) predictor <- chosen_facC - my_matrix <- scdm[ chosen_samples, , drop = FALSE ] + my_matrix <- tdm[ chosen_samples, , drop = FALSE ] # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) - my_cor_cov <- do_detail_plot( - x_dataMatrix = my_matrix - , x_predictor = predictor - , x_is_match = is_match - , x_algorithm = algoC - , x_prefix = "Features" - , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures - , x_progress = progress_action - , x_crossval_i = min(7, length(chosen_samples)) - , x_env = calc_env - ) - if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT") + if (is_match) { + progress_action( + sprintf("Calculating/plotting contrast of %s vs. %s." + , fctr_lvl_1, fctr_lvl_2) + ) + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = "Features" + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_crossval_i = min(7, length(chosen_samples)) + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT.....") + } else { + tsv <- my_cor_cov$tsv1 + tsv$mz <- mz_lookup(tsv$featureID) + tsv$rt <- rt_lookup(tsv$featureID) + corcov_tsv_action(tsv) + did_plot <<- TRUE + } } else { - tsv <- my_cor_cov$tsv1 - tsv$mz <- mz_lookup(tsv$featureID) - tsv$rt <- rt_lookup(tsv$featureID) - corcov_tsv_action(tsv) - did_plot <<- TRUE + progress_action( + sprintf("Skipping contrast of %s vs. %s." + , fctr_lvl_1, fctr_lvl_2 + ) + ) } } - #print("baz") "dummy" # need to return a value; otherwise combn fails with an error } ) } else { - progress_action("NOTHING TO PLOT....") + progress_action("NOTHING TO PLOT......") } } if (!did_plot) { - failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) + failure_action( + sprintf( + "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" + , facC, originalLevCSV)) return ( FALSE ) } return ( TRUE ) @@ -500,27 +829,67 @@ # Wiklund_2008 doi:10.1021/ac0713510 # Galindo_Prieto_2014 doi:10.1002/cem.2627 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R -cor_vs_cov <- function(matrix_x, ropls_x) { +cor_vs_cov <- function( + matrix_x +, ropls_x +, predictor_projection_x = TRUE +, x_progress = print +) { + tryCatch({ + return( + cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) + ) + }, error = function(e) { + x_progress( + sprintf( + "cor_vs_cov fatal error - %s" + , as.character(e) # e$message + ) + ) + return ( NULL ) + }) +} + +cor_vs_cov_try <- function( + matrix_x +, ropls_x +, predictor_projection_x = TRUE +, x_progress = print +) { x_class <- class(ropls_x) - if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) - stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) + if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) + stop( + paste( + "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " + , as.character(x_class) + ) + ) } result <- list() + result$projection <- projection <- if (predictor_projection_x) 1 else 2 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS if ( ropls_x@suppLs$algoC == "nipals") { # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) - score_matrix <- ropls_x@scoreMN + if (predictor_projection_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) score_matrix_magnitude <- mag(score_matrix) - result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) - result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) + result$covariance <- + score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) + result$correlation <- + score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) } else { # WARNING - untested code - I don't have test data to exercise this branch # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F - score_matrix <- ropls_x@scoreMN + if (predictor_projection_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) cov_divisor <- nrow(matrix_x) - 1 result$covariance <- sapply( @@ -540,8 +909,8 @@ } ) } - result$correlation <- result$correlation[1,,drop = TRUE] - result$covariance <- result$covariance[1,,drop = TRUE] + result$correlation <- result$correlation[ 1, , drop = TRUE ] + result$covariance <- result$covariance [ 1, , drop = TRUE ] # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) @@ -556,15 +925,42 @@ feature_count <- length(ropls_x@vipVn) result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) - # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation))) superresult <- list() if (length(result$vip4o) == 0) result$vip4o <- NA - greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) - superresult$tsv1 <- data.frame( - featureID = names(ropls_x@vipVn) + greaterLevel <- sapply( + X = result$correlation + , FUN = function(my_corr) + tryCatch({ + if ( is.nan( my_corr ) ) { + print("my_corr is NaN") + NA + } else { + if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 + } + }, error = function(e) { + x_progress( + sprintf( + "cor_vs_cov -> sapply: error - substituting NA - %s" + , as.character(e) + ) + ) + NA + }) + ) + + # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 + featureID <- names(ropls_x@vipVn) + greaterLevel <- greaterLevel[featureID] + result$correlation <- result$correlation[featureID] + result$covariance <- result$covariance[featureID] + # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 + + tsv1 <- data.frame( + featureID = featureID , factorLevel1 = result$level1 , factorLevel2 = result$level2 , greaterLevel = greaterLevel + , projection = result$projection , correlation = result$correlation , covariance = result$covariance , vip4p = result$vip4p @@ -573,7 +969,11 @@ , loado = result$loado , row.names = NULL ) - rownames(superresult$tsv1) <- superresult$tsv1$featureID + tsv1 <- tsv1[!is.na(tsv1$correlation),] + tsv1 <- tsv1[!is.na(tsv1$covariance),] + superresult$tsv1 <- tsv1 + rownames(superresult$tsv1) <- tsv1$featureID + superresult$projection <- result$projection superresult$covariance <- result$covariance superresult$correlation <- result$correlation superresult$vip4p <- result$vip4p @@ -581,12 +981,11 @@ superresult$loadp <- result$loadp superresult$loado <- result$loado superresult$details <- result - # #print(superresult$tsv1) result$superresult <- superresult # Include thise in case future consumers of this routine want to use it in currently unanticipated ways - result$oplsda <- ropls_x + result$oplsda <- ropls_x result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways return (superresult) } - +# vim: sw=2 ts=2 et :
--- a/w4mcorcov_input.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_input.R Wed Sep 05 19:24:47 2018 -0400 @@ -1,6 +1,6 @@ # read_data_frame - read a w4m data frame, with error handling # e.g., data_matrix_input_env <- read_data_frame(dataMatrix_in, "data matrix input") -read_data_frame <- function(file_path, kind_string, failure_action = failure_action) { +read_data_frame <- function(file_path, kind_string, rdf_failure_action = failure_action) { my.env <- new.env() my.env$success <- FALSE my.env$msg <- sprintf("no message reading %s", kind_string) @@ -14,7 +14,7 @@ } ) if (!my.env$success) { - failure_action(my.env$msg) + rdf_failure_action(my.env$msg) return ( FALSE ) } return (my.env) @@ -36,7 +36,7 @@ my_failure_action( sprintf("bad parameter xcms_data_type '%s'", xcms_data_type) ) return ( FALSE ) } - if ( is.character(xcms_data_in) ){ + if ( is.character(xcms_data_in) ) { # case: xcms_data_in is a path to a file xcms_data_input_env <- read_data_frame( xcms_data_in, sprintf("%s input", xcms_data_type) ) if (!xcms_data_input_env$success) { @@ -44,30 +44,6 @@ return ( FALSE ) } return ( xcms_data_input_env$data ) - # commenting out pasted code that is not tested here - # } else if ( is.data.frame(xcms_data_in) || is.matrix(xcms_data_in) ) { - # # case: xcms_data_in is a data.frame or matrix - # return(xcms_data_in) - # } else if ( is.list(xcms_data_in) || is.environment(xcms_data_in) ) { - # # NOTE WELL: is.list succeeds for data.frame, so the is.data.frame test must appear before the is.list test - # # case: xcms_data_in is a list - # if ( ! exists(xcms_data_type, where = xcms_data_in) ) { - # my_failure_action(sprintf("%s xcms_data_in is missing member '%s'"), ifelse(is.environment(xcms_data_in),"environment","list"), xcms_data_type) - # return ( FALSE ) - # } - # prospect <- getElement(name = xcms_data_type, object = xcms_data_in) - # if ( ! is.data.frame(prospect) && ! is.matrix(prospect) ) { - # utils::str("list - str(prospect)") - # utils::str(prospect) - # if ( is.list(xcms_data_in) ) { - # my_failure_action(sprintf("the first member of xcms_data_in['%s'] is neither a data.frame nor a matrix but is a %s", xcms_data_type, typeof(prospect))) - # } else { - # my_failure_action(sprintf("the first member of xcms_data_in$%s is neither a data.frame nor a matrix but is a %s", xcms_data_type, typeof(prospect))) - # } - # return ( prospect ) - # } - # # stop("stopping here for a snapshot") - # return ( prospect ) } else { # case: xcms_data_in is invalid my_failure_action( sprintf("xcms_data_in has unexpected type %s", typeof(xcms_data_in)) ) @@ -189,6 +165,10 @@ data_matrix <- as.matrix(data_matrix) } + # Omit any feature not found in variableMetadata and any sample not found in sampleMetadata + # For something more elaborate, see https://github.com/HegemanLab/w4mclassfilter + data_matrix <- data_matrix[rownames(data_matrix) %in% rownames(vrbl_metadata),colnames(data_matrix) %in% rownames(smpl_metadata)] + input_env$data_matrix <- data_matrix # ... } else {
--- a/w4mcorcov_lib.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_lib.R Wed Sep 05 19:24:47 2018 -0400 @@ -1,3 +1,3 @@ suppressMessages(library(batch)) -# suppressMessages(library(foreach)) suppressMessages(library(ropls)) +suppressMessages(library(methods))
--- a/w4mcorcov_output.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_output.R Wed Sep 05 19:24:47 2018 -0400 @@ -0,0 +1,77 @@ + +# turn off all plotting devices +dev.off.all <- function() { + while (!is.null(dev.list())) { dev.off() } +} + +# capture plot and write to PDF; then close any devices opened in the process +plot2pdf <- function( + file.name +, plot.function +, width = 12 +, height = 12 +) { + # capture plot and write to PDF + cur.dev <- dev.list() + filename <- file.name + pdf(file = filename, width = width, height = height) + plot.function() + # close any devices opened in the process + dev.off() + if (is.null(cur.dev)) { + dev.off.all() + } else { + while ( length(dev.list()) > length(cur.dev) ) { dev.off() } + } +} + +# print and capture plot and write to PDF; then close any devices opened in the process +# This is needed for ggplot which does not print the plot when invoked within a function. +print2pdf <- function( + file.name +, plot.function +, width = 12 +, height = 12 +) { + plot2pdf( + file.name = file.name + , width = width + , height = height + , plot.function = function() { + print(plot.function()) + } + ) +} + +iso8601.znow <- function() +{ + strftime(as.POSIXlt(Sys.time(), "UTC"), "%Y-%m-%dT%H:%M:%SZ") +} + +# pdf.name <- function(name) +# { +# paste0(name, "_", iso8601.filename.fragment(), ".pdf") +# } +# +# tsv.name <- function(name) +# { +# paste0(name, "_", iso8601.filename.fragment(), ".tsv") +# } +# + +tsv_action_factory <- function(file, colnames, append) { + return ( + function(tsv) { + write.table( + x = tsv + , file = file + , sep = "\t" + , quote = FALSE + , row.names = FALSE + , col.names = colnames + , append = append + ) + } + ) +} +
--- a/w4mcorcov_salience.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_salience.R Wed Sep 05 19:24:47 2018 -0400 @@ -61,8 +61,8 @@ ) rcvOfFeatureBySampleClassLevel[is.nan(rcvOfFeatureBySampleClassLevel)] <- max(9999,max(rcvOfFeatureBySampleClassLevel, na.rm = TRUE)) - # "For each feature, 'select max(median_feature_intensity) from feature'." - maxApplyMedianOfFeatureBySampleClassLevel <- sapply( + # "For each feature, 'select max(max_feature_intensity) from feature'." + maxApplyMaxOfFeatureBySampleClassLevel <- sapply( X = 1:n_features , FUN = function(i) { match( @@ -84,19 +84,19 @@ # the feature name feature = features # the name (or factor-level) of the class-level with the highest median intensity for the feature - , max_level = medianOfFeatureBySampleClassLevel[maxApplyMedianOfFeatureBySampleClassLevel,1] + , max_level = medianOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel,1] # the median intensity for the feature and the level max_level , max_median = sapply( X = 1:n_features , FUN = function(i) { - maxOfFeatureBySampleClassLevel[maxApplyMedianOfFeatureBySampleClassLevel[i], 1 + i] + maxOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel[i], 1 + i] } ) # the coefficient of variation (expressed as a proportion) for the intensity for the feature and the level max_level , max_rcv = sapply( X = 1:n_features , FUN = function(i) { - rcvOfFeatureBySampleClassLevel[maxApplyMedianOfFeatureBySampleClassLevel[i], i] + rcvOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel[i], i] } ) # the mean of the medians of intensity for all class-levels for the feature
--- a/w4mcorcov_util.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_util.R Wed Sep 05 19:24:47 2018 -0400 @@ -21,68 +21,54 @@ return (retval) } -# turn off all plotting devices -dev.off.all <- function() { - while (!is.null(dev.list())) { dev.off() } +errorSink <- function(which_function, ...) { + var_args <- "..." + tryCatch( + var_args <<- (deparse(..., width.cutoff = 60)) + , error = function(e) {print(e$message)} + ) + if (var_args == "...") + return + # format error for logging + format_error <- function(e) { + sprintf( + "Error\n{ message: %s\n, arguments: %s\n}\n" + , e$message + , Reduce(f = paste, x = var_args) + ) + } + format_warning <- function(e) { + sprintf( + "Warning\n{ message: %s\n, arguments: %s\n}\n" + , e$message + , Reduce(f = paste, x = var_args) + ) + } + sink_number <- sink.number() + sink(stderr()) + tryCatch( + var_args <- (deparse(..., width.cutoff = 60)) + , expr = { + retval <- which_function(...) + } + , error = function(e) cat(format_error(e), file = stderr()) + , warning = function(w) cat(format_warning(w), file = stderr()) + ) + while (sink.number() > sink_number) { + sink() + } } - -# capture plot and write to PDF; then close any devices opened in the process -plot2pdf <- function( - file.name -, plot.function -, width = 12 -, height = 12 -) { - # capture plot and write to PDF - cur.dev <- dev.list() - filename <- file.name - pdf(file = filename, width = width, height = height) - plot.function() - # close any devices opened in the process - dev.off() - if (is.null(cur.dev)) { - dev.off.all() - } else { - while ( length(dev.list()) > length(cur.dev) ) { dev.off() } - } +errorPrint <- function(...) { + errorSink(which_function = print, ...) +} +errorCat <- function(...) { + errorSink(which_function = cat, ..., "\n") } -# print and capture plot and write to PDF; then close any devices opened in the process -# This is needed for ggplot which does not print the plot when invoked within a function. -print2pdf <- function( - file.name -, plot.function -, width = 12 -, height = 12 -) { - plot2pdf( - file.name = file.name - , width = width - , height = height - , plot.function = function() { - print(plot.function()) - } - ) -} -iso8601.znow <- function() -{ - strftime(as.POSIXlt(Sys.time(), "UTC"), "%Y-%m-%dT%H:%M:%SZ") -} - -# pdf.name <- function(name) -# { -# paste0(name, "_", iso8601.filename.fragment(), ".pdf") -# } -# -# tsv.name <- function(name) -# { -# paste0(name, "_", iso8601.filename.fragment(), ".tsv") -# } -# -# # pseudo-inverse - computational inverse non-square matrix a +# # pseudo-inverse - computational inverse of non-square matrix a # p.i <- function(a) { # solve(t(a) %*% a) %*% t(a) # } - +# vim: sw=2 ts=2 et ai :
--- a/w4mcorcov_wrapper.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_wrapper.R Wed Sep 05 19:24:47 2018 -0400 @@ -22,18 +22,45 @@ ## subroutines ##---------- -source("w4mcorcov_lib.R") -source("w4mcorcov_util.R") -source("w4mcorcov_input.R") -source("w4mcorcov_salience.R") -source("w4mcorcov_calc.R") -source("w4mcorcov_output.R") +# from: https://github.com/molgenis/molgenis-pipelines/wiki/How-to-source-another_file.R-from-within-your-R-script +LocationOfThisScript = function() # Function LocationOfThisScript returns the location of this .R script (may be needed to source other files in same dir) +{ + this.file = NULL + # This file may be 'sourced' + for (i in -(1:sys.nframe())) { + if (identical(sys.function(i), base::source)) this.file = (normalizePath(sys.frame(i)$ofile)) + } + + if (!is.null(this.file)) return(dirname(this.file)) + + # But it may also be called from the command line + cmd.args = commandArgs(trailingOnly = FALSE) + cmd.args.trailing = commandArgs(trailingOnly = TRUE) + cmd.args = cmd.args[seq.int(from=1, length.out=length(cmd.args) - length(cmd.args.trailing))] + res = gsub("^(?:--file=(.*)|.*)$", "\\1", cmd.args) + + # If multiple --file arguments are given, R uses the last one + res = tail(res[res != ""], 1) + if (0 < length(res)) return(dirname(res)) + + # Both are not the case. Maybe we are in an R GUI? + return(NULL) +} + +script.dir <- LocationOfThisScript() + +source(paste(script.dir, "w4mcorcov_lib.R", sep="/")) +source(paste(script.dir, "w4mcorcov_util.R", sep="/")) +source(paste(script.dir, "w4mcorcov_input.R", sep="/")) +source(paste(script.dir, "w4mcorcov_salience.R", sep="/")) +source(paste(script.dir, "w4mcorcov_calc.R", sep="/")) +source(paste(script.dir, "w4mcorcov_output.R", sep="/")) ## log file ##--------- my_log <- function(x, ...) { cat(paste(iso8601.znow(), " ", x, ..., nl, sep=""))} -my_fatal <- function(x, ...) { +my_fatal <- function(x, ...) { my_log("ERROR: ", x, ...) quit(save = "no", status = 11, runLast = TRUE) } @@ -45,7 +72,12 @@ # MAIN # ######## +errorPrint(sessionInfo()) + argVc <- unlist(parseCommandArgs(evaluate=FALSE)) +errorCat("\n\n---\n\nArguments that were passed to R are as follows:\n") +errorPrint(argVc) + my_env <- new.env() ##------------------------------ @@ -63,7 +95,6 @@ my_env$contrast_detail <- as.character(argVc["contrast_detail"]) my_env$contrast_corcov <- as.character(argVc["contrast_corcov"]) my_env$contrast_salience <- as.character(argVc["contrast_salience"]) -# print(sprintf("contrast_salience: %s", my_env$contrast_salience)) # other parameters @@ -73,7 +104,9 @@ my_env$levCSV <- as.character(argVc["levCSV"]) my_env$matchingC <- as.character(argVc["matchingC"]) my_env$labelFeatures <- as.character(argVc["labelFeatures"]) # number of features to label at each extreme of the loadings or 'ALL' -my_env$labelOrthoFeatures <- as.logical(argVc["labelOrthoFeatures"]) +my_env$cplot_o <- as.logical(argVc["cplot_o"]) # TRUE if orthogonal C-plot is requested +my_env$cplot_p <- as.logical(argVc["cplot_p"]) # TRUE if parallel C-plot is requested +my_env$cplot_y <- as.character(argVc["cplot_y"]) # Choice of covariance/correlation for Y-axis on C-plot label_features <- my_env$labelFeatures labelfeatures_check <- TRUE @@ -93,22 +126,6 @@ quit(save = "no", status = 10, runLast = TRUE) } -tsv_action_factory <- function(file, colnames, append) { - return ( - function(tsv) { - write.table( - x = tsv - , file = file - , sep = "\t" - , quote = FALSE - , row.names = FALSE - , col.names = colnames - , append = append - ) - } - ) -} - corcov_tsv_colnames <- TRUE corcov_tsv_append <- FALSE corcov_tsv_action <- function(tsv) { @@ -146,24 +163,70 @@ # compute and plot the correlation_vs_covariance details plot # The parameter settings here are generally taken from bioconductor ropls::plot.opls source. - marVn <- c(4.6, 4.1, 2.6, 1.6) - old_par <- par( - font = 2 # bold font face - , font.axis = 2 # bold font face for axis - , font.lab = 2 # bold font face for x and y labels - , lwd = 2 # line-width - interpretation is device spcific - , mar = marVn # margins - , pch = 18 # black diamond plot-character, see help for graphics::points - # , mfrow = c(2,2) # two rows by two columns - , pty = "s" # force plots to be square - ) + if ( my_env$cplot_p || my_env$cplot_o ) { + old_par <- par( + font = 2 # bold font face + , font.axis = 2 # bold font face for axis + , font.lab = 2 # bold font face for x and y labels + , lwd = 2 # line-width - interpretation is device spcific + , pch = 18 # black diamond plot-character, see help for graphics::points + , pty = "m" # do not force plots to be square + , no.readonly = TRUE # only save writable parameters + ) + pdf_height <- 12 + pdf_width <- 8 + my_layout <- function() { + # lay out 2 columns by 3 rows with extra width at the margin of individual plots + layout( + matrix( + # blank row plot 1 & 2 blank row plot 3 & 4 blank row plot 5 & 6 blank row + c(0,0,0,0,0, 0,1,0,2,0, 0,0,0,0,0, 0,3,0,4,0, 0,0,0,0,0, 0,5,0,6,0, 0,0,0,0,0) + , nrow = 7 + , ncol = 5 + , byrow = TRUE + ) + # slim columns 1, 3, and 5 + , widths = c(0.1, 0.9, 0.1, 0.9, 0.1) + # slim rows 1, 3, 5, and 7 + , heights = c(0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1) + ) + } + } else { + old_par <- par( + font = 2 # bold font face + , font.axis = 2 # bold font face for axis + , font.lab = 2 # bold font face for x and y labels + , lwd = 2 # line-width - interpretation is device spcific + , pch = 18 # black diamond plot-character, see help for graphics::points + , pty = "m" # do not force plots to be square + , no.readonly = TRUE # only save writable parameters + ) + pdf_height <- 8 + pdf_width <- 8 + my_layout <- function() { + # lay out 2 columns by 2 rows with extra width at the margin of individual plots + layout( + matrix( + # blank row plot 1 & 2 blank row plot 3 & 4 blank row + c(0,0,0,0,0, 0,1,0,2,0, 0,0,0,0,0, 0,3,0,4,0, 0,0,0,0,0) + , nrow = 5 + , ncol = 5 + , byrow = TRUE + ) + # slim columns 1, 3, and 5 + , widths = c(0.1, 0.9, 0.1, 0.9, 0.1) + # slim rows 1, 3, and 5 + , heights = c(0.1, 0.9, 0.1, 0.9, 0.1) + ) + } + } plot2pdf( file.name = my_env$contrast_detail - , width = 8 - , height = 8 + , width = pdf_width + , height = pdf_height , plot.function = function() { - # plot layout four plots per page - layout(matrix(1:4, byrow = TRUE, nrow = 2)) + # plot layout four or six plots per page + my_layout() my_result <<- corcov_calc( calc_env = my_env , failure_action = my_fatal @@ -174,7 +237,7 @@ } ) par(old_par) - + my_log( "-------------------------- Finished data processing --------------------------") }