Mercurial > repos > iuc > rgcca
comparison rgcca.xml @ 0:4809cae1b724 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/rgcca commit 00f9e92845737e05a4afb1c93043f35b7e4ea771"
| author | iuc |
|---|---|
| date | Tue, 12 Jan 2021 10:11:26 +0000 |
| parents | |
| children | 6bf48c098d36 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:4809cae1b724 |
|---|---|
| 1 <tool id="rgcca" name="RGCCA" version="@TOOL_VERSION@+galaxy0"> | |
| 2 | |
| 3 <description>performs multiblock data analysis of several sets of variables (blocks) observed on the same group of individuals.</description> | |
| 4 | |
| 5 <macros> | |
| 6 <import>macro.xml</import> | |
| 7 </macros> | |
| 8 | |
| 9 <edam_topics> | |
| 10 <edam_topic>topic_2269</edam_topic> | |
| 11 </edam_topics> | |
| 12 | |
| 13 <edam_operations> | |
| 14 <edam_operation>operation_2945</edam_operation> | |
| 15 <edam_operation>operation_3465</edam_operation> | |
| 16 <edam_operation>operation_0337</edam_operation> | |
| 17 </edam_operations> | |
| 18 | |
| 19 <requirements> | |
| 20 <requirement type="package" version="@TOOL_VERSION@">rgccacmd</requirement> | |
| 21 </requirements> | |
| 22 | |
| 23 <command detect_errors="exit_code"><![CDATA[ | |
| 24 #set data_paths = ",".join([str(_.file_name) for _ in $blocks]) | |
| 25 #set data_names = ",".join([str(_.element_identifier).replace(',', '_').replace("'", '_') for _ in $blocks]) | |
| 26 Rscript '$__tool_directory__/launcher.R' | |
| 27 --datasets '${data_paths}' | |
| 28 --names '${data_names}' | |
| 29 --o1 '$individual_plot' --o2 '$corcircle' --o3 '$top_variables' --o4 '$ave' --o5 '$design' --o6 '$individual_table' --o7 '$variable_table' --o8 '$rdata' | |
| 30 $parse.header | |
| 31 --separator $parse.separator | |
| 32 $analyse.superblock | |
| 33 $analyse.scale | |
| 34 #if $analyse.tau.bool == 'false' | |
| 35 --penalty $analyse.tau.value | |
| 36 #else | |
| 37 --penalty $analyse.tau.bool | |
| 38 #end if | |
| 39 --ncomp $analyse.ncomp | |
| 40 --scheme $analyse.scheme | |
| 41 #if $analyse.method.family == '1' | |
| 42 --type pca | |
| 43 #else | |
| 44 --type $analyse.method.type | |
| 45 #end if | |
| 46 #if $analyse.connection | |
| 47 --connection '$analyse.connection' | |
| 48 #end if | |
| 49 #if $analyse.supervised.learning_mode == 'supervised' | |
| 50 --response $analyse.supervised.block_response | |
| 51 #end if | |
| 52 #if $graphic.response | |
| 53 --group '$graphic.response' | |
| 54 #end if | |
| 55 --compx $graphic.compx | |
| 56 --compy $graphic.compy | |
| 57 --nmark $graphic.nmark | |
| 58 $graphic.text | |
| 59 --block $graphic.blockx | |
| 60 --block_y $graphic.blocky | |
| 61 ]]></command> <!-- Dataset paths are single-quoted, and commas and single quotes in block names are replaced, so every argument stays one shell word. --> | |
| 62 | |
| 63 <inputs> | |
| 64 <param name="blocks" type="data" format="tsv,tabular,txt,csv" multiple="true" optional="false" label="Load blocks" | |
| 65 help="TSV file containing a matrix with: (i) quantitative values only (decimal should be separated by '.'), (ii) the samples in lines (should be labelled in the first column) and (iii) variables in columns (should have a header)."/> <!-- The selected datasets are joined into comma-separated path and name lists by the command template. --> | |
| 66 | |
| 67 <section title="Advanced parsing" name="parse" help="By default, on tabulated files with a header."> <!-- File-reading options shared by every input table. --> | |
| 68 <param type="boolean" name="header" checked="true" truevalue="" falsevalue="-H" label="Consider the first row as header of columns" help="Used for both blocks and color files."/> | |
| 69 <param type="select" name="separator" display="radio" label="Column separator" help="Character used to separate the column (for all blocks, connection and color files)."> | |
| 70 <option selected="true" value="1">Tabulation</option> | |
| 71 <option value="2">Semicolon</option> | |
| 72 </param> | |
| 73 </section> | |
| 74 | |
| 75 <section name="analyse" title="Advanced analysis" | |
| 76 help="By default, the analysis: is a Regularised Generalised Canonical Correlation Analysis, scales the blocks, uses a superblock with a factorial scheme function, a tau equals to one and two components for each block."> | |
| 77 <!-- Parameters of this section are read by the command template as $analyse.* --> | |
| 78 <param name="ncomp" type="integer" label="Number of components" value="2" min="2" max="5" | |
| 79 help="The number of components to use in the analysis for each block (should not be greater than the minimum number of variables among the blocks)."/> | |
| 80 | |
| 81 <param name="scale" type="boolean" truevalue="" falsevalue="--scale" checked="true" label="Scale the blocks" | |
| 82 help="A data centering step is always performed. If activated, each block is normalised and divided by the square root of its number of variables."/> | |
| 83 <!-- Method family: "1" has no sub-option (the command template passes type pca); the other families expose a type selector. --> | |
| 84 <conditional name="method"> | |
| 85 | |
| 86 <param name="family" type="select" label="Analysis method"> | |
| 87 <option value="1">One block</option> | |
| 88 <option value="2">Two blocks</option> | |
| 89 <option value="m" selected="true">Multiple blocks</option> | |
| 90 <option value="ms">Multiple blocks with superblock</option> | |
| 91 </param> | |
| 92 | |
| 93 <when value="2"> | |
| 94 <param name="type" type="select" label=" "> | |
| 95 <option value="pls">Partial Least Squares Regression</option> | |
| 96 <option value="cca">Canonical Correlation Analysis</option> | |
| 97 <option value="ifa">Interbattery Factor Analysis</option> | |
| 98 <option value="ra">Redundancy analysis</option> | |
| 99 </param> | |
| 100 </when> | |
| 101 | |
| 102 <when value="m"> | |
| 103 <param name="type" type="select" label=" "> | |
| 104 <option value="rgcca">Regularized Generalized CCA</option> | |
| 105 <option value="sgcca">Sparse Generalized CCA</option> | |
| 106 <option value="sumcor">SUM of CORrelations method</option> | |
| 107 <option value="ssqcor">Sum of SQuared CORrelations method</option> | |
| 108 <option value="sabscor">Sum of ABSolute value CORrelations method</option> | |
| 109 <option value="sumcov">SUM of COVariances method</option> | |
| 110 <option value="ssqcov">Sum of SQuared COVariances method</option> | |
| 111 <option value="sabscov">Sum of ABSolute value COVariances method</option> | |
| 112 <option value="maxbet">MAXBET</option> | |
| 113 <option value="maxbet-b">MAXBET-B</option> | |
| 114 </param> | |
| 115 </when> | |
| 116 | |
| 117 <when value="ms"> | |
| 118 <param name="type" type="select" label=" "> | |
| 119 <option value="gcca">Generalized CCA</option> | |
| 120 <option value="hpca">Hierarchical PCA</option> | |
| 121 <option value="mfa">Multiple Factor Analysis</option> | |
| 122 </param> | |
| 123 </when> | |
| 124 | |
| 125 <when value="1"/> | |
| 126 | |
| 127 </conditional> | |
| 128 | |
| 129 <param name="connection" optional="true" type="data" format="tsv,tabular,txt,csv" label="Load the design matrix (if superblock or supervised disabled)" | |
| 130 help="TSV file without header and without row names. This file describes the connections between the blocks. It should contain 1 (if two blocks are related) or 0 values otherwise. The columns are separated by tabulations. It is a symmetric matrix with the same dimension as the number of blocks."/> | |
| 131 <!-- Unchecking this box adds the superblock flag (its falsevalue) to the command line. --> | |
| 132 <param name="superblock" type="boolean" truevalue="" falsevalue="--superblock" checked="true" label="Use a superblock" | |
| 133 help="A block defined as the concatenation of all the other blocks. The space spanned by global components is viewed as a compromise space that integrated all the modalities and facilitates the visualization of the results and their interpretation. If disabled, all blocks are assumed to be connected or a connection file could be used."/> | |
| 134 | |
| 135 <conditional name="supervised"> | |
| 136 <param name="learning_mode" type="select" display="radio" label="Learning mode"> | |
| 137 <option value="unsupervised">Unsupervised</option> | |
| 138 <option value="supervised">Supervised</option> | |
| 139 </param> | |
| 140 <when value="supervised"> | |
| 141 <param name="block_response" type="integer" value="1" min="1" max="10" label="Use a block as response (supervised analysis)" help="@BLOCK_RULES@ By default, the first block is selected."/> | |
| 142 </when> | |
| 143 <when value="unsupervised"/> | |
| 144 </conditional> | |
| 145 <!-- Tau: "Manual" forwards the numeric value as the penalty argument; "Optimal" forwards the literal word optimal. --> | |
| 146 <conditional name="tau"> | |
| 147 <param name="bool" type="select" display="radio" label="Tau selection" | |
| 148 help="For RGCCA, a regularization parameter for each block (i.e., tau) [default: 1]. Tau varies from 0 (maximizing the correlation) to 1 (maximizing the covariance). For SGCCA, tau is automatically set to 1 and a shrinkage parameter can be defined instead for automatic variable selection, varying from the square root of the variable number (the fewest selected variables) to 1 (all the variables are included)."> | |
| 149 <option value="false">Manual</option> | |
| 150 <option value="optimal">Optimal</option> | |
| 151 </param> | |
| 152 <when value="false"> | |
| 153 <param name="value" type="float" label=" " value="1" min="0" max="1"/> | |
| 154 </when> | |
| 155 <when value="optimal"/> | |
| 156 </conditional> | |
| 157 | |
| 158 <param name="scheme" type="select" label="Scheme function" help="Link (i.e. scheme) function for covariance maximization is calculated with: the identity function (horst scheme), | |
| 159 the absolute values (centroid scheme), the squared values (factorial scheme). Only the horst scheme penalizes structural | |
| 160 negative correlation. The factorial scheme discriminates more strongly the blocks than the centroid one."> | |
| 161 <option value="1">Horst : f(x)</option> | |
| 162 <option value="2" selected="true">Factorial : f(x)^2</option> | |
| 163 <option value="3">Centroid : f|x|</option> | |
| 164 <option value="4">Other: f(x)^4</option> | |
| 165 </param> | |
| 166 | |
| 167 </section> | |
| 168 | |
| 169 <section name="graphic" title="Advanced graphic" help="By default, the x-axis and y-axis are respectively the first and the second components, the number of top variables is 100 and a superblock is used."> <!-- Plot settings, read by the command template as $graphic.* --> | |
| 170 <param name="response" optional="true" type="data" format="tsv,tabular,txt,csv" label="Color the individual plot with a response variable" | |
| 171 help="A TSV file containing either: (i) a single column with a qualitative or a quantitative variable; (ii) multiple columns corresponding to a disjunctive table."/> | |
| 172 <param name="text" type="boolean" truevalue="" falsevalue="--text" checked="true" label="Display the names of the points (in biplots)"/> | |
| 173 <param name="compx" type="integer" label="Component for the X-axis" help="The component used in the X-axis for biplots and the only component used for top variable plot. @COMP_RULES@" value="1" min="1" max="5"/> | |
| 174 <param name="compy" type="integer" label="Component for the Y-axis" help="The component used in the Y-axis for biplots. @COMP_RULES@" value="2" min="1" max="5"/> | |
| 175 <param name="blockx" type="integer" value="0" min="0" max="10" label="Visualise this block" help="Block used in the X-axis for individual plot and the only block used for corcircle and top variable plots. @BLOCK_RULES@"/> | |
| 176 <param name="blocky" type="integer" value="0" min="0" max="10" label="Visualise this block for the Y-axis (in individual plot)" help="0 corresponds to the superblock (or the last block loaded), @BLOCK_RULES@ By default, the superblock is selected."/> | |
| 177 <param name="nmark" type="integer" label="Number of top variables" value="100" min="10" max="300"/> | |
| 178 </section> | |
| 179 | |
| 180 <param type="select" name="output_selector" label="Outputs" multiple="true"> <!-- Each option value is the key tested by one output filter. --> | |
| 181 <option selected="true" value="individuals">Individual plot</option> | |
| 182 <option selected="true" value="corcircle">Corcircle plot</option> | |
| 183 <option value="top_variables">Top variables plot</option> | |
| 184 <option value="ave">Averages plot</option> | |
| 185 <option value="design">Design plot</option> | |
| 186 <option selected="true" value="individual_table">Individual table</option> | |
| 187 <option selected="true" value="variable_table">Variable table</option> | |
| 188 <option value="rdata">RData file</option> | |
| 189 </param> | |
| 190 </inputs> | |
| 191 | |
| 192 <outputs> <!-- A dataset is kept only when its key is present in output_selector. --> | |
| 193 <data format="pdf" name="individual_plot" label="${tool.name} on ${on_string}: individuals.pdf"> | |
| 194 <filter>"individuals" in output_selector</filter> | |
| 195 </data> | |
| 196 <data format="pdf" name="corcircle" label="${tool.name} on ${on_string}: corcircle.pdf"> | |
| 197 <filter>"corcircle" in output_selector</filter> | |
| 198 </data> | |
| 199 <data format="pdf" name="top_variables" label="${tool.name} on ${on_string}: top_variables.pdf"> | |
| 200 <filter>"top_variables" in output_selector</filter> | |
| 201 </data> | |
| 202 <data format="pdf" name="ave" label="${tool.name} on ${on_string}: ave.pdf"> | |
| 203 <filter>"ave" in output_selector</filter> | |
| 204 </data> | |
| 205 <data format="pdf" name="design" label="${tool.name} on ${on_string}: design.pdf"> | |
| 206 <filter>"design" in output_selector</filter> | |
| 207 </data> | |
| 208 <data format="tsv" name="individual_table" label="${tool.name} on ${on_string}: individuals.tsv"> | |
| 209 <filter>"individual_table" in output_selector</filter> | |
| 210 </data> | |
| 211 <data format="tsv" name="variable_table" label="${tool.name} on ${on_string}: variables.tsv"> | |
| 212 <filter>"variable_table" in output_selector</filter> | |
| 213 </data> | |
| 214 <data format="rdata" name="rdata" label="${tool.name} on ${on_string}: rgcca.result.RData"> | |
| 215 <filter>"rdata" in output_selector</filter> | |
| 216 </data> | |
| 217 </outputs> | |
| 218 | |
| 219 <tests> | |
| 220 | |
| 221 <test expect_num_outputs="8" expect_exit_code="0"> <!-- Single-block run: checks the header names and that a data row holds four decimal numbers. --> | |
| 222 <expand macro="output_tests" path="1block"/> | |
| 223 <param name="blocks" value="agriculture.tsv" ftype="tsv"/> | |
| 224 <output name="individual_table"> | |
| 225 <assert_contents> | |
| 226 <has_n_columns n="4"/> | |
| 227 <has_line_matching | |
| 228 expression='"agriculture.axis1"\s"agriculture.axis2"\s"superblock.axis1"\s"superblock.axis2"'/> | |
| 229 <has_line_matching | |
| 230 expression='^.+(\s\-?\d+\.\d+){4}$'/> | |
| 231 </assert_contents> | |
| 232 </output> | |
| 233 </test> | |
| 234 | |
| 235 <test expect_num_outputs="8" expect_exit_code="0"> <!-- Custom design matrix with the superblock box unchecked; both flags must reach the command line. --> | |
| 236 <expand macro="output_tests" path="3blocks_connection"/> | |
| 237 <expand macro="output_tests_3blocks"/> | |
| 238 <section name="analyse"> | |
| 239 <param name="connection" value="connection.tsv" ftype="tsv"/> | |
| 240 <param name="superblock" value="false"/> | |
| 241 </section> | |
| 242 <assert_command> | |
| 243 <has_text text="--connection"/> | |
| 244 <has_text text="--superblock"/> | |
| 245 </assert_command> | |
| 246 </test> | |
| 247 | |
| 248 <test expect_exit_code="0" expect_num_outputs="8"> <!-- Supervised analysis: block 3 is the response and the superblock box is unchecked. --> | |
| 249 <expand macro="output_tests" path="3blocks_supervised"/> | |
| 250 <expand macro="output_tests_3blocks"/> | |
| 251 <section name="analyse"> | |
| 252 <param name="superblock" value="false"/> | |
| 253 <conditional name="supervised"> | |
| 254 <param name="learning_mode" value="supervised"/> | |
| 255 <param name="block_response" value="3"/> | |
| 256 </conditional> | |
| 257 </section> | |
| 258 <assert_command> | |
| 259 <has_text text="--superblock"/> | |
| 260 <has_text text="--response 3"/> | |
| 261 </assert_command> | |
| 262 </test> | |
| 263 | |
| 264 <test expect_exit_code="0" expect_num_outputs="8"> <!-- No explicit parameters: everything is supplied by the two expanded macros (defined in macro.xml, not shown here). --> | |
| 265 <expand macro="output_tests" path="3blocks"/> | |
| 266 <expand macro="output_tests_3blocks"/> | |
| 267 </test> | |
| 268 | |
| 269 <test expect_exit_code="0" expect_num_outputs="8"> <!-- Multiple-block family with the sgcca method; the method name must appear on the command line. --> | |
| 270 <expand macro="output_tests" path="3blocks_sgcca"/> | |
| 271 <expand macro="output_tests_3blocks"/> | |
| 272 <section name="analyse"> | |
| 273 <conditional name="method"> | |
| 274 <param value="m" name="family"/> | |
| 275 <param value="sgcca" name="type"/> | |
| 276 </conditional> | |
| 277 </section> | |
| 278 <assert_command> | |
| 279 <has_text text="sgcca"/> | |
| 280 </assert_command> | |
| 281 </test> | |
| 282 | |
| 283 <test expect_exit_code="0" expect_num_outputs="8"> <!-- Two-block PLS run with non-default analysis and graphic settings plus a response file for colouring. --> | |
| 284 <expand macro="output_tests" path="2blocks" compx="3" compy="1"/> | |
| 285 <param name="blocks" value="agriculture.tsv,politic.tsv"/> | |
| 286 <section name="analyse"> | |
| 287 <param name="scale" value="false"/> | |
| 288 <conditional name="tau"> | |
| 289 <param name="bool" value="false"/> | |
| 290 <param name="value" value="0"/> | |
| 291 </conditional> | |
| 292 <param name="scheme" value="3"/> | |
| 293 <param name="ncomp" value="3"/> | |
| 294 <conditional name="method"> | |
| 295 <param name="family" value="2"/> | |
| 296 <param name="type" value="pls"/> | |
| 297 </conditional> | |
| 298 </section> | |
| 299 <section name="graphic"> | |
| 300 <param name="response" value="political_system.tsv" ftype="tsv"/> | |
| 301 <param name="text" value="false"/> | |
| 302 <param name="compx" value="3"/> | |
| 303 <param name="compy" value="1"/> | |
| 304 <param name="blockx" value="2"/> | |
| 305 <param name="blocky" value="1"/> | |
| 306 <param name="nmark" value="11"/> | |
| 307 </section> | |
| 308 <assert_command> | |
| 309 <has_text text="--group"/> | |
| 310 <has_text text="pls"/> | |
| 311 </assert_command> | |
| 312 </test> | |
| 313 | |
| 314 </tests> | |
| 315 <help> | |
| 316 | |
| 317 ================================== | |
| 318 ABOUT | |
| 319 ================================== | |
| 320 | |
| 321 | |
| 322 **Author:** | |
| 323 Etienne CAMENEN | |
| 324 | |
| 325 | |
| 326 **Contact:** | |
| 327 arthur.tenenhaus@centralesupelec.fr | |
| 328 | |
| 329 | |
| 330 **R package:** | |
| 331 The RGCCA package is available from the CRAN repository (https://cran.r-project.org/web/packages/RGCCA). | |
| 332 | |
| 333 --------------------------------------------------- | |
| 334 | |
| 335 ================================== | |
| 336 R/SGCCA | |
| 337 ================================== | |
| 338 | |
| 339 A user-friendly multi-block analysis (Regularized Generalized Canonical Correlation Analysis, RGCCA) as described in [1] and [2] with all default settings predefined. The software produces figures to explore the analysis results: individuals and variables projected on two components of the multi-block analysis, list of top variables and explained variance in the model. | |
| 340 | |
| 341 **Working example** | |
| 342 | |
| 343 | From Russett data (RGCCA package): https://github.com/rgcca-factory/RGCCA/tree/master/inst/extdata | |
| 344 | Use *agriculture.tsv* as a block. Add *industry.tsv* and *politic.tsv* as new blocks. *connection.tsv* could be used as a design matrix and *political_system.tsv* as a response variable respectively in analysis and graphic settings. | |
| 345 | |
| 346 **Documentation** | |
| 347 | |
| 348 - RGCCA: https://cran.r-project.org/web/packages/RGCCA/vignettes/vignette_RGCCA.pdf | |
| 349 - accepted input / output formats: https://github.com/rgcca-factory/RGCCA#input-files | |
| 350 <!-- - tutorial: https://github.com/BrainAndSpineInstitute/rgcca_galaxy/blob/release/0.2/README.md--> | |
| 351 | |
| 352 </help> | |
| 353 | |
| 354 <citations> | |
| 355 <citation type="doi">10.1007/s11336-017-9573-x</citation> | |
| 356 <citation type="doi">10.1007/s11336-011-9206-8</citation> | |
| 357 </citations> | |
| 358 | |
| 359 </tool> |
