comparison dimet_differential_multigroup_analysis.xml @ 0:de2f85da4e1d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 3dba8748fbc8cc8e89ffc08e5febe0a0527a96a5
author iuc
date Fri, 21 Jun 2024 18:47:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:de2f85da4e1d
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>
3 Differential analysis of 3 or more chosen groups of tracer metabolomics data (by DIMet)
4 </description>
5 <macros>
6 <token name="@TOOL_LABEL@">differential multigroup analysis</token>
7 <token name="@EXECUTABLE@">differential_multigroup_analysis</token>
8 <import>macros.xml</import>
9 </macros>
10 <expand macro="requirements"/>
11 <command detect_errors="exit_code"><![CDATA[
12 @INIT_CONFIG@
13 @INIT_DIFF_MULTIGROUP_ANALYSIS@
14 @INIT_DATATYPES@
15 @INIT_GROUPS@
16 @INIT_PLOT_CONDITIONS@
17 @INIT_TIMEPOINTS@
18 @INIT_MULTIGROUP_COMPARISONS@
19 HYDRA_FULL_ERROR=1 python -m dimet
20 '++hydra.run.dir=.'
21 '++figure_path=figures'
22 '++table_path=tables'
23 '++analysis={
24 dataset:{
25 _target_:dimet.data.DatasetConfig,
26 name: "Galaxy DIMet run"
27 },
28 method:{
29 _target_: dimet.method.MultiGroupComparisonConfig,
30 label: multi_group_comparison,
31 name: "Multi group statistical comparison using Kruskal-Wallis test"
32 },
33 label: multi_group_comparison,
34 datatypes:${datatypes_avail}
35 }'
36 '++analysis.method.datatypes=${datatypes_avail}'
37 '++analysis.timepoints=${timepoints}'
38 '++analysis.conditions=${comparisons}'
39 '++analysis.dataset.label='
40 '++analysis.method.correction_method=${correction_method}'
41 '++analysis.method.impute_values=${impute_values}'
42 '++analysis.dataset.subfolder='
43 '++analysis.method.grouping=${groups}'
44 '++analysis.dataset.conditions=${conditions}'
45 #if $metadata_path:
46 '++analysis.dataset.metadata=metadata'
47 #end if
48 #if str( $data_input.data_input_selector ) == "abundance":
49 #if $data_input.abundance_file:
50 '++analysis.dataset.abundances=abundance'
51 #end if
52 #elif str( $data_input.data_input_selector ) == "mean_enrichment":
53 #if $data_input.me_or_frac_contrib_file:
54 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
55 #end if
56 #elif str( $data_input.data_input_selector ) == "isotop_prop":
57 #if $data_input.isotop_prop_file:
58 '++analysis.dataset.isotopologue_proportions=isotop_prop'
59 #end if
60 #else
61 #if $data_input.isotop_abs_file:
62 '++analysis.dataset.isotopologues=isotop_abs'
63 #end if
64 #end if
65 @REMOVE_CONFIG@
66 ]]></command>
67 <inputs> <!-- Every parameter is supplied by a macro expansion; the macro definitions are not in this file (presumably in the imported macros.xml; confirm there). -->
68 <expand macro="input_parameters_multi_diff_analysis"/>
69 <expand macro="plot_factor_list"/>
70 <expand macro="timepoint_multigroup"/>
71 <expand macro="compartments"/>
72 <expand macro="correction_method"/>
73 </inputs>
74
75 <outputs> <!-- Files found in the "tables" directory (the command sets '++table_path=tables') are collected as tabular elements of the "report" list collection. -->
76 <collection name="report" type="list">
77 <discover_datasets pattern="__designation__" directory="tables" format="tabular"/>
78 </collection>
79 </outputs>
80 <tests>
81 <test> <!-- Abundance input with bonferroni correction over three conditions (Control, Cond1, Cond2), timepoints T0h,T2h and compartment "cell"; expects exactly one tabular element in the "report" collection. -->
82 <param name="data_input_selector" value="abundance" />
83 <param name="abundance_file" ftype="tabular" value="rawAbundances3.csv"/>
84 <param name="metadata_path" ftype="tabular" value="example3_metadata.csv"/>
85 <param name="correction_method" value="bonferroni"/>
86 <repeat name="plot_factor_list">
87 <param name="condition" value="Control"/>
88 </repeat>
89 <repeat name="plot_factor_list">
90 <param name="condition" value="Cond1"/>
91 </repeat>
92 <repeat name="plot_factor_list">
93 <param name="condition" value="Cond2"/>
94 </repeat>
95 <param name="timepoint" value="T0h,T2h"/>
96 <param name="compartments" value="cell"/>
97
98 <output_collection name="report" type="list" count="1">
99 <element file="abundance--cell--multigroup.tsv" name="abundance--cell--multigroup.tsv" ftype="tabular"/>
100 </output_collection>
101 </test>
102 </tests>
103 <help><![CDATA[
104 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
105
106 **Input data files**
107
108 This tool computes the Kruskal-Wallis test over 3 or more groups,
109 to evaluate if at least one group is significantly different from the other groups (H0: median of all of the groups is equal). For illustration see the section **Metadata File Information** which contains three conditions: Control, Core_mass and Edge_tissue, across one single time point, thus 3 groups (number-of-groups-in-my-data = number-of-conditions x number-of-timepoints).
110
111 This tool accepts at most 5 tab-delimited .csv files as inputs. There are two types of files:
112
113 - The measures' (or quantifications') files, that can be of 4 types.
114
115 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
116
117 For running DIMet @EXECUTABLE@ you need **at least one** file of measures:
118
119
120 - The total **abundances** (of the metabolites) file
121
122 - The mean **enrichment** or labelled fractional contributions
123
124 - The **isotopologues** absolute values files (optional)
125
126 - The **isotopologue proportions** file (optional)
127
128 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
129
130 The measures' files must be organized as matrices:
131
132 - The first column must contain Metabolite IDs that are unique (not repeated) within the file.
133
134 - The rest of the columns correspond to the samples
135
136 - The rows correspond to the metabolites
137
138 - The values must be tab separated, with the first row containing the sample/column labels.
139
140 See the following examples of measures' files:
141
142
143 Example - Metabolites **abundances**:
144
145 =============== ================== ================== ================== ================== ================== ==================
146 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
147 =============== ================== ================== ================== ================== ================== ==================
148 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956
149 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051
150 Glc6P 2310 2142 2683 1683 012532068 1252172
151 Gly3P 399298 991656565 525195 6365231 89451625 4952651963
152 IsoCit 0 0 0 84915613 856236 954651610
153 =============== ================== ================== ================== ================== ================== ==================
154
155 Example - mean **enrichment** or labeled fractional contributions:
156
157 =============== ================== ================== ================== ================== ================== ==================
158 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
159 =============== ================== ================== ================== ================== ================== ==================
160 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9
161 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68
162 Glc6P 0.06 0.66 2683 0.06 2068 2172
163 Gly3P 0.06 0.06 0.06 1 5 3
164 IsoCit 0.06 1 0.49 0.36 6 10
165 =============== ================== ================== ================== ================== ================== ==================
166
167 Example - **Isotopologues**
168
169 =============== ================== ================== ================== ================== ================== ==================
170 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
171 =============== ================== ================== ================== ================== ================== ==================
172 2_3-PG_m+0 206171.4626 285834.0353 36413.27637 27367.17784 6171.4626 119999
173 2_3-PG_m+1 123 432 101 127 206171.4626 119999
174 2_3-PG_m+2 133780.182 161461.2364 182631.3947 132170.3807 358749.348 848754.36
175 2_3-PG_m+3 8358749.348 10271010.45 10505228.3 8376820.028 62163.30727 1088.8963
176 2-OHGLu_m+0 5550339.322 6072872.833 3855047.791 3216178.72 8358749.348 10271010.45
177 2-OHGLu_m+1 0.0 0.0 0.0 0.0 206171.4626 285834.0353
178 =============== ================== ================== ================== ================== ================== ==================
179
180
181 Example - **Isotopologue proportions**:
182
183 =============== ================== ================== ================== ================== ================== ==================
184 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
185 =============== ================== ================== ================== ================== ================== ==================
186 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12
187 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12
188 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743
189 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017
190 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063
191 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263
192 =============== ================== ================== ================== ================== ================== ==================
193
194
195
196 **Metadata File Information**
197
198 Provide a tab-separated file that has the names of the samples in the first column and one header row.
199 Column names must be exactly in this order:
200
201 name_to_plot
202 condition
203 timepoint
204 timenum
205 compartment
206 original_name
207
208
209 Example **Metadata File**:
210
211
212 ==================== =============== ============= ============ ================ =================
213 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name**
214 ==================== =============== ============= ============ ================ =================
215 Control_cell-1 Control T0 0 cell MCF001089_TD01
216 Control_cell-2 Control T0 0 cell MCF001089_TD02
217 Control_cell-3 Control T0 0 cell MCF001089_TD03
218 CoreMass_cell-1 Core_mass T0 0 cell MCF001089_TD04
219 CoreMass_cell-2 Core_mass T0 0 cell MCF001089_TD05
220 CoreMass_cell-3 Core_mass T0 0 cell MCF001089_TD06
221 EdgeTiss_cell-1 Edge_tissue T0 0 cell MCF001089_TD07
222 EdgeTiss_cell-2 Edge_tissue T0 0 cell MCF001089_TD08
223 EdgeTiss_cell-3 Edge_tissue T0 0 cell MCF001089_TD09
224 ==================== =============== ============= ============ ================ =================
225
226
227 The column **original_name** must have the names of the samples as given in your data.
228
229 The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). Setting meaningful names is the better choice, as they are used to display the results.
230
231 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters ("T", "t", "s", "h" etc.) or any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
232
233 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
234
235
236 **Running the analysis**
237
238
239 You can specify how you want your analysis to be executed, with the parameters:
240
241 - **comparisons** : the groups you want to select for comparison in the multigroup analysis
242
243 (Note that **comparisons** parameter in the multigroup analysis does not have the same usage as in the pairwise differential analysis).
244
245 - **datatypes** : the measures type(s) that you want to run
246
247 - **correction_method** : one of the methods for multiple testing correction available in the statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
248
249 There exist hints on use that will guide you, next to the parameters.
250
251 For more information about the implemented statistical tests, please visit: https://github.com/cbib/DIMet/wiki/2-Statistical-tests
252
253 The output files are explained in https://github.com/cbib/DIMet/wiki/3-Output
254
255 **Available data for testing**
256
257 You can test our tool with the data from our manuscript https://zenodo.org/record/10579862 (the pertinent
258 files for you are located in the subfolders inside the data folder).
259 You can also use the minimal data examples from https://zenodo.org/record/10579891
260
261 ]]>
262 </help>
263 <expand macro="citations"/>
264 </tool>