comparison dimet_differential_analysis.xml @ 0:1ccbaa9510dc draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 3dba8748fbc8cc8e89ffc08e5febe0a0527a96a5
author iuc
date Fri, 21 Jun 2024 18:44:52 +0000
parents
children c756cf333cd7
comparison
equal deleted inserted replaced
-1:000000000000 0:1ccbaa9510dc
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>
3 Differential analysis of tracer metabolomics data comparing two groups (by DIMet)
4 </description>
5 <macros> <!-- Per-tool tokens: they fill the @TOOL_LABEL@ and @EXECUTABLE@ placeholders used in the tool id, the tool name and the help text of this file. -->
6 <token name="@TOOL_LABEL@">differential analysis</token>
7 <token name="@EXECUTABLE@">differential_analysis</token>
8 <import>macros.xml</import> <!-- NOTE(review): presumably supplies the expanded macros and the @TOOL_VERSION@ / @VERSION_SUFFIX@ / @INIT_*@ tokens; confirm in macros.xml. -->
9 </macros>
10 <expand macro="requirements"/>
11 <!-- Runs "python -m dimet" with Hydra-style '++key=value' overrides. Each substituted value is placed inside a single-quoted shell word; exactly one dataset override is passed, chosen by data_input.data_input_selector. The @INIT_*@ and @REMOVE_CONFIG@ tokens are defined outside this file. --> <command detect_errors="exit_code"><![CDATA[
12 @INIT_CONFIG@
13 @INIT_DIFF_ANALYSIS@
14 @INIT_STAT_TEST@
15 @INIT_GROUPS@
16 @INIT_DIFF_ANALYSIS_COMPARISONS@
17 HYDRA_FULL_ERROR=1 python -m dimet
18 '++hydra.run.dir=.'
19 '++figure_path=figures'
20 '++table_path=tables'
21 '++analysis={
22 dataset:{
23 _target_: dimet.data.DatasetConfig,
24 name: "I am a synthetic data example"
25 },
26 method:{
27 _target_: dimet.method.DifferentialAnalysisConfig,
28 label: "differential_analysis",
29 name: "Pairwise computation of statistical differences",
30 draw_ellipses: null,
31 run_iris_demo: false
32 },
33 label: differential-analysis-example2
34 }'
35 '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}'
36 '++analysis.dataset.label='
37 '++analysis.timepoints=${timepoints}'
38 '++analysis.comparisons=${comparisons}'
39 '++analysis.method.statistical_test=${statistical_test}'
40 '++analysis.method.grouping=${groups}'
41 '++analysis.method.correction_method=${correction_method}'
42 '++analysis.method.disfit_tail_option="auto"'
43 '++analysis.method.impute_values=${impute_values}'
44 '++analysis.statistical_test=${statistical_test}'
45 '++analysis.dataset.subfolder='
46 '++analysis.dataset.conditions=${conditions}'
47 #if $metadata_path:
48 '++analysis.dataset.metadata=metadata'
49 #end if
50 #if str( $data_input.data_input_selector ) == "abundance":
51 #if $data_input.abundance_file:
52 '++analysis.dataset.abundances=abundance'
53 #end if
54 #elif str( $data_input.data_input_selector ) == "mean_enrichment":
55 #if $data_input.me_or_frac_contrib_file:
56 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
57 #end if
58 #elif str( $data_input.data_input_selector ) == "isotop_prop":
59 #if $data_input.isotop_prop_file:
60 '++analysis.dataset.isotopologue_proportions=isotop_prop'
61 #end if
62 #else
63 #if $data_input.isotop_abs_file:
64 '++analysis.dataset.isotopologues=isotop_abs'
65 #end if
66 #end if
67 @REMOVE_CONFIG@
68 ]]></command>
69 <inputs> <!-- Tool form: four macro-expanded parameter groups (defined outside this file) plus the qualityDistanceOverSpan cutoff declared here. -->
70 <expand macro="input_parameters_diff_analysis"/>
71 <expand macro="factor_list"/>
72 <expand macro="timepoint"/>
73 <expand macro="correction_method"/>
74 <param name="qualityDistanceOverSpan" type="float" min="-1.0" max="-0.1" value="-0.3" label="quality Distance Over Span" help="Default value is -0.3."/> <!-- Float restricted to the range -1.0 to -0.1; forwarded on the command line as analysis.method.qualityDistanceOverSpan. -->
75 </inputs>
76
77 <outputs> <!-- One list collection, "report", filled by dataset discovery after the job finishes. -->
78 <collection name="report" type="list">
79 <discover_datasets pattern="__designation__" directory="tables" format="tabular"/> <!-- Looks in the "tables" directory, the same path the command passes as '++table_path=tables', and registers discovered files as tabular elements. -->
80 </collection>
81 </outputs>
82 <tests> <!-- Single test: abundance input, parametric test "Tt", bonferroni correction, conditions Control and L-Cycloserine at timepoints T0 and T2h; expects four tabular result tables. -->
83 <test>
84 <param name="data_input_selector" value="abundance"/>
85 <param name="abundance_file" value="rawAbundances.csv" ftype="tabular"/>
86 <param name="metadata_path" value="example2_metadata.csv" ftype="tabular"/>
87 <param name="correction_method" value="bonferroni"/>
88 <param name="statistical_test_type" value="parametric"/>
89 <param name="stat_test" value="Tt"/>
90 <param name="qualityDistanceOverSpan" value="-0.3"/>
91 <repeat name="factor_list">
92 <param name="condition" value="Control"/>
93 </repeat>
94 <repeat name="factor_list">
95 <param name="condition" value="L-Cycloserine"/>
96 </repeat>
97 <param name="timepoint" value="T0,T2h"/>
98 <output_collection name="report" type="list" count="4">
99 <element name="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" file="abundance--cell-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
100 <element name="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" file="abundance--cell-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
101 <element name="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" file="abundance--med-Control-T0-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
102 <element name="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" file="abundance--med-Control-T2h-L-Cycloserine-T2h-Tt.tsv" ftype="tabular"/>
103 </output_collection>
104 </test>
105 </tests>
106 <help><![CDATA[
107
108 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
109
110 DIMet differential analysis compares groups to evaluate for statistical differences, in a **pairwise** mode.
111 This pairwise mode accepts one or several defined comparison(s), that will run in a single execution.
112 In this way, you do not need to re-upload your data several times;
113 instead, you upload your data once and compose a list of comparisons:
114
115 - Tumoral,T0 vs Control,T0
116
117 - Tumoral,T2 vs Control,T2
118
119 - Tumoral,T24 vs Control,T24
120
121 - ...
122
123 then DIMet differential analysis will execute them -one by one- automatically.
124
125
126 **Input data files**
127
128 This tool requires at most 5 tab-delimited .csv files as inputs. There are two types of files:
129
130 - The measures' (or quantifications') files, that can be of 4 types.
131
132 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
133
134 For running DIMet @EXECUTABLE@ you need **at least one file** of measures:
135
136 - The total **abundances** (of the metabolites) file
137
138 - The mean **enrichment** or labelled fractional contributions
139
140 - The **isotopologues** absolute values file (optional)
141
142 - The **isotopologue proportions** file (optional)
143
144 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
145
146
147 **Measures' files**
148
149 The measures' files must be organized as matrices:
150
151 - The first column must contain Metabolite IDs that are unique (not repeated) within the file.
152
153 - The rest of the columns correspond to the samples
154
155 - The rows correspond to the metabolites
156
157 - The values must be tab separated, with the first row containing the sample/column labels.
158
159 See the following examples of measures files:
160
161
162 Example - Metabolites **abundances**:
163
164 =============== ================== ================== ================== ================== ================== ==================
165 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
166 =============== ================== ================== ================== ================== ================== ==================
167 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956
168 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051
169 Glc6P 2310 2142 2683 1683 012532068 1252172
170 Gly3P 399298 991656565 525195 6365231 89451625 4952651963
171 IsoCit 0 0 0 84915613 856236 954651610
172 =============== ================== ================== ================== ================== ================== ==================
173
174 Example - mean **enrichment** or labeled fractional contributions:
175
176 =============== ================== ================== ================== ================== ================== ==================
177 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
178 =============== ================== ================== ================== ================== ================== ==================
179 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9
180 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68
181 Glc6P 0.06 0.66 2683 0.06 2068 2172
182 Gly3P 0.06 0.06 0.06 1 5 3
183 IsoCit 0.06 1 0.49 0.36 6 10
184 =============== ================== ================== ================== ================== ================== ==================
185
186 Example - **Isotopologues**
187
188 =============== ================== ================== ================== ================== ================== ==================
189 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
190 =============== ================== ================== ================== ================== ================== ==================
191 2_3-PG_m+0 206171.4626 285834.0353 36413.27637 27367.17784 6171.4626 119999
192 2_3-PG_m+1 123 432 101 127 206171.4626 119999
193 2_3-PG_m+2 133780.182 161461.2364 182631.3947 132170.3807 358749.348 848754.36
194 2_3-PG_m+3 8358749.348 10271010.45 10505228.3 8376820.028 62163.30727 1088.8963
195 2-OHGLu_m+0 5550339.322 6072872.833 3855047.791 3216178.72 8358749.348 10271010.45
196 2-OHGLu_m+1 0.0 0.0 0.0 0.0 206171.4626 285834.0353
197 =============== ================== ================== ================== ================== ================== ==================
198
199
200 Example - **Isotopologue proportions**:
201
202 =============== ================== ================== ================== ================== ================== ==================
203 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
204 =============== ================== ================== ================== ================== ================== ==================
205 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12
206 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12
207 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743
208 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017
209 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063
210 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263
211 =============== ================== ================== ================== ================== ================== ==================
212
213
214
215 **Metadata File Information**
216
217 Provide a tab-separated file that has the names of the samples in the first column and one header row.
218 Column names must be exactly in this order:
219
220 name_to_plot
221 condition
222 timepoint
223 timenum
224 compartment
225 original_name
226
227
228 Example **Metadata File**:
229
230
231 ==================== =============== ============= ============ ================ =================
232 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name**
233 -------------------- --------------- ------------- ------------ ---------------- -----------------
234 Control_cell_T0-1 Control T0 0 cell MCF001089_TD01
235 Control_cell_T0-2 Control T0 0 cell MCF001089_TD02
236 Control_cell_T0-3 Control T0 0 cell MCF001089_TD03
237 Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04
238 Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05
239 Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06
240 Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07
241 Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08
242 Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01
243 Control_med_T24-1 Control T24 24 med MCF001090_TD02
244 Control_med_T24-2 Control T24 24 med MCF001090_TD03
245 Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04
246 Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05
247 Control_med_T0-1 Control T0 0 med MCF001090_TD06
248 Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07
249 Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08
250 ==================== =============== ============= ============ ================ =================
251
252
253 The column **original_name** must have the names of the samples as given in your data.
254
255 The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). Setting names that
256 are meaningful is a better choice, as they are used to display the results.
257
258 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters such as "T", "t", "s", "h", or any other symbol).
259 Make sure these time numbers are in the same units (but do not write the units here!).
260
261 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the
262 compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
263
264
265 **Running the analysis**
266
267 You can specify how you want your analysis to be executed, with the parameters:
268
269 - **conditions**: the conditions present in your data, to perform the pairwise comparison.
270
271 - **comparisons** : the pairs of [condition, timepoint] groups to compare
272
273 - **datatypes** : the measures type(s) that you want to run (see above in Input data files section)
274
275 - **statistical_test** : choose, by type of measure, the specific statistical test to be applied.
276
277 Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test,
278 t-test, and permutation test are currently offered (we use the trusted functions from the scipy library https://docs.scipy.org/doc/scipy/reference/stats.html).
279
280 For the permutation test, we have established as test statistic, the absolute difference of geometric means of the two compared groups.
281
282 - **qualityDistanceOverSpan**: a normalized distance between the intervals of values of the compared groups, that is the cutoff for
283
284 considering a minimal acceptable "separation", and therefore, to be suitable for statistical testing. A 'distance/span' == 1 is a perfect separation,
285 whereas if 'distance/span' < 0 there is no separation.
286 Use with caution when your intra-group values are highly dispersed. Default is -0.3 (not stringent).
287
288 - **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
289
290 There exist hints on use that will guide you, next to the parameters.
291
292 For more information about the implemented statistical tests, please visit: https://github.com/cbib/DIMet/wiki/2-Statistical-tests
293
294 The output files are explained in https://github.com/cbib/DIMet/wiki/3-Output
295
296
297
298 **Available data for testing**
299
300 You can test our tool with the data from our manuscript https://zenodo.org/record/10579862 (the pertinent
301 files for you are located in the subfolders inside the data folder).
302 You can also use the minimal data examples from https://zenodo.org/record/10579891
303
304 ]]>
305 </help>
306 <expand macro="citations" />
307 </tool>