comparison dimet_timecourse_analysis.xml @ 0:d9e56e80a153 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 3dba8748fbc8cc8e89ffc08e5febe0a0527a96a5
author iuc
date Fri, 21 Jun 2024 18:45:52 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d9e56e80a153
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>
3 Differential analysis of tracer metabolomics data comparing consecutive time-points (by DIMet)
4 </description>
5 <macros> <!-- The two tokens below fill @TOOL_LABEL@ and @EXECUTABLE@, which are used in the tool id/name and in the help text; the other @...@ tokens and the expanded macros are presumably provided by the imported macros.xml (not visible here; confirm). -->
6 <token name="@TOOL_LABEL@">timecourse analysis</token>
7 <token name="@EXECUTABLE@">timecourse_analysis</token>
8 <import>macros.xml</import>
9 </macros>
10 <expand macro="requirements"/>
11 <command detect_errors="exit_code"><![CDATA[
12 @INIT_CONFIG@
13 @INIT_DIFF_ANALYSIS@
14 @INIT_STAT_TEST@
15 @INIT_GROUPS@
16 @INIT_CONDITIONS_TIMECOURSE@
17 HYDRA_FULL_ERROR=1 python -m dimet
18 '++hydra.run.dir=.'
19 '++figure_path=figures'
20 '++table_path=tables'
21 '++analysis={
22 dataset:{
23 _target_: dimet.data.DatasetConfig,
24 name: "I am a synthetic data example"
25 },
26 method:{
27 _target_: dimet.method.TimeCourseAnalysisConfig,
28 label: "time_course_analysis",
29 name: "Time wise computation of statistical differences"
30 },
31 label: time_course_analysis-example
32 }'
33 '++analysis.method.qualityDistanceOverSpan=${qualityDistanceOverSpan}'
34 '++analysis.statistical_test=${statistical_test}'
35 '++analysis.method.statistical_test=${statistical_test}'
36 '++analysis.method.grouping=${groups}'
37 '++analysis.method.correction_method=${correction_method}'
38 '++analysis.method.disfit_tail_option="auto"'
39 '++analysis.method.impute_values=${impute_values}'
40 '++analysis.dataset.subfolder='
41 '++analysis.dataset.label='
42 '++analysis.dataset.conditions=${conditions}'
43 #if $metadata_path:
44 '++analysis.dataset.metadata=metadata'
45 #end if
46 #if str( $data_input.data_input_selector ) == "abundance":
47 #if $data_input.abundance_file:
48 '++analysis.dataset.abundances=abundance'
49 #end if
50 #elif str( $data_input.data_input_selector ) == "mean_enrichment":
51 #if $data_input.me_or_frac_contrib_file:
52 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
53 #end if
54 #elif str( $data_input.data_input_selector ) == "isotop_prop":
55 #if $data_input.isotop_prop_file:
56 '++analysis.dataset.isotopologue_proportions=isotop_prop'
57 #end if
58 #else
59 #if $data_input.isotop_abs_file:
60 '++analysis.dataset.isotopologues=isotop_abs'
61 #end if
62 #end if
63 @REMOVE_CONFIG@
64 ]]></command> <!-- Runs "python -m dimet" with Hydra overrides built from the tool parameters. Every override is a single fully quoted shell word and uses the "++" (add-or-override) prefix, so all four data-input branches behave the same whether or not the key already exists in the composed configuration. The final #else branch handles any selector value other than abundance, mean_enrichment and isotop_prop (presumably the absolute isotopologues choice; confirm against macros.xml). -->
65 <inputs> <!-- The first three entries are expanded from macros that are not visible here (presumably defined in macros.xml); qualityDistanceOverSpan is the only parameter declared locally and is forwarded to the command as analysis.method.qualityDistanceOverSpan. -->
66 <expand macro="input_parameters_diff_analysis"/>
67 <expand macro="factor_list"/>
68 <expand macro="correction_method"/>
69 <param name="qualityDistanceOverSpan" type="float" min="-1.0" max="-0.1" value="-0.3" label="quality Distance Over Span" help="Default value is -0.3."/>
70 </inputs>
71 <outputs> <!-- Single list collection "report": its elements are discovered in the "tables" directory (the table_path given to dimet in the command) and are typed as tabular. -->
72 <collection name="report" type="list">
73 <discover_datasets pattern="__designation__" directory="tables" format="tabular"/>
74 </collection>
75 </outputs>
76 <tests>
77 <test> <!-- Abundance-only run with bonferroni correction and the "Tt" test on the Control and L-Cycloserine conditions; expects four tabular tables, one per compartment (cell, med) and condition, each comparing T2h against T0. -->
78 <param name="data_input_selector" value="abundance" />
79 <param name="abundance_file" ftype="tabular" value="rawAbundances.csv"/>
80 <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/>
81 <param name="correction_method" value="bonferroni"/>
82 <param name="qualityDistanceOverSpan" value="-0.3"/>
83 <param name="statistical_test_type" value="parametric"/>
84 <param name="stat_test" value="Tt"/>
85 <repeat name="plot_factor_list">
86 <param name="condition" value="Control"/>
87 </repeat>
88 <repeat name="plot_factor_list">
89 <param name="condition" value="L-Cycloserine"/>
90 </repeat>
91 <output_collection name="report" type="list" count="4">
92 <element file="abundance--cell-Control-T2h-Control-T0-Tt.tsv" name="abundance--cell-Control-T2h-Control-T0-Tt.tsv" ftype="tabular"/>
93 <element file="abundance--cell-L-Cycloserine-T2h-L-Cycloserine-T0-Tt.tsv" name="abundance--cell-L-Cycloserine-T2h-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
94 <element file="abundance--med-Control-T2h-Control-T0-Tt.tsv" name="abundance--med-Control-T2h-Control-T0-Tt.tsv" ftype="tabular"/>
95 <element file="abundance--med-L-Cycloserine-T2h-L-Cycloserine-T0-Tt.tsv" name="abundance--med-L-Cycloserine-T2h-L-Cycloserine-T0-Tt.tsv" ftype="tabular"/>
96 </output_collection>
97
98 </test>
99 </tests>
100 <help><![CDATA[
101 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
102
103 **Input data files**
104
105 This tool performs a time course differential analysis on your time series data.
106 For illustration see the section **Metadata File Information** which contains several time points.
107
108 This time course differential analysis is sequential: for each individual condition, every timepoint is compared with the one preceding it, t_(x+1) vs t_x
109 (e.g. [Control, 90min] vs [Control, 60min]), for all the timepoints present in the data.
110 Our tool automatically detects the conditions and timepoints, and automatically organizes the comparisons
111 (you do not need to set this part yourself, DIMet does it for you).
112
113 Note that if you only need to compare specific [condition, timepoint] pairs that are not covered by
114 our automatic time course analysis, you can use the differential analysis in the pairwise mode instead.
115
116
117 This tool accepts at most 5 tab-delimited .csv files as inputs. There are two types of files:
118
119 - The measures' (or quantifications') files, that can be of 4 types.
120
121 - The metadata, a single file with the description of the samples in your measures' files. This is compulsory.
122
123 For running DIMet @EXECUTABLE@ you need **at least one** file of measures:
124
125 - The total **abundances** (of the metabolites) file
126
127 - The mean **enrichment** or labelled fractional contributions
128
129 - The **isotopologues** absolute values files (optional)
130
131 - The **isotopologue proportions** file (optional)
132
133 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
134
135 The measures' files must be organized as matrices:
136
137 - The first column must contain Metabolite IDs that are unique (not repeated) within the file.
138
139 - The rest of the columns correspond to the samples
140
141 - The rows correspond to the metabolites
142
143 - The values must be tab separated, with the first row containing the sample/column labels.
144
145 See the following examples of measures' files:
146
147
148 Example - Metabolites **abundances**:
149
150 =============== ================== ================== ================== ================== ================== ==================
151 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
152 =============== ================== ================== ================== ================== ================== ==================
153 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956
154 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051
155 Glc6P 2310 2142 2683 1683 012532068 1252172
156 Gly3P 399298 991656565 525195 6365231 89451625 4952651963
157 IsoCit 0 0 0 84915613 856236 954651610
158 =============== ================== ================== ================== ================== ================== ==================
159
160 Example - mean **enrichment** or labeled fractional contributions:
161
162 =============== ================== ================== ================== ================== ================== ==================
163 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
164 =============== ================== ================== ================== ================== ================== ==================
165 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9
166 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68
167 Glc6P 0.06 0.66 2683 0.06 2068 2172
168 Gly3P 0.06 0.06 0.06 1 5 3
169 IsoCit 0.06 1 0.49 0.36 6 10
170 =============== ================== ================== ================== ================== ================== ==================
171
172 Example - **Isotopologues**
173
174 =============== ================== ================== ================== ================== ================== ==================
175 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
176 =============== ================== ================== ================== ================== ================== ==================
177 2_3-PG_m+0 206171.4626 285834.0353 36413.27637 27367.17784 6171.4626 119999
178 2_3-PG_m+1 123 432 101 127 206171.4626 119999
179 2_3-PG_m+2 133780.182 161461.2364 182631.3947 132170.3807 358749.348 848754.36
180 2_3-PG_m+3 8358749.348 10271010.45 10505228.3 8376820.028 62163.30727 1088.8963
181 2-OHGLu_m+0 5550339.322 6072872.833 3855047.791 3216178.72 8358749.348 10271010.45
182 2-OHGLu_m+1 0.0 0.0 0.0 0.0 206171.4626 285834.0353
183 =============== ================== ================== ================== ================== ================== ==================
184
185
186 Example - **Isotopologue proportions**:
187
188 =============== ================== ================== ================== ================== ================== ==================
189 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
190 =============== ================== ================== ================== ================== ================== ==================
191 2_3-PG_m+0 0.023701408 0.026667837 0.003395407 0.05955 0.034383527 0.12
192 2_3-PG_m+1 0.0 0.0 0.0 0.0 0.4 0.12
193 2_3-PG_m+2 0.015379329 0.01506 0.017029723 0.35483229 0.54131313 0.743
194 2_3-PG_m+3 0.960919263 0.958268099 0.97957487 0.581310816 0.017029723 0.017
195 2-OHGLu_m+0 0.972778716 0.960016157 0.238843937 0.234383527 0.9998888 0.015064063
196 2-OHGLu_m+1 0.0 0.0 0.0 0.0 0.0001112 0.960919263
197 =============== ================== ================== ================== ================== ================== ==================
198
199
200
201 **Metadata File Information**
202
203 Provide a tab-separated file that has the names of the samples in the first column and one header row.
204 Column names must be exactly in this order:
205
206 name_to_plot
207 condition
208 timepoint
209 timenum
210 compartment
211 original_name
212
213 Example **Metadata File**:
214
215
216 ==================== =============== ============= ============ ================ =================
217 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name**
218 -------------------- --------------- ------------- ------------ ---------------- -----------------
219 Spleen1_cell_0-1 Spleen1 0min 0 cell MCF001089_TD01
220 Spleen1_cell_0-2 Spleen1 0min 0 cell MCF001089_TD02
221 Spleen1_cell_10-1 Spleen1 10min 10 cell MCF001089_TD03
222 Spleen1_cell_10-2 Spleen1 10min 10 cell MCF001089_TD04
223 Spleen1_cell_30-1 Spleen1 30min 30 cell MCF001089_TD05
224 Spleen1_cell_30-2 Spleen1 30min 30 cell MCF001089_TD06
225 Spleen1_cell_60-1 Spleen1 60min 60 cell MCF001089_TD07
226 Spleen1_cell_60-2 Spleen1 60min 60 cell MCF001089_TD08
227 Spleen1_cell_90-1 Spleen1 90min 90 cell MCF001089_TD09
228 Spleen1_cell_90-2 Spleen1 90min 90 cell MCF001089_TD011
229 Spleen1_med_30-3 Spleen1 30min 30 med MCF001089_TD025
230 Spleen1_med_30-2 Spleen1 30min 30 med MCF001089_TD023
231 ==================== =============== ============= ============ ================ =================
232
233
234 The column **original_name** must have the names of the samples as given in your data.
235
236 The column **name_to_plot** must have the names as you want them to be displayed (or set them identical to original_name if you prefer). Choosing meaningful names is recommended, as they are used to display the results.
237
238 The column **timenum** must contain only the numeric part of the timepoint, for example 2,0, 10, 100 (that is, without letters ("T", "t", "s", "h", etc.) or any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
239
240 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
241
242
243
244 **Running the analysis**
245
246
247
248 You can specify how you want your analysis to be executed, with the following parameters:
249
250 - **datatypes** : the measures type(s) that you want to run
251
252 - **statistical_test** : choose, by type of measure, the specific statistical test to be applied.
253
254 Kruskal-Wallis, Mann-Whitney, Wilcoxon’s signed rank test, Wilcoxon’s rank sum test,
255 t-test, and permutation test are currently offered (we use the trusted functions from the scipy library https://docs.scipy.org/doc/scipy/reference/stats.html).
256
257 For the permutation test, the test statistic is the absolute difference of the geometric means of the two compared groups.
258
259 - **qualityDistanceOverSpan**: a normalized distance between the intervals of values of the compared groups, that is the cutoff for considering a minimal acceptable "separation". A 'distance/span' == 1 is a perfect separation, whereas if 'distance/span' < 0 there is no separation. Use with caution when your intra-group values are highly dispersed. Default is -0.3 (not stringent).
260
261 - **correction_method** : one of the methods for multiple testing correction available in statsmodels library (bonferroni, fdr_bh, sidak, among others, see https://www.statsmodels.org/dev/generated/statsmodels.stats.multitest.multipletests.html).
262
263 Hints displayed next to the parameters will guide you.
264
265 The output consists of tables with the computed metrics, one for each pair of timepoints compared.
266 The number of output tables = number-of-conditions x (number-of-timepoints - 1) x number-of-compartments.
267
268 For more information about the implemented statistical tests, please visit: https://github.com/cbib/DIMet/wiki/2-Statistical-tests
269
270 The output files are explained in https://github.com/cbib/DIMet/wiki/3-Output
271
272 **Available data for testing**
273
274 You can test our tool with the data from our manuscript https://zenodo.org/record/10579862 (the pertinent
275 files for you are located in the subfolders inside the data folder).
276 You can also use the minimal data examples from https://zenodo.org/record/10579891
277
278 ]]>
279 </help>
280 <expand macro="citations" />
281 </tool>