comparison dimet_pca_analysis.xml @ 0:9ca0fdc9de33 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 3dba8748fbc8cc8e89ffc08e5febe0a0527a96a5
author iuc
date Fri, 21 Jun 2024 18:46:21 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9ca0fdc9de33
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>
3 Principal Component Analysis for tracer metabolomics data, producing tables (by DIMet)
4 </description>
5 <macros> <!-- Tool-specific tokens plus the shared macro file. -->
6 <token name="@TOOL_LABEL@">pca analysis</token> <!-- Human-readable label; referenced by the name attribute of the tool element. -->
7 <token name="@EXECUTABLE@">pca_analysis</token> <!-- Identifier suffix; referenced by the tool id and by the help text. -->
8 <import>macros.xml</import> <!-- Presumably supplies the remaining tokens and the macros expanded in this file (requirements, inputs, citations); confirm against macros.xml. -->
9 </macros>
10 <expand macro="requirements"/>
11 <command detect_errors="exit_code"><![CDATA[
12 @INIT_CONFIG@
13 @INIT_PCA@
14 @INIT_IMPUTE_VALUES@
15 @INIT_CONDITIONS@
16 HYDRA_FULL_ERROR=1 python -m dimet
17 '++hydra.run.dir=.'
18 '++figure_path=figures'
19 '++table_path=tables'
20 '++analysis={
21 dataset:{
22 _target_:dimet.data.DatasetConfig,
23 name: "Galaxy DIMet run"
24 },
25 method:{
26 _target_: dimet.method.PcaAnalysisConfig,
27 label: pca-analysis-tables,
28 name: "Generate Principal Component Analysis tables",
29 pca_split_further:['timepoint'],
30 draw_ellipses: null,
31 run_iris_demo: false,
32 impute_values:${impute_values}
33 },
34 label: pca-table
35 }'
36 '++analysis.dataset.subfolder='
37 '++analysis.dataset.label='
38 '++analysis.dataset.conditions=${conds}'
39 #if $metadata_path:
40 '++analysis.dataset.metadata=metadata'
41 #end if
42 #if $abundance_file:
43 '++analysis.dataset.abundances=abundance'
44 #end if
45 #if $me_or_frac_contrib_file:
46 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
47 #end if
48 @REMOVE_CONFIG@
49 ]]></command>
50 <inputs> <!-- Every parameter is declared by a macro defined outside this file. -->
51 <expand macro="input_parameters_pca"/> <!-- Presumably declares metadata_path, abundance_file and me_or_frac_contrib_file, which the command section tests; confirm in macros.xml. -->
52 <expand macro="conditions"/> <!-- Presumably declares the "conditions" parameter that the test section sets; confirm in macros.xml. -->
53 </inputs>
54
55 <outputs> <!-- A single list collection gathers every table written by the run. -->
56 <collection name="report" type="list">
57 <discover_datasets pattern="__designation__" directory="tables" format="tabular"/> <!-- Scans the "tables" directory, the same path given as ++table_path in the command; each file found is added as a tabular element whose identifier follows the file name, as the test expectations show. -->
58 </collection>
59 </outputs>
60 <tests> <!-- One regression test: abundances plus metadata, no enrichment file. -->
61 <test>
62 <param name="abundance_file" ftype="tabular" value="rawAbundances.csv" />
63 <param name="metadata_path" ftype="tabular" value="example2_metadata.csv"/>
64 <param name="conditions" value='Control,L-Cycloserine'/> <!-- Two comma-separated names, presumably the conditions to include. -->
65 <output_collection name="report" type="list" count="12"> <!-- 12 = 2 compartments (cell, med) x 3 scopes (whole data set, T0, T2h) x 2 tables (pc, var). -->
66 <element file="abundances--cell_pc.tsv" name="abundances--cell_pc.tsv" ftype="tabular"/>
67 <element file="abundances--cell_var.tsv" name="abundances--cell_var.tsv" ftype="tabular"/>
68 <element file="abundances--med_pc.tsv" name="abundances--med_pc.tsv" ftype="tabular"/>
69 <element file="abundances--med_var.tsv" name="abundances--med_var.tsv" ftype="tabular"/>
70 <element file="abundances--T0--cell_pc.tsv" name="abundances--T0--cell_pc.tsv" ftype="tabular"/>
71 <element file="abundances--T0--cell_var.tsv" name="abundances--T0--cell_var.tsv" ftype="tabular"/>
72 <element file="abundances--T0--med_pc.tsv" name="abundances--T0--med_pc.tsv" ftype="tabular"/>
73 <element file="abundances--T0--med_var.tsv" name="abundances--T0--med_var.tsv" ftype="tabular"/>
74 <element file="abundances--T2h--cell_pc.tsv" name="abundances--T2h--cell_pc.tsv" ftype="tabular"/>
75 <element file="abundances--T2h--cell_var.tsv" name="abundances--T2h--cell_var.tsv" ftype="tabular"/>
76 <element file="abundances--T2h--med_pc.tsv" name="abundances--T2h--med_pc.tsv" ftype="tabular"/>
77 <element file="abundances--T2h--med_var.tsv" name="abundances--T2h--med_var.tsv" ftype="tabular"/>
78 </output_collection>
79 </test>
80 </tests>
81 <help><![CDATA[
82 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
83
84 This tool performs the Principal Components Analysis (PCA) on your data,
85 generating tab-delimited .csv files with the results of the PCA, that is, all the principal components or "dimensions" eigenvalues, and the percentage of explained variance across all the principal components detected in your data.
86
87 For automatic plotting of a PCA analysis, use our tool **DIMet pca plot**.
88
89 **Input data files**
90
91 This tool requires (at most) 3 tab-delimited .csv files as inputs. There are two types of files:
92
93 - The measures' (or quantifications') files, which can be of 2 types for this tool (listed below).
94
95 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
96
97 For running DIMet @EXECUTABLE@ you need **at least one file** of measures:
98
99 - The total **abundances** (of the metabolites) file
100
101 - The mean **enrichment** or labelled fractional contributions
102
103
104 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
105
106
107 **Measures' files**
108
109 The measures' files must be organized as matrices:
110
111 - The first column must contain Metabolite IDs that are unique (not repeated) within the file.
112
113 - The rest of the columns correspond to the samples
114
115 - The rows correspond to the metabolites
116
117 - The values must be tab separated, with the first row containing the sample/column labels.
118
119 See the following examples of measures files:
120
121
122 Example - Metabolites **abundances**:
123
124 =============== ================== ================== ================== ================== ================== ==================
125 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
126 =============== ================== ================== ================== ================== ================== ==================
127 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956
128 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051
129 Glc6P 2310 2142 2683 1683 012532068 1252172
130 Gly3P 399298 991656565 525195 6365231 89451625 4952651963
131 IsoCit 0 0 0 84915613 856236 954651610
132 =============== ================== ================== ================== ================== ================== ==================
133
134 Example - mean **enrichment** or labeled fractional contributions:
135
136 =============== ================== ================== ================== ================== ================== ==================
137 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
138 =============== ================== ================== ================== ================== ================== ==================
139 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9
140 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68
141 Glc6P 0.06 0.66 2683 0.06 2068 2172
142 Gly3P 0.06 0.06 0.06 1 5 3
143 IsoCit 0.06 1 0.49 0.36 6 10
144 =============== ================== ================== ================== ================== ================== ==================
145
146
147 **Metadata File Information**
148
149 Provide a tab-separated file that has the names of the samples in the first column and one header row.
150 Column names must be exactly in this order:
151
152 name_to_plot
153 condition
154 timepoint
155 timenum
156 compartment
157 original_name
158
159
160 Example **Metadata File**:
161
162
163 ==================== =============== ============= ============ ================ =================
164 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name**
165 ==================== =============== ============= ============ ================ =================
166 Control_cell_T0-1 Control T0 0 cell MCF001089_TD01
167 Control_cell_T0-2 Control T0 0 cell MCF001089_TD02
168 Control_cell_T0-3 Control T0 0 cell MCF001089_TD03
169 Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04
170 Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05
171 Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06
172 Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07
173 Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08
174 Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01
175 Control_med_T24-1 Control T24 24 med MCF001090_TD02
176 Control_med_T24-2 Control T24 24 med MCF001090_TD03
177 Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04
178 Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05
179 Control_med_T0-1 Control T0 0 med MCF001090_TD06
180 Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07
181 Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08
182 ==================== =============== ============= ============ ================ =================
183
184
185 The column **original_name** must have the names of the samples as given in your data.
186
187 The column **name_to_plot** must have the names as you want them to be displayed (or set them identical to original_name if you prefer). Setting
188 meaningful names is the better choice, as they are used to display the results.
189
190 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters ("T", "t", "s", "h", etc.)
191 nor any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
192
193 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the
194 compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
195
196
197 **Running the analysis**
198
199 You can specify how you want your analysis to be executed; hints next to the parameters will guide you.
200
201 Our tool automatically analyzes the entirety of your data (one global PCA), and also splits your data by timepoint to generate PCA results for each timepoint (which is convenient to explore the "grouping" of conditions); if you only have one condition, you can disregard the per-timepoint results.
202
203 The output consists of two .csv files for each performed PCA analysis (one file with the Principal Components (PC), one file with the variances).
204
205 **Available data for testing**
206
207 You can test our tool with the data from our manuscript https://zenodo.org/record/10579862 (the pertinent
208 files for you are located in the subfolders inside the data folder).
209 You can also use the minimal data examples from https://zenodo.org/record/10579891
210
211 ]]>
212 </help>
213 <expand macro="citations" />
214 </tool>