comparison dimet_pca_plot.xml @ 0:58498838f0e0 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/DIMet commit 3dba8748fbc8cc8e89ffc08e5febe0a0527a96a5
author iuc
date Fri, 21 Jun 2024 18:47:21 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:58498838f0e0
1 <tool id="dimet_@EXECUTABLE@" name="dimet @TOOL_LABEL@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
2 <description>
3 Figures of Principal Component Analysis for tracer metabolomics (by DIMet)
4 </description>
<!-- Tool-specific tokens: they fill the @TOOL_LABEL@ and @EXECUTABLE@ placeholders used in the tool id and name attributes (and @EXECUTABLE@ in the help text). -->
5 <macros>
6 <token name="@TOOL_LABEL@">pca plot</token>
7 <token name="@EXECUTABLE@">pca_plot</token>
<!-- NOTE(review): the other @...@ tokens and the expanded macros used by this tool are not defined here; presumably they live in macros.xml. Confirm there. -->
8 <import>macros.xml</import>
9 </macros>
10 <expand macro="requirements"/>
<!--
  Builds the DIMet command line: `python -m dimet` is invoked with Hydra-style `++key=value`
  overrides that describe the PCA-plot analysis inline.
  * figure_path is set to `figures`, the same directory name that the outputs section scans.
  * figure_format is read from the output_options section of the tool form.
  * The metadata / abundances / mean_enrichment dataset overrides are emitted only when the
    matching input is set (Cheetah #if guards).
  NOTE(review): the @INIT_...@ and @REMOVE_CONFIG@ tokens, as well as the variables `conds` and
  `impute_values`, are not defined in this file; presumably they come from macros.xml. Confirm there.
-->
11 <command detect_errors="exit_code"><![CDATA[
12 @INIT_CONFIG@
13 @INIT_PCA@
14 @INIT_IMPUTE_VALUES@
15 @INIT_CONDITIONS@
16 HYDRA_FULL_ERROR=1 python -m dimet
17 '++hydra.run.dir=.'
18 '++figure_path=figures'
19 '++table_path=tables'
20 '++analysis={
21 dataset:{
22 _target_:dimet.data.DatasetConfig,
23 name: "Galaxy DIMet run"
24 },
25 method:{
26 _target_: dimet.method.PcaPlotConfig,
27 label: pca-plot,
28 name: "Generate Principal Component Analysis plots",
29 pca_split_further:['timepoint'],
30 draw_ellipses: null,
31 run_iris_demo: false,
32 color: condition,
33 style: timepoint,
34 figure_format:${output_options.figure_format},
35 impute_values:${impute_values}
36 },
37 label: pca-plot
38 }'
39 '++analysis.dataset.subfolder='
40 '++analysis.dataset.label='
41 '++analysis.dataset.conditions=${conds}'
42 #if $metadata_path:
43 '++analysis.dataset.metadata=metadata'
44 #end if
45 #if $abundance_file:
46 '++analysis.dataset.abundances=abundance'
47 #end if
48 #if $me_or_frac_contrib_file:
49 '++analysis.dataset.mean_enrichment=me_or_frac_contrib'
50 #end if
51 @REMOVE_CONFIG@
52 ]]></command>
<!-- Tool form: data inputs and conditions come from shared macros; the only parameter declared here is the figure format. -->
53 <inputs>
54 <expand macro="input_parameters_pca"/>
55 <expand macro="conditions"/>
56 <section name="output_options" title="Output options">
<!-- The default is declared with selected="true" on the option: select parameters do not take their default from a value attribute. -->
57 <param name="figure_format" type="select" display="radio" label="Select output figure format" help="Format used for the generated figures; exactly one must be chosen">
58 <option value="pdf" selected="true">Pdf</option>
59 <option value="svg">Svg</option>
60 </param>
61 </section>
62 </inputs>
63
<!-- Every file found in the "figures" directory becomes one element of the "report" list collection; the element identifier and datatype are taken from the file name and its extension. -->
64 <outputs>
65 <collection type="list" name="report">
66 <discover_datasets directory="figures" pattern="__designation_and_ext__"/>
67 </collection>
68 </outputs>
<!--
  Single regression test: runs the tool on the abundance table only (no mean-enrichment input),
  requests SVG output and expects 18 figures in the "report" collection. The expected names cover
  the whole dataset plus timepoints T0 and T2h, each for compartments cell and med, with three
  figures per combination (apparently a variance bar plot and a PC scatter plot with and without
  labels). Files are compared by size (sim_size) with a delta of 100.
  The order of the element entries is kept as in the original on purpose.
-->
69 <tests>
70 <test>
71 <param name="abundance_file" value="rawAbundances.csv" ftype="tabular"/>
72 <param name="metadata_path" value="example2_metadata.csv" ftype="tabular"/>
73 <param name="conditions" value="Control,L-Cycloserine"/>
74 <section name="output_options">
75 <param name="figure_format" value="svg"/>
76 </section>
77 <output_collection type="list" name="report" count="18">
78 <element name="abundances--T0--cell_var" file="abundances--T0--cell_var.svg" ftype="svg" compare="sim_size" delta="100"/>
79 <element name="abundances--T0--cell--label-n_pc" file="abundances--T0--cell--label-n_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
80 <element name="abundances--T0--cell--label-y_pc" file="abundances--T0--cell--label-y_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
81 <element name="abundances--T0--med_var" file="abundances--T0--med_var.svg" ftype="svg" compare="sim_size" delta="100"/>
82 <element name="abundances--T0--med--label-n_pc" file="abundances--T0--med--label-n_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
83 <element name="abundances--T0--med--label-y_pc" file="abundances--T0--med--label-y_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
84 <element name="abundances--T2h--cell--label-n_pc" file="abundances--T2h--cell--label-n_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
85 <element name="abundances--T2h--cell--label-y_pc" file="abundances--T2h--cell--label-y_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
86 <element name="abundances--T2h--cell_var" file="abundances--T2h--cell_var.svg" ftype="svg" compare="sim_size" delta="100"/>
87 <element name="abundances--T2h--med--label-n_pc" file="abundances--T2h--med--label-n_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
88 <element name="abundances--T2h--med--label-y_pc" file="abundances--T2h--med--label-y_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
89 <element name="abundances--T2h--med_var" file="abundances--T2h--med_var.svg" ftype="svg" compare="sim_size" delta="100"/>
90 <element name="abundances--cell--label-n_pc" file="abundances--cell--label-n_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
91 <element name="abundances--cell--label-y_pc" file="abundances--cell--label-y_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
92 <element name="abundances--cell_var" file="abundances--cell_var.svg" ftype="svg" compare="sim_size" delta="100"/>
93 <element name="abundances--med--label-n_pc" file="abundances--med--label-n_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
94 <element name="abundances--med--label-y_pc" file="abundances--med--label-y_pc.svg" ftype="svg" compare="sim_size" delta="100"/>
95 <element name="abundances--med_var" file="abundances--med_var.svg" ftype="svg" compare="sim_size" delta="100"/>
96 </output_collection>
97 </test>
98 </tests>
99 <help><![CDATA[
100 This module is part of DIMet: Differential analysis of Isotope-labeled targeted Metabolomics data (https://pypi.org/project/DIMet/).
101
102 This tool performs the Principal Components Analysis (PCA) on your data,
103 generating the figures with the results of the PCA, that is, the scatter-plot of the first two principal components or dimensions (eigenvalues), and the bar-plot of the percentage of explained variance across all the principal components detected in your data.
104
105 The figures in .pdf format are of publication quality, and as they are vector images you can open them and customize their aesthetics with professional image software such as Inkscape, Adobe Illustrator, Sketch, CorelDRAW, etc.
106
107
108 **Input data files**
109
110 This tool requires (at most) 3 tab-delimited .csv files as inputs. There are two types of files:
111
112 - The measures' (or quantifications') files, which can be of 2 types for this tool (listed below).
113
114 - The metadata, a unique file with the description of the samples in your measures' files. This is compulsory.
115
116 For running DIMet @EXECUTABLE@ you need **at least one file** of measures:
117
118 - The total **abundances** (of the metabolites) file
119
120 - The mean **enrichment** or labelled fractional contributions
121
122
123 and one metadata file, WHICH IS COMPULSORY, see section **Metadata File Information**.
124
125
126 **Measures' files**
127
128 The measures' files must be organized as matrices:
129
130 - The first column must contain Metabolite IDs that are unique (not repeated) within the file.
131
132 - The rest of the columns correspond to the samples
133
134 - The rows correspond to the metabolites
135
136 - The values must be tab separated, with the first row containing the sample/column labels.
137
138 See the following examples of measures files:
139
140
141 Example - Metabolites **abundances**:
142
143 =============== ================== ================== ================== ================== ================== ==================
144 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
145 =============== ================== ================== ================== ================== ================== ==================
146 2_3-PG 8698823.9926 10718737.7217 10724373.9 8536484.5 22060650 28898956
147 2-OHGLu 36924336 424336 92060650 45165 84951950 965165051
148 Glc6P 2310 2142 2683 1683 012532068 1252172
149 Gly3P 399298 991656565 525195 6365231 89451625 4952651963
150 IsoCit 0 0 0 84915613 856236 954651610
151 =============== ================== ================== ================== ================== ================== ==================
152
153 Example - mean **enrichment** or labeled fractional contributions:
154
155 =============== ================== ================== ================== ================== ================== ==================
156 ID **MCF001089_TD01** **MCF001089_TD02** **MCF001089_TD03** **MCF001089_TD04** **MCF001089_TD05** **MCF001089_TD06**
157 =============== ================== ================== ================== ================== ================== ==================
158 2_3-PG 0.9711 0.968 0.9909 0.991 0.40 0.9
159 2-OHGLu 0.01719 0.0246 0.554 0.555 0.73 0.68
160 Glc6P 0.06 0.66 2683 0.06 2068 2172
161 Gly3P 0.06 0.06 0.06 1 5 3
162 IsoCit 0.06 1 0.49 0.36 6 10
163 =============== ================== ================== ================== ================== ================== ==================
164
165
166 **Metadata File Information**
167
168 Provide a tab-separated file that has the names of the samples in the first column and one header row.
169 Column names must be exactly in this order:
170
171 name_to_plot
172 condition
173 timepoint
174 timenum
175 compartment
176 original_name
177
178
179 Example **Metadata File**:
180
181
182 ==================== =============== ============= ============ ================ =================
183 **name_to_plot** **condition** **timepoint** **timenum** **compartment** **original_name**
184 ==================== =============== ============= ============ ================ =================
185 Control_cell_T0-1 Control T0 0 cell MCF001089_TD01
186 Control_cell_T0-2 Control T0 0 cell MCF001089_TD02
187 Control_cell_T0-3 Control T0 0 cell MCF001089_TD03
188 Tumoral_cell_T0-1 Tumoral T0 0 cell MCF001089_TD04
189 Tumoral_cell_T0-2 Tumoral T0 0 cell MCF001089_TD05
190 Tumoral_cell_T0-3 Tumoral T0 0 cell MCF001089_TD06
191 Tumoral_cell_T24-1 Tumoral T24 24 cell MCF001089_TD07
192 Tumoral_cell_T24-2 Tumoral T24 24 cell MCF001089_TD08
193 Tumoral_cell_T24-3 Tumoral T24 24 cell MCF001090_TD01
194 Control_med_T24-1 Control T24 24 med MCF001090_TD02
195 Control_med_T24-2 Control T24 24 med MCF001090_TD03
196 Tumoral_med_T24-1 Tumoral T24 24 med MCF001090_TD04
197 Tumoral_med_T24-2 Tumoral T24 24 med MCF001090_TD05
198 Control_med_T0-1 Control T0 0 med MCF001090_TD06
199 Tumoral_med_T0-1 Tumoral T0 0 med MCF001090_TD07
200 Tumoral_med_T0-2 Tumoral T0 0 med MCF001090_TD08
201 ==================== =============== ============= ============ ================ =================
202
203
204 The column **original_name** must have the names of the samples as given in your data.
205
206 The column **name_to_plot** must have the names as you want them to be (or set identical to original_name if you prefer). Setting names that
207 are meaningful is the better choice, as they are used to display the results.
208
209 The column **timenum** must contain only the numeric part of the timepoint, for example 2, 0, 10, 100 (that is, without letters such as "T", "t", "s", "h", etc.,
210 or any other symbol). Make sure these time numbers are in the same units (but do not write the units here!).
211
212 The column **compartment** is an abbreviation, coined by you, for the compartments. This will be used for the results' files names: the longer the
213 compartments names are, the longer the output files' names! Please pick short and clear abbreviations to fill this column.
214
215
216 **Running the analysis**
217
218 You can specify how you want your analysis to be executed; usage hints next to the parameters will guide you.
219
220 Our tool automatically processes the entirety of your data (global PCA plots), and also splits your data by timepoint to generate PCA plots by timepoint (which is convenient to explore the "grouping" of conditions), but if you only have one condition you can discard the per-timepoint plots.
221
222 The output, for the global PCA and for each timepoint PCA, consists of:
223
224 - a scatter-plot of the first two Principal Components (PC), with labels corresponding to 'name_to_plot'.
225
226 - the same scatter-plot as above but without labels.
227
228 - a bar-plot of the percentage of explained variances.
229
230
231
232 **Available data for testing**
233
234 You can test our tool with the data from our manuscript https://zenodo.org/record/10579862 (the pertinent
235 files for you are located in the subfolders inside the data folder).
236 You can also use the minimal data examples from https://zenodo.org/record/10579891
237
238 ]]>
239 </help>
240 <expand macro="citations" />
241 </tool>