Mercurial > repos > recetox > ramclustr
comparison macros.xml @ 5:10ded21d47c0 draft
"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/ramclustr commit 3d2821ffc97cc4f9287ee83bbddb306a8034daa0"
author | recetox |
---|---|
date | Fri, 11 Feb 2022 14:14:38 +0000 |
parents | 69e0da4703b5 |
children | f01ab6fe8857 |
comparison
equal
deleted
inserted
replaced
4:69e0da4703b5 | 5:10ded21d47c0 |
---|---|
30 </creator> | 30 </creator> |
31 </xml> | 31 </xml> |
32 | 32 |
33 <xml name="parameters_csv"> | 33 <xml name="parameters_csv"> |
34 <section name="ms_csv" title="Input MS Data as CSV" expanded="true"> | 34 <section name="ms_csv" title="Input MS Data as CSV" expanded="true"> |
35 <param label="ms" name="ms" type="data" format="csv" | 35 <param label="Input CSV" name="ms" type="data" format="csv" |
36 help="Features as columns, rows as samples. Column header mz_rt"/> | 36 help="Features as columns, rows as samples. Column header in format mz_rt."/> |
37 <param label="idmsms" name="idmsms" type="data" format="csv" optional="true" | 37 <param label="idMSMS" name="idmsms" type="data" format="csv" optional="true" |
38 help="Optional idMSMS / MSe csv data. same dim and names as ms required"/> | 38 help="Optional idMSMS / MSe csv data. Same dimension and names as in input CSV are required."/> |
39 <param label="sample_name_column" name="sample_name_column" type="integer" value="1" | 39 </section> |
40 help="Which column from the csv file contains sample names?"/> | 40 </xml> |
41 <param label="feature_delimiter" name="feature_delimiter" type="text" value="_" | 41 |
42 help="Only required if ms input is set! How feature mz and rt are delimited in csv import column header e.g. ='-'"/> | 42 <xml name="parameters_xcms"> |
43 <param label="retention_time_column" name="retention_time_column" type="integer" value="2" | 43 <section name="xcms" title="Input MS Data as XCMS" expanded="true"> |
44 help="Which position in delimited column header represents the retention time (csv only)"/> | 44 <param name="input_xcms" label="Input XCMS" type="data" format="rdata.xcms.fillpeaks" |
45 <param label="st" name="st" type="float" value="1" help="Sigma t - time similarity decay value. | 45 help="Grouped feature data for clustering." /> |
46 A recommended starting point is half the value of your average chromatographic peak width at half max (seconds))."/> | 46 <param label="Preserve phenotype" name="usePheno" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" |
47 </section> | 47 help="Transfer phenotype data from XCMS object to Spec abundance file."/> |
48 </xml> | 48 </section> |
49 | |
50 <xml name="parameters_excluded"> | |
51 <param label="MStag" name="MStag" type="text" optional="true" | |
52 help="Character string in 'taglocation' to designat MS / MSe files e.g. '01.cdf'"/> | |
53 <param label="idMSMStag" name="idMSMStag" type="text" optional="true" | |
54 help="Character string in 'taglocation' to designat idMSMS / MSe files e.g. '02.cdf'"/> | |
55 <param label="taglocation" name="taglocation" type="text" value="filepaths" | |
56 help="'filepaths' by default, 'phenoData[,1]' is another option. refers to xcms slot"/> | |
57 </xml> | 49 </xml> |
58 | 50 |
59 <xml name="parameters_required"> | 51 <xml name="parameters_required"> |
60 <section name="required" title="Required Parameters" expanded="true"> | 52 <param label="Sigma r" name="sr" type="float" value="0.5" help="Correlational similarity between features."/> |
61 <param label="sr" name="sr" type="float" value="0.5" help="Sigma r - correlational similarity decay value"/> | 53 <param label="Correlation method" name="cor_method" type="select" display="radio" |
62 <param label="deepSplit" name="deepSplit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" | 54 help="Choose correlational method to be used - see [1] for details."> |
63 help="Controls how agressively the HCA tree is cut - see ?cutreeDynamicTree"/> | 55 <option value="pearson" selected="true">pearson</option> |
64 <param label="blocksize" name="blocksize" type="integer" value="2000" | 56 <option value="everything">everything</option> |
65 help="Number of features (scans?) processed in one block =1000,"/> | 57 <option value="spearman">spearman</option> |
66 <param label="mult" name="mult" type="integer" value="5" | 58 <option value="kendall">kendall</option> |
67 help="Internal value, can be used to influence processing speed/ram usage"/> | 59 </param> |
68 <param label="hmax" name="hmax" type="float" value="0.3" | 60 <param label="Maximum RT difference" name="maxt" value="60" type="float" |
69 help="Precut the tree at this height, default 0.3 - see ?cutreeDynamicTree"/> | 61 help="Maximum difference to calculate RT similarity - values beyond this are assigned zero similarity."/> |
70 <param label="collapse" name="collapse" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" | 62 </xml> |
71 help="Reduce feature intensities to spectrum intensities?"/> | 63 |
72 <param label="usePheno" name="usePheno" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" | 64 <xml name="main_parameters"> |
73 help="Transfer phenotype data from XCMS object to SpecAbund dataset?"/> | 65 <section name="clustering" title="Clustering" expanded="true"> |
74 <!-- | 66 <param label="Clustering linkage method" name="linkage" type="select" display="radio" |
75 Currently not forwarded because the MSP is exported always manually afterwards | 67 help="Choose hierarchical clustering linkage method - see [2] for details."> |
76 <param label="mspout" name="mspout" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" help="write msp formatted spectra to file?" /> | |
77 --> | |
78 <param label="normalize" name="normalize" type="select" display="radio" | |
79 help="Either 'none', 'TIC', 'quantile', or 'batch.qc' normalization of feature intensities. see batch.qc overview in details. "> | |
80 <option value="none" selected="true">none</option> | |
81 <option value="TIC">TIC</option> | |
82 <option value="quantile">quantile</option> | |
83 <option value="batch.qc">batch.qc</option> | |
84 </param> | |
85 <param label="qc_inj_range" name="qc_inj_range" type="integer" value="20" | |
86 help="How many injections around each injection are to be scanned for presence of QC samples when | |
87 using batch.qc normalization? A good rule of thumb is between 1 and 3 times the typical | |
88 injection span between QC injections. i.e. if you inject QC ever 7 samples, set this to | |
89 between 7 and 21. Smaller values provide more local precision but make normalization sensitive | |
90 to individual poor outliers (though these are first removed using the boxplot function outlier | |
91 detection), while wider values provide less local precision in normalization but better | |
92 stability to individual peak areas."/> | |
93 | |
94 <param label="minModuleSize" name="minModuleSize" type="integer" value="2" | |
95 help="How many features must be part of a cluster to be returned? default = 2"/> | |
96 <param label="linkage" name="linkage" type="select" display="radio" value="average" | |
97 help="Hierarchical clustering linkage method - see ?hclust"> | |
98 <option value="average" selected="true">average</option> | 68 <option value="average" selected="true">average</option> |
99 <option value="ward.D">ward.D</option> | 69 <option value="ward.D">ward.D</option> |
100 <option value="ward.D2">ward.D2</option> | 70 <option value="ward.D2">ward.D2</option> |
101 <option value="single">single</option> | 71 <option value="single">single</option> |
102 <option value="complete">complete</option> | 72 <option value="complete">complete</option> |
103 <option value="mcquitty">mcquitty</option> | 73 <option value="mcquitty">mcquitty</option> |
104 <option value="median">median</option> | 74 <option value="median">median</option> |
105 <option value="centroid">centroid</option> | 75 <option value="centroid">centroid</option> |
106 </param> | 76 </param> |
107 | 77 <param label="Minimal cluster size" name="minModuleSize" type="integer" value="2" |
108 <param label="mzdec" name="mzdec" type="integer" value="3" | 78 help="Minimal size (number of features) of a cluster."/> |
109 help="Number of decimal places used in printing m/z values"/> | 79 <param label="Maximal tree height" name="hmax" type="float" value="0.3" |
110 <param label="cor_method" name="cor_method" type="select" display="radio" value="pearson" | 80 help="Cut the Hierarchical Cluster Analysis tree at this height, see [3] for details."/> |
111 help="Which correlational method used to calculate 'r' - see ?cor"> | 81 <param label="Use deepSplit" name="deepSplit" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" |
112 <option value="pearson" selected="true">pearson</option> | 82 help="Check to produce more smaller clusters, uncheck for fewer bigger clusters, see [3] for details."/> |
113 <option value="everything">everything</option> | 83 </section> |
114 <option value="spearman">spearman</option> | 84 |
115 <option value="kendall">kendall</option> | 85 <section name="normalisation" title="Normalisation" expanded="true"> |
116 </param> | 86 <conditional name="normalisation_method"> |
117 | 87 <param label="Normalisation method" name="normalize" type="select" display="radio" |
118 <param label="rt_only_low_n" name="rt_only_low_n" type="boolean" truevalue="TRUE" falsevalue="FALSE" | 88 help="Choose method for normalization of feature intensities."> |
89 <option value="none" selected="true">none</option> | |
90 <option value="TIC">TIC</option> | |
91 <option value="quantile">quantile</option> | |
92 <option value="batch.qc">batch.qc</option> | |
93 </param> | |
94 <when value="batch.qc"> | |
95 <param label="Metadata details" name="batch_order_qc" type="data" format="csv" optional="true" | |
96 help="CSV with sample names (or indices, currently not handled) on rows and columns with: | |
97 batch number ('batch'), position in sequence ('order'), and whether it is a QC sample or not | |
98 ('qc' with true/false OR 'sampleType' with 'sample/qc/blank')."/> | |
99 <param label="QC injection range" name="qc_inj_range" type="integer" value="20" | |
100 help="How many injections around each injection are to be scanned for presence of QC samples? | |
101 A good rule of thumb is between 1 and 3 times the typical | |
102 injection span between QC injections. i.e. if you inject QC every 7 samples, set this to | |
103 between 7 and 21. Smaller values provide more local precision but make normalization sensitive | |
104 to individual poor outliers (though these are first removed using the boxplot function outlier | |
105 detection), while wider values provide less local precision in normalization but better | |
106 stability to individual peak areas."/> | |
107 </when> | |
108 </conditional> | |
109 </section> | |
110 | |
111 <section name="performance" title="Performance"> | |
112 <param label="Blocksize" name="blocksize" type="integer" value="2000" | |
113 help="Number of features processed in one block."/> | |
114 <param label="Blocksize factor" name="mult" type="integer" value="5" | |
115 help="Factor to scale blocksize to influence processing speed."/> | |
116 </section> | |
117 | |
118 <section name="msp_output_details" title="MSP output"> | |
119 <param label="Merge MSP Files" name="merge_msp" type="boolean" truevalue="TRUE" falsevalue="FALSE" | |
120 checked="true" help="Merge all MSP in one file or export one MSP per spectra."/> | |
121 <param label="m/z decimal places" name="mzdec" type="integer" value="6" | |
122 help="Number of decimal places used in printing m/z values."/> | |
123 <!-- | |
124 Currently not forwarded because the MSP is exported always manually afterwards | |
125 <param label="mspout" name="mspout" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" help="write msp formatted spectra to file?" /> | |
126 --> | |
127 </section> | |
128 | |
129 <section name="extras" title="Extras"> | |
130 <param label="RT only low n" name="rt_only_low_n" type="boolean" truevalue="TRUE" falsevalue="FALSE" | |
119 checked="true" | 131 checked="true" |
120 help="At low injection numbers, correlational relationships of peak intensities may be unreliable. | 132 help="At low injection numbers, correlational relationships of peak intensities may be unreliable. |
121 By defualt ramclustR will simply ignore the correlational r value and cluster on retention time alone. | 133 By default, RAMClustR will simply ignore the correlational Sigma r value and cluster on retention time alone. |
122 If you wish to use correlation with n less than 5, set this value to FALSE."/> | 134 If you wish to use correlation with n less than 5, set this value to FALSE."/> |
123 <param label="replace_zeros" name="replace_zeros" type="boolean" truevalue="TRUE" falsevalue="FALSE" | 135 <param label="Replace zeros" name="replace_zeros" type="boolean" truevalue="TRUE" falsevalue="FALSE" |
124 checked="true" | 136 checked="true" |
125 help="NA, NaN, and Inf values are replaced with zero, and zero values are sometimes returned from | 137 help="NA, NaN, and Inf values are replaced with zero, and zero values are sometimes returned from |
126 peak picking. When TRUE, zero values will be replaced with a small amount of noise, with noise level | 138 peak picking. When TRUE, zero values will be replaced with a small amount of noise, with noise level |
127 set based on the detected signal intensities for that feature. "/> | 139 set based on the detected signal intensities for that feature."/> |
128 <param label="Merge MSP Files" name="merge_msp" type="boolean" truevalue="TRUE" falsevalue="FALSE" | 140 <param label="Experimental design metadata" name="ExpDes" type="data" format="csv" optional="true" |
129 checked="true" help="Whether to merge all msp in one file or export one msp per spectra"/> | 141 help="Definition of experimental design in CSV format." /> |
130 </section> | |
131 </xml> | |
132 | |
133 <xml name="parameters_optional_xcms"> | |
134 <section name="optional" title="Optional Parameters" expanded="false"> | |
135 <param label="st" name="st" type="float" optional="true" help="Sigma t - time similarity decay value. | |
136 A recommended starting point is half the value of your average chromatographic peak width at half max (seconds))."/> | |
137 <param label="fftempdir" name="fftempdir" type="text" optional="true" | |
138 help="Valid path: if there are file size limitations on the default ff pacakge temp directory - | |
139 getOptions('fftempdir') - you can change the directory used as the fftempdir with this option."/> | |
140 <param label="maxt" name="maxt" type="integer" optional="true" | |
141 help="Maximum time difference to calculate retention similarity for - all values beyond this are assigned similarity of zero"/> | |
142 </section> | |
143 </xml> | |
144 | |
145 <xml name="parameters_optional_csv"> | |
146 <section name="optional" title="Optional Parameters" expanded="false"> | |
147 <param label="fftempdir" name="fftempdir" type="text" optional="true" | |
148 help="Valid path: if there are file size limitations on the default ff pacakge temp directory - | |
149 getOptions('fftempdir') - you can change the directory used as the fftempdir with this option."/> | |
150 <param label="maxt" name="maxt" type="integer" optional="true" | |
151 help="Maximum time difference to calculate retention similarity for - all values beyond this are assigned similarity of zero"/> | |
152 </section> | |
153 </xml> | |
154 | |
155 <xml name="parameters_optional_metadata"> | |
156 <section name="metadata" title="Optional Metadata" expanded="false"> | |
157 <param label="metadata" name="batch_order_qc" type="data" format="csv" optional="true" | |
158 help="CSV with sample names (or indices, currently not handled) on rows and columns with: batch | |
159 number ('batch'), position in sequence ('order') and whether it is a qc sample or not | |
160 ('qc' with true/false OR 'sampleType' with 'sample/qc/blank')."/> | |
161 <param label="Experimental design" name="ExpDes" type="data" format="csv" optional="true" help="Definition of experimental design in CSV format." /> | |
162 </section> | 142 </section> |
163 </xml> | 143 </xml> |
164 | 144 |
165 <xml name="output_msp"> | 145 <xml name="output_msp"> |
166 <collection label="Mass spectra from ${tool.name} on ${on_string}" name="mass_spectra" type="list"> | 146 <collection label="Mass spectra from ${tool.name} on ${on_string}" name="mass_spectra_collection" type="list"> |
167 <discover_datasets pattern="__name_and_ext__" directory="spectra" recurse="true" ext="msp"/> | 147 <discover_datasets pattern="__name_and_ext__" directory="spectra" recurse="true" ext="msp"/> |
168 </collection> | 148 <filter>not msp_output_details['merge_msp']</filter> |
149 </collection> | |
150 <data label="Mass spectra from ${tool.name} on ${on_string}" name="mass_spectra_merged" format="msp"> | |
151 <filter>msp_output_details['merge_msp']</filter> | |
152 </data> | |
169 </xml> | 153 </xml> |
170 | 154 |
171 <xml name="citations"> | 155 <xml name="citations"> |
172 <citations> | 156 <citations> |
173 <!-- Example of annotating a citation using a BibTex entry. --> | 157 <!-- Example of annotating a citation using a BibTex entry. --> |
232 | 216 |
233 (1) no more than one sample (or file) name column and one feature name row; | 217 (1) no more than one sample (or file) name column and one feature name row; |
234 (2) feature names that contain the mass and retention times, separated by a constant delimiter; and | 218 (2) feature names that contain the mass and retention times, separated by a constant delimiter; and |
235 (3) features in columns and samples in rows. | 219 (3) features in columns and samples in rows. |
236 | 220 |
221 +----------------------+-------------------+-------------------+--------------------+--------------------+ | |
222 | sample | 100.88_262.464 | 100.01_423.699 | 100.003_128.313 | 100.0057_154.686 | | |
223 +======================+===================+===================+====================+====================+ | |
224 | 10_qc_16x_dil_milliq | 0 | 195953.6376 | 0 | 0 | | |
225 +----------------------+-------------------+-------------------+--------------------+--------------------+ | |
226 | 11_qc_8x_dil_milliq | 0 | 117742.1828 | 4247300.664 | 0 | | |
227 +----------------------+-------------------+-------------------+--------------------+--------------------+ | |
228 | 12_qc_32x_dil_milliq | 4470859.38 | 0 | 2206092.112 | 0 | | |
229 +----------------------+-------------------+-------------------+--------------------+--------------------+ | |
230 | 15_qc_16x_dil_milliq | 0 | 0 | 2767477.481 | 0 | | |
231 +----------------------+-------------------+-------------------+--------------------+--------------------+ | |
232 | |
233 | |
237 Downstream Tools | 234 Downstream Tools |
235 The output is an msp file or a collection of msp files, together with an additional Spec Abundance file. | |
236 | |
238 +---------+--------------+----------------------+ | 237 +---------+--------------+----------------------+ |
239 | Name | Output File | Format | | 238 | Name | Output File | Format | |
240 +=========+==============+======================+ | 239 +=========+==============+======================+ |
241 | matchMS | Mass Spectra | collection (tgz/msp) | | 240 | matchMS | Mass Spectra | collection (tgz/msp) | |
242 +---------+--------------+----------------------+ | 241 +---------+--------------+----------------------+ |
264 ([M+2H]++). This can become further complicated by considering combinations of these phenomena. For | 263 ([M+2H]++). This can become further complicated by considering combinations of these phenomena. For |
265 example [2M+3H]+++ (triply charged dimer) or an in-source fragment of a dimer. | 264 example [2M+3H]+++ (triply charged dimer) or an in-source fragment of a dimer. |
266 | 265 |
267 RAMClustR approach | 266 RAMClustR approach |
268 RAMClustR was designed to group features derived from the same compound using an approach which is | 267 RAMClustR was designed to group features derived from the same compound using an approach which is |
269 __1.__ unsupervised, __2.__ platform agnosic, and __3.__ devoid of curated rules, as the depth of | 268 **1.** unsupervised, **2.** platform agnostic, and **3.** devoid of curated rules, as the depth of |
270 understanding of these processes is insufficent to enable accurate curation/prediction of all phenomenon | 269 understanding of these processes is insufficient to enable accurate curation/prediction of all phenomenon |
271 that may occur. We acheive this by making two assumptions. The first is that two features derived | 270 that may occur. We achieve this by making two assumptions. The first is that two features derived |
272 from the same compound will have (approximately) the same retention time. The second is that two | 271 from the same compound will have (approximately) the same retention time. The second is that two |
273 features derived from the same compound will have (approximately) the same quantitative trend across | 272 features derived from the same compound will have (approximately) the same quantitative trend across |
274 all samples in the xcms sample set. From these assumptions, we can calculate a retention time | 273 all samples in the xcms sample set. From these assumptions, we can calculate a retention time |
275 similarity score and a correlational similarity score for each feature pair. A high similarity score | 274 similarity score and a correlational similarity score for each feature pair. A high similarity score |
276 for both retention time and correlation indicates a strong probability that two features derive from | 275 for both retention time and correlation indicates a strong probability that two features derive from |
277 the same compound. Since both conditions must be met, the product of the two similarity scores provides | 276 the same compound. Since both conditions must be met, the product of the two similarity scores provides |
278 the best approximatio of the total similarity score - i.e. a feature pair with retention time similarity | 277 the best approximation of the total similarity score - i.e. a feature pair with retention time similarity |
279 of 1 and correlational similarity of 0 is unlikely to derive from one compound - 1 x 0 = 0, the final | 278 of 1 and correlational similarity of 0 is unlikely to derive from one compound - 1 x 0 = 0, the final |
280 similarity score is zero, indicating the two features represent two different compounds. Similarly, a | 279 similarity score is zero, indicating the two features represent two different compounds. Similarly, a |
281 feature pair with retention time similarity of 0 and correlational similarity of 1 is unlikely to derive | 280 feature pair with retention time similarity of 0 and correlational similarity of 1 is unlikely to derive |
282 from one compound - 0 x 1 = 0. Alternatively - a feature pair with retention time similarity of 1 and | 281 from one compound - 0 x 1 = 0. Alternatively - a feature pair with retention time similarity of 1 and |
283 correlational similarity of 1 is likely to derive from one compound - 1 x 1 = 1. | 282 correlational similarity of 1 is likely to derive from one compound - 1 x 1 = 1. |
284 | 283 |
285 The RAMClustR algorithm is built on creating similarity scores for all pairs of features, submitting | 284 The RAMClustR algorithm is built on creating similarity scores for all pairs of features, submitting |
286 this score matrix for heirarchical clustering, and then cutting the resulting dendrogram into neat | 285 this score matrix for hierarchical clustering, and then cutting the resulting dendrogram into neat |
287 chunks using the dynamicTreeCut package - where each 'chunk' of the dendrogram results in a group of | 286 chunks using the dynamicTreeCut package - where each 'chunk' of the dendrogram results in a group of |
288 features likely to be derived from a single compound. Importantly, this is acheived without looking for | 287 features likely to be derived from a single compound. Importantly, this is achieved without looking for |
289 specific phenomenon (i.e. sodiation), meaning that grouping can be performed on any dataset, whether it | 288 specific phenomenon (i.e. sodiation), meaning that grouping can be performed on any dataset, whether it |
290 is poisitive or negative ionization mode, EI or ESI, LC-MS GC-MS or CE-MS, in-source fragment or complex | 289 is positive or negative ionization mode, EI or ESI, LC-MS GC-MS or CE-MS, in-source fragment or complex |
291 adduction event, and predictable or unpredictable signals. | 290 adduction event, and predictable or unpredictable signals. |
292 </token> | 291 </token> |
293 | 292 |
294 <token name="@HELP_experiment@"> | 293 <token name="@HELP_experiment@"> |
295 <![CDATA[ | 294 <![CDATA[ |