comparison spectralMatching.xml @ 0:21a19280e7ae draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2948ce35fa7fffe5a64711cb30be971031e79019-dirty
author tomnl
date Fri, 24 May 2019 09:09:21 -0400
parents
children 532739956f51
comparison
equal deleted inserted replaced
-1:000000000000 0:21a19280e7ae
1 <tool id="mspurity_spectralmatching" name="msPurity.spectralMatching" version="0.2.0">
2 <description>
3 Perform spectral matching to MS/MS spectral libraries
4 </description>
5 <!-- macros.xml is the only macro import; presumably it defines the macros expanded throughout this file -->
6 <macros>
7 <import>macros.xml</import>
8 </macros>
9
10 <expand macro="requirements">
11 </expand>
12
13 <!-- Exit codes of 1 or above are matched by the stdio rule below -->
14 <stdio>
15 <exit_code range="1:" />
16 </stdio>
17 <command><![CDATA[
18 Rscript '$__tool_directory__/spectralMatching.R'
19 --outDir=.
20 --cores=\${GALAXY_SLOTS:-4}
21 ## Values substituted from tool parameters are single-quoted so the shell passes each one as one literal argument.
22 ## Database inputs: pass the user-supplied path when 'userdb' is selected, otherwise the matching default-database flag.
23 #if $Query.q_dbPth_con.q_dbPth_select == 'userdb'
24 --q_dbPth='$Query.q_dbPth_con.q_dbPth'
25 #else
26 --q_defaultDb
27 #end if
28
29 #if $Library.l_dbPth_con.l_dbPth_select == 'userdb'
30 --l_dbPth='$Library.l_dbPth_con.l_dbPth'
31 #else
32 --l_defaultDb
33 #end if
34
35 --q_ppmPrec='$Query.q_filters.q_ppmPrec'
36 --l_ppmPrec='$Library.l_filters.l_ppmPrec'
37
38 --q_ppmProd='$Query.q_filters.q_ppmProd'
39 --l_ppmProd='$Library.l_filters.l_ppmProd'
40
41 ## Optional filters: each option below is only emitted when its enabling parameter evaluates true.
42 #if $Query.q_filters.q_raThres_cond.q_raThres_bool
43 --q_raThres='$Query.q_filters.q_raThres_cond.q_raThres'
44 #end if
45
46 #if $Library.l_filters.l_raThres_cond.l_raThres_bool
47 --l_raThres='$Library.l_filters.l_raThres_cond.l_raThres'
48 #end if
49
50 #if $Query.q_filters.q_polarity_cond.q_polarity_bool
51 --q_polarity='$Query.q_filters.q_polarity_cond.q_polarity'
52 #end if
53
54 #if $Library.l_filters.l_polarity_cond.l_polarity_bool
55 --l_polarity='$Library.l_filters.l_polarity_cond.l_polarity'
56 #end if
57
58 #if $Query.q_filters.q_purity_cond.q_purity_bool
59 --q_purity='$Query.q_filters.q_purity_cond.q_purity'
60 #end if
61
62 #if $Library.l_filters.l_purity_cond.l_purity_bool
63 --l_purity='$Library.l_filters.l_purity_cond.l_purity'
64 #end if
65
66 #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool
67 --q_xcmsGroups='$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups'
68 #end if
69
70 #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool
71 --l_xcmsGroups='$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups'
72 #end if
73
74 #if $Query.q_filters.q_pids_cond.q_pids_bool
75 --q_pids='$Query.q_filters.q_pids_cond.q_pids'
76 #end if
77
78 #if $Library.l_filters.l_pids_cond.l_pids_bool
79 --l_pids='$Library.l_filters.l_pids_cond.l_pids'
80 #end if
81
82 #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool
83 --q_rtrangeMin='$Query.q_filters.q_rtrange_cond.q_rtrangeMin'
84 --q_rtrangeMax='$Query.q_filters.q_rtrange_cond.q_rtrangeMax'
85 #end if
86
87 #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool
88 --l_rtrangeMin='$Library.l_filters.l_rtrange_cond.l_rtrangeMin'
89 --l_rtrangeMax='$Library.l_filters.l_rtrange_cond.l_rtrangeMax'
90 #end if
91
92 #if $Query.q_filters.q_accessions_cond.q_accessions_bool
93 --q_accessions='$Query.q_filters.q_accessions_cond.q_accessions'
94 #end if
95
96 #if $Library.l_filters.l_accessions_cond.l_accessions_bool
97 --l_accessions='$Library.l_filters.l_accessions_cond.l_accessions'
98 #end if
99
100
101 #if $Query.q_filters.q_sources_cond.q_sources_bool
102 --q_sources='$Query.q_filters.q_sources_cond.q_sources'
103 --q_sourcesUser='$Query.q_filters.q_sources_cond.q_sourcesUser'
104 #end if
105
106 #if $Library.l_filters.l_sources_cond.l_sources_bool
107 --l_sources='$Library.l_filters.l_sources_cond.l_sources'
108 --l_sourcesUser='$Library.l_filters.l_sources_cond.l_sourcesUser'
109 #end if
110
111 #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool
112 --q_instrumentTypes='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes'
113 --q_instrumentTypesUser='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser'
114 #end if
115
116 #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool
117 --l_instrumentTypes='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes'
118 --l_instrumentTypesUser='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser'
119 #end if
120
121 #if $Query.q_filters.q_instruments_cond.q_instruments_bool
122 --q_instruments='$Query.q_filters.q_instruments_cond.q_instruments'
123 #end if
124
125 #if $Library.l_filters.l_instruments_cond.l_instruments_bool
126 --l_instruments='$Library.l_filters.l_instruments_cond.l_instruments'
127 #end if
128
129 #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool
130 --q_spectraTypes='$Query.q_filters.q_spectraTypes_cond.q_spectraTypes'
131 #end if
132
133 #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool
134 --l_spectraTypes='$Library.l_filters.l_spectraTypes_cond.l_spectraTypes'
135 #end if
136
137 #if $Query.q_filters.q_spectraFilter
138 --q_spectraFilter
139 #end if
140
141 #if $Library.l_filters.l_spectraFilter
142 --l_spectraFilter
143 #end if
144 ## General matching arguments (defined in the General section of the inputs).
145 #if $General.rttol_cond.rttol_bool
146 --rttol='$General.rttol_cond.rttol'
147 #end if
148
149 --raW='$General.raW'
150 --mzW='$General.mzW'
151 ## copyDb only exists in the updateDb 'true' branch, so it is tested inside the updateDb check.
152 #if $General.updateDb_cond.updateDb
153 --updateDb
154 #if $General.updateDb_cond.copyDb
155 --copyDb
156 #end if
157 #end if
158
159 #if $General.usePrecursors
160 --usePrecursors
161 #end if
162
163
164
165 ]]></command>
166 <inputs>
167 <!-- Query and Library share one layout: a database input (sm_input macro) followed by spectra filters (filters macro). -->
168 <!-- Both macros are defined outside this file; ql / ql_shrt presumably set the long and short name prefixes (Query/q, Library/l). -->
169
170 <section name="Query" title="Query spectra input and filters" expanded="True">
171 <expand macro="sm_input" ql="Query" ql_shrt="q" user="True" mspuritydatalib="False" msp="False"
172 help="Query SQLite database - in the standard XCMS msPurity workflow - the output
173 of msPurity.createDatabase should be used here. However any SQLite database
174 following the msPurity spectral database schema (linked in the tool help) can be used as input"/>
175 <expand macro="filters" ql="Query" ql_shrt="q"/>
176 </section>
177
178 <section name="Library" title="Library spectra input and filters" expanded="True">
179 <expand macro="sm_input" ql="Library" ql_shrt="l" user="False" mspuritydatalib="True" msp="False"
180 help="Library SQLite database - in the standard XCMS msPurity workflow - a default
181 database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite
182 database following the msPurity spectral database schema (linked in the tool help) can be used as input"/>
183 <expand macro="filters" ql="Library" ql_shrt="l"/>
184 </section>
185
186 <section name="General" title="General arguments" expanded="False">
187 <conditional name="rttol_cond">
188 <param name="rttol_bool" type="boolean" label="Filter on retention time match?"
189 help="" />
190 <when value="true">
191 <param name="rttol" type="float" value="30" min="0"
192 label="Retention time tolerance (seconds)"
193 help="Retention time tolerance in seconds to match precursors"/>
194 </when>
195 <when value="false">
196 </when>
197 </conditional>
198
199
200 <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?"
201 help="If True, spectra will be filtered by similarity of precursors based on
202 the library and query ppm defined tolerance" />
203
204 <param name="raW" label="Weighting for relative abundance"
205 type="float" value="0.5"
206 help="Relative abundance weight for spectra (default to 0.5 as determined by
207 massbank for ESI data)"/>
208
209 <param name="mzW" label="Weighting for mz"
210 type="float" value="2"
211 help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/>
212
213 <conditional name="updateDb_cond">
214 <param name="updateDb" type="boolean" checked="true"
215 label="Update database with results?" help="" />
216 <when value="true">
217 <param name="copyDb" type="boolean" checked="true"
218 label="Make a copy of the database?"
219 help="A copy will be made of the input SQLite target database and the
220 results will be added to this copy. When False, the input SQLite
221 database will be updated with the matching results. Use False if
222 you want to reduce storage space being used."/>
223 </when>
224 <when value="false">
225 </when>
226 </conditional>
227
228
229
230 </section>
231
232
233
234
235 </inputs>
236
237 <outputs>
238 <data name="sqlite_results" format="sqlite" label="${tool.name} on ${on_string}: SQLite results"
239 from_work_dir="db_with_spectral_matching.sqlite" >
240 <filter>General['updateDb_cond']['updateDb']</filter> <!-- was an undefined name (create_new_database); NOTE(review): confirm whether copyDb must also be true for this file to exist -->
241 </data>
242 <data name="matches" format="tsv" label="${tool.name} on ${on_string}: matches"
243 from_work_dir="matched_results.tsv" >
244 <!-- No filter: the previous expression referenced an undefined name (spectra_type_q), so this output was always collected. -->
245 </data>
246 <data name="xcms_matches" format="tsv" label="${tool.name} on ${on_string}: XCMS matches"
247 from_work_dir="xcms_matched_results.tsv" />
248 </outputs>
249 <tests>
250 <test>
251 <param name="q_dbPth" value="createDatabase_output.sqlite" />
252 <param name="l_dbPth_select" value="userdb" />
253 <param name="l_dbPth" value="PR100037.sqlite" />
254 <!-- Enable the query xcmsGroups filter (14) and the library accessions filter (PR100037). -->
255 <param name="q_xcmsGroups_bool" value="true" />
256 <param name="l_accessions_bool" value="true" />
257 <param name="q_xcmsGroups" value="14" />
258 <param name="l_accessions" value="PR100037" />
259 <output name="matches" file="spectralMatching_matched_results.tsv" />
260 <output name="xcms_matches" file="spectralMatching_xcms_matched_results.tsv" />
261 <output name="sqlite_results" file="spectralMatching_db_with_spectral_matching.sqlite" ftype="sqlite" compare="sim_size"/>
262 </test>
263 </tests>
264
265
266
267 <help><![CDATA[
268
269 =============================================================
270 Spectral matching
271 =============================================================
272 -----------
273 General
274 -----------
275
276
277 Perform spectral matching to spectral libraries for an LC-MS/MS dataset.
278
279 The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database.
280
281 The SQLite schema of the spectral database is described here: spectral_database_schema_
282
283
284 The query spectral-database in most cases should be the "unknown" spectra database generated by the msPurity
285 function createDatabase as part of an msPurity-XCMS data processing workflow.
286
287 The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources.
288 The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS.
289 A larger_database_ can be downloaded from the msp2db GitHub repository and used instead.
290
291 To create a user generated library SQLite database the following tool can be used to generate a SQLite database
292 from a collection of MSP files: msp2db_.
293
294 It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used
295 for either the library or query - even allowing for the same database to be used.
296
297 The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching,
298 and summarising the results.
299
300 Spectral filtering is simply filtering both the library and query spectra to be searched against (e.g. choosing
301 the library source, instrument, retention time, precursor PPM tolerance etc).
302
303 The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar
304 to the modified pMatch algorithm described in Zhou et al 2015.
305
306 The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both
307 the query and library spectra (wq and wl). See below:
308
309 .. math::
310
311 w=intensity^x \cdot mz^y
312
313
314 Where x and y represent weight factors and can be adjusted with the parameters raW and mzW.
315 Defaults to x=0.5 and y=2 as per MassBank for ESI based mass spectrometry data.
316
317 The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product.
318 See below for dot product cosine equation.
319
320 .. math::
321
322 dpc = \frac{ w_q \cdot w_l } { \sqrt{\sum{w_{q}{}^2} } \cdot \sqrt{\sum{w_{l}{}^2}}}
323
324
325 Full details of the matching approaches are described in the msPurity_spectral_matching_vignette_
326
327 --------------------------------------------
328 Example LC-MS/MS processing workflow
329 --------------------------------------------
330
331 * Purity assessments
332 + (mzML files) -> purityA -> (pa)
333 * XCMS processing
334 + (mzML files) -> xcms.xcmsSet -> xcms.merge -> xcms.group -> xcms.retcor -> xcms.group -> (xset)
335 * Fragmentation processing
336 + (xset, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> **spectralMatching** -> (sqlite spectral database)
337
338 -----------
339 Output
340 -----------
341
342 **Database**
343
344 The updated query database (this will have been updated with the annotation results if the updateDb argument is used)
345
346 **matchedResults**
347
348 All matched results from the query spectra to the library spectra. Contains the following columns
349
350 * dpc - dot product cosine of the match
351 * rdpc - reverse dot product cosine of the match
352 * cdpc - composite dot product cosine of the match
353 * mcount - number of matching peaks
354 * allcount - total number of peaks across both query and library spectra
355 * mpercent - percentage of matching peaks across both query and library spectra
356 * accession - accession of library match
357 * name - name of library match
358 * inchikey - inchikey of library match
359 * lpid - pid in database of library match
360 * qpid - pid in database of query match
361 * mid - id of the match
362
363 **xcmsMatchedResults**
364
365 If the query spectra had XCMS based chromatographic peak tables (e.g. c_peak_groups, c_peaks) in the sqlite database - it will
366 be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns
367
368 * pid - pid in database of query match
369 * grpid - grpid of the XCMS grouped feature for query match
370 * mz - derived from XCMS grouped feature
371 * mzmin - derived from XCMS grouped feature
372 * mzmax - derived from XCMS grouped feature
373 * rt - derived from XCMS grouped feature
374 * rtmin - derived from XCMS grouped feature
375 * rtmax - derived from XCMS grouped feature
376 * npeaks - derived from XCMS grouped feature
377 * grp_name - derived from XCMS grouped feature
378 * dpc - dot product cosine of the match
379 * rdpc - reverse dot product cosine of the match
380 * cdpc - composite dot product cosine of the match
381 * mcount - number of matching peaks
382 * allcount - total number of peaks across both query and library spectra
383 * mpercent - percentage of matching peaks across both query and library spectra
384 * accession - accession of library match
385 * name - name of library match
386 * inchikey - inchikey of library match
387 * lpid - pid in database of library match
388 * mid - id of the match
389
390
391 .. _spectral_database_schema: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-datatabase-schema.html
392 .. _larger_database: https://github.com/computational-metabolomics/msp2db/releases
393 .. _msp2db: https://github.com/computational-metabolomics/msp2db/releases
394 .. _msPurity_spectral_matching_vignette: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html
395
396 ]]></help>
397
398 <expand macro="citations"> </expand>
399 </tool>