comparison spectralMatching.xml @ 0:a8ab07c27338 draft default tip

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:20:23 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a8ab07c27338
1 <tool id="mspurity_spectralmatching" name="msPurity.spectralMatching" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
2 <description>
3 Perform spectral matching to MS/MS spectral libraries
4 </description>
5 <macros>
6 <import>macros.xml</import>
7 </macros>
8 <expand macro="requirements"/>
9 <command detect_errors="exit_code"><![CDATA[
## Builds the Rscript call for spectralMatching.R. Optional flags are only emitted when the
## matching boolean/conditional input is enabled. Dataset paths and free-text / multi-select
## values are single-quoted so a value containing whitespace or shell metacharacters is still
## passed to the script as a single argument.
10 Rscript '$__tool_directory__/spectralMatching.R'
11 --outDir=.
12 --cores=\${GALAXY_SLOTS:-4}
13
## NOTE(review): the tests in this file select l_dbPth_select="userdb", which matches neither
## branch tested below ('msPurityData' / 'sqlite') - confirm the option values declared by the
## sm_input macro.
14 #if $Query.q_dbPth_con.q_dbPth_select == 'msPurityData'
15 --q_defaultDb
16 #else if $Query.q_dbPth_con.q_dbPth_select == 'sqlite'
17 --q_dbPth='$Query.q_dbPth_con.q_dbPth'
18 #end if
19
20 #if $Library.l_dbPth_con.l_dbPth_select == 'msPurityData'
21 --l_defaultDb
22 #else if $Library.l_dbPth_con.l_dbPth_select == 'sqlite'
23 --l_dbPth='$Library.l_dbPth_con.l_dbPth'
24 #end if
25
26 --l_dbType=$Library.l_dbPth_con.l_dbPth_select
27 --q_dbType=$Query.q_dbPth_con.q_dbPth_select
28
29
30 --q_ppmPrec=$Query.q_filters.q_ppmPrec
31 --l_ppmPrec=$Library.l_filters.l_ppmPrec
32
33 --q_ppmProd=$Query.q_filters.q_ppmProd
34 --l_ppmProd=$Library.l_filters.l_ppmProd
35
36
37 #if $Query.q_filters.q_raThres_cond.q_raThres_bool
38 --q_raThres=$Query.q_filters.q_raThres_cond.q_raThres
39 #end if
40
41 #if $Library.l_filters.l_raThres_cond.l_raThres_bool
42 --l_raThres=$Library.l_filters.l_raThres_cond.l_raThres
43 #end if
44
45 #if $Query.q_filters.q_polarity_cond.q_polarity_bool
46 --q_polarity=$Query.q_filters.q_polarity_cond.q_polarity
47 #end if
48
49 #if $Library.l_filters.l_polarity_cond.l_polarity_bool
50 --l_polarity=$Library.l_filters.l_polarity_cond.l_polarity
51 #end if
52
53 #if $Query.q_filters.q_purity_cond.q_purity_bool
54 --q_purity=$Query.q_filters.q_purity_cond.q_purity
55 #end if
56
57 #if $Library.l_filters.l_purity_cond.l_purity_bool
58 --l_purity=$Library.l_filters.l_purity_cond.l_purity
59 #end if
60
61 #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool
62 --q_xcmsGroups='$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups'
63 #end if
64
65 #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool
66 --l_xcmsGroups='$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups'
67 #end if
68
69 #if $Query.q_filters.q_pids_cond.q_pids_bool
70 --q_pids='$Query.q_filters.q_pids_cond.q_pids'
71 #end if
72
73 #if $Library.l_filters.l_pids_cond.l_pids_bool
74 --l_pids='$Library.l_filters.l_pids_cond.l_pids'
75 #end if
76
77 #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool
78 --q_rtrangeMin=$Query.q_filters.q_rtrange_cond.q_rtrangeMin
79 --q_rtrangeMax=$Query.q_filters.q_rtrange_cond.q_rtrangeMax
80 #end if
81
82 #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool
83 --l_rtrangeMin=$Library.l_filters.l_rtrange_cond.l_rtrangeMin
84 --l_rtrangeMax=$Library.l_filters.l_rtrange_cond.l_rtrangeMax
85 #end if
86
87 #if $Query.q_filters.q_accessions_cond.q_accessions_bool
88 --q_accessions='$Query.q_filters.q_accessions_cond.q_accessions'
89 #end if
90
91 #if $Library.l_filters.l_accessions_cond.l_accessions_bool
92 --l_accessions='$Library.l_filters.l_accessions_cond.l_accessions'
93 #end if
94
95
96 #if $Query.q_filters.q_sources_cond.q_sources_bool
97 --q_sources='$Query.q_filters.q_sources_cond.q_sources'
98 --q_sourcesUser='$Query.q_filters.q_sources_cond.q_sourcesUser'
99 #end if
100
101 #if $Library.l_filters.l_sources_cond.l_sources_bool
102 --l_sources='$Library.l_filters.l_sources_cond.l_sources'
103 --l_sourcesUser='$Library.l_filters.l_sources_cond.l_sourcesUser'
104 #end if
105
106 #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool
107 --q_instrumentTypes='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes'
108 --q_instrumentTypesUser='$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser'
109 #end if
110
111 #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool
112 --l_instrumentTypes='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes'
113 --l_instrumentTypesUser='$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser'
114 #end if
115
116 #if $Query.q_filters.q_instruments_cond.q_instruments_bool
117 --q_instruments='$Query.q_filters.q_instruments_cond.q_instruments'
118 #end if
119
120 #if $Library.l_filters.l_instruments_cond.l_instruments_bool
121 --l_instruments='$Library.l_filters.l_instruments_cond.l_instruments'
122 #end if
123
124 #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool
125 --q_spectraTypes='$Query.q_filters.q_spectraTypes_cond.q_spectraTypes'
126 #end if
127
128 #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool
129 --l_spectraTypes='$Library.l_filters.l_spectraTypes_cond.l_spectraTypes'
130 #end if
131
132 #if $Query.q_filters.q_spectraFilter
133 --q_spectraFilter
134 #end if
135
136 #if $Library.l_filters.l_spectraFilter
137 --l_spectraFilter
138 #end if
139
140 #if $General.rttol_cond.rttol_bool
141 --rttol=$General.rttol_cond.rttol
142 #end if
143
144 --raW=$General.raW
145 --mzW=$General.mzW
146
## --copyDb is only meaningful together with --updateDb, hence the nesting.
147 #if $General.updateDb_cond.updateDb
148 --updateDb
149 #if $General.updateDb_cond.copyDb
150 --copyDb
151 #end if
152 #end if
153
154 #if $General.usePrecursors
155 --usePrecursors
156 #end if
157
158 ]]></command>
159 <inputs>
<!-- Query side: input (sm_input macro) and filters (filters macro), expanded with the "q" short prefix. -->
160 <section name="Query" title="Query spectra input and filters" expanded="True">
161 <expand macro="sm_input" ql="Query" ql_shrt = "q" user="True" mspuritydatalib="False" msp="False"
162 help="Query SQLite database - in the standard XCMS msPurity workflow - the output
163 of msPurity.createDatabase should be used here. However any SQLite database
164 following the schema of as https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/>
165 <expand macro="filters" ql="Query" ql_shrt="q"/>
166 </section>
<!-- Library side: same two macros, expanded with the "l" short prefix. -->
167 <section name="Library" title="Library spectra input and filters" expanded="True">
168 <expand macro="sm_input" ql="Library" ql_shrt = "l" user="False" mspuritydatalib="True" msp="False"
169 help="Library SQLite database - in the standard XCMS msPurity workflow - a default
170 database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite
171 database following the schema of https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html can be used as input"/>
172 <expand macro="filters" ql="Library" ql_shrt="l"/>
173 </section>
<!-- Options that are not specific to the query or the library side. -->
174 <section name="General" title="General arguments" expanded="False">
<!-- rttol is only offered when rttol_bool is true; the command template passes it as the rttol argument only in that case. -->
175 <conditional name="rttol_cond">
176 <param name="rttol_bool" type="boolean" label="Filter on retention time match?"
177 help="" />
178 <when value="true">
179 <param name="rttol" type="float" value="30" min="0"
180 label="Retention time tolerance (seconds)"
181 help="Retention time tolerance in seconds to match precursors"/>
182 </when>
183 <when value="false"/>
184 </conditional>
185 <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?"
186 help="If True, spectra will be filtered by similarity of precursors based on
187 the library and query ppm defined tolerance" />
<!-- NOTE(review): mzW declares min="0" but raW declares no lower bound; confirm whether a negative raW is intended to be accepted. -->
188 <param name="raW" label="Weighting for relative abundance" type="float" value="0.5"
189 help="Relative abundance weight for spectra (default to 0.5 as determined by
190 massbank for ESI data)"/>
191 <param name="mzW" label="Weighting for mz" type="float" min="0" value="2"
192 help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/>
<!-- copyDb is only offered when updateDb is true; the command template emits the copyDb flag only inside the updateDb branch. -->
193 <conditional name="updateDb_cond">
194 <param name="updateDb" type="boolean" checked="true"
195 label="Update database with results?" help="" />
196 <when value="true">
197 <param name="copyDb" type="boolean" checked="true"
198 label="Make a copy of the database?"
199 help="A copy will be made of the input SQLite target database and the
200 results will be added to this copy. When False, the input SQLite
201 database will be updated with the matching results. Use False if
202 you want to reduce storage space being used."/>
203 </when>
204 <when value="false"/>
205 </conditional>
206 </section>
207 </inputs>
208
209 <outputs>
<!-- Each output is collected from a fixed file name in the job working directory (from_work_dir). -->
<!-- NOTE(review): the filter below references "create_new_database", which is not the name of any
     parameter declared in the <inputs> of this file; confirm it is defined elsewhere or replace it
     with the intended parameter (presumably the updateDb / copyDb options in the General section). -->
210 <data name="sqlite_results" format="sqlite" label="${tool.name} on ${on_string}: SQLite results"
211 from_work_dir="db_with_spectral_matching.sqlite" >
212 <filter>create_new_database is True</filter>
213 </data>
<!-- NOTE(review): "spectra_type_q" is likewise not declared in the <inputs> of this file; confirm
     which parameter this filter was meant to test, or whether the filter should be removed. -->
214 <data name="matches" format="tsv" label="${tool.name} on ${on_string}: matches"
215 from_work_dir="matched_results.tsv" >
216 <filter>spectra_type_q == "scans"</filter>
217 </data>
218 <data name="xcms_matches" format="tsv" label="${tool.name} on ${on_string}: XCMS matches"
219 from_work_dir="xcms_matched_results.tsv" />
220 </outputs>
221 <tests>
<!-- Test 1: query database matched against a user-supplied library database, restricted to
     XCMS group 14 and library accession PR100037. -->
<!-- NOTE(review): l_dbPth_select is set to "userdb", but the command template only tests for
     'msPurityData' and 'sqlite'; confirm the option values declared by the sm_input macro. -->
<!-- NOTE(review): the expected files look swapped relative to the output names (xcms_matches is
     compared with ..._matched_results.tsv and matches with ..._xcms_matched_results.tsv);
     confirm against the test-data files. -->
222 <test>
223 <param name="q_dbPth" value="createDatabase_output.sqlite" />
224 <param name="q_spectraTypes_bool" value="true" />
225 <param name="q_spectraTypes" value="inter,av_all" />
226 <param name="l_dbPth_select" value="userdb" />
227 <param name="l_dbPth" value="PR100037.sqlite" />
228 <param name="q_xcmsGroups_bool" value="true" />
229 <param name="l_accessions_bool" value="true" />
230 <param name="q_xcmsGroups" value="14" />
231 <param name="l_accessions" value="PR100037" />
232 <output name="xcms_matches" file="spectralMatching_matched_results.tsv" />
233 <output name="matches" file="spectralMatching_xcms_matched_results.tsv" />
234 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching.sqlite" ftype="sqlite" compare="sim_size"/>
235 </test>
<!-- Test 2: same parameters as test 1 with the library instrument-type filter additionally enabled;
     the expected file names carry an _instrumentTypes suffix and pair with the output names the same way. -->
236 <test>
237 <param name="l_instrumentTypes_bool" value="true" />
238 <param name="q_dbPth" value="createDatabase_output.sqlite" />
239 <param name="q_spectraTypes_bool" value="true" />
240 <param name="q_spectraTypes" value="inter,av_all" />
241 <param name="l_dbPth_select" value="userdb" />
242 <param name="l_dbPth" value="PR100037.sqlite" />
243 <param name="q_xcmsGroups_bool" value="true" />
244 <param name="l_accessions_bool" value="true" />
245 <param name="q_xcmsGroups" value="14" />
246 <param name="l_accessions" value="PR100037" />
247 <output name="xcms_matches" file="spectralMatching_matched_results_instrumentTypes.tsv" />
248 <output name="matches" file="spectralMatching_xcms_matched_results_instrumentTypes.tsv" />
249 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching_instrumentTypes.sqlite" ftype="sqlite" compare="sim_size"/>
250 </test>
251 </tests>
252
253 <help><![CDATA[
254
255 =============================================================
256 Spectral matching
257 =============================================================
258 -----------
259 General
260 -----------
261
262
263 Perform spectral matching to spectral libraries for an LC-MS/MS dataset.
264
265 The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database.
266
267 The SQLite schema of the spectral database is described here: spectral_database_schema_
268
269
270 The query spectral-database should in most cases be the "unknown" spectra database generated by the msPurity
271 function createDatabase as part of a msPurity-XCMS data processing workflow.
272
273 The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources.
274 The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS.
275 A larger_database_ can be downloaded and used from the msp2db github repository.
276
277 To create a user generated library SQLite database the following tool can be used to generate a SQLite database
278 from a collection of MSP files: msp2db_.
279
280 It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used
281 for either the library or query - even allowing for the same database to be used.
282
283 The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching,
284 and summarising the results.
285
286 Spectral filtering is simply filtering both the library and query spectra to be searched against (e.g. choosing
287 the library source, instrument, retention time, precursor PPM tolerance etc).
288
289 The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar
290 to the modified pMatch algorithm described in Zhou et al 2015.
291
292 The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both
293 the query and library spectra (wq and wl). See below:
294
295 .. math::
296
297 w=intensity^x \cdot mz^y
298
299
300 Where x and y represent weight factors and can be adjusted with the parameters raW and mzW.
301 Defaults to x=0.5 and y=2 as per MassBank for ESI based mass spectrometry data.
302
303 The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product.
304 See below for dot product cosine equation.
305
306 .. math::
307
308 dpc = \frac{ w_q \cdot w_l } { \sqrt{\sum w_{q}^{2}} \cdot \sqrt{\sum w_{l}^{2}} }
309
310
311 Full details of the matching approaches are described in the msPurity_spectral_matching_vignette_
312
313 --------------------------------------------
314 Example LC-MS/MS processing workflow
315 --------------------------------------------
316
317 * Purity assessments
318 + (mzML files) -> purityA -> (pa)
319 * XCMS processing
320 + (mzML files) -> xcms.xcmsSet -> xcms.merge -> xcms.group -> xcms.retcor -> xcms.group -> (xset)
321 * Fragmentation processing
322 + (xset, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> **spectralMatching** -> (sqlite spectral database)
323
324 -----------
325 Output
326 -----------
327
328 **Database**
329
330 The updated query database (this will have been updated with the annotation results if the updateDb argument is used)
331
332
333 **xcmsMatchedResults**
334
335 If the query spectra had XCMS based chromatographic peak tables (e.g. c_peak_groups, c_peaks) in the sqlite database - it will
336 be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns
337
338 * lpid - id in database of library spectra
339 * qpid - id in database of query spectra
340 * dpc - dot product cosine of the match
341 * rdpc - reverse dot product cosine of the match
342 * cdpc - composite dot product cosine of the match
343 * mcount - number of matching peaks
344 * allcount - total number of peaks across both query and library spectra
345 * mpercent - percentage of matching peaks across both query and library spectra
346 * library_rt - retention time of library spectra
347 * query_rt - retention time of query spectra
348 * rtdiff - difference between library and query retention time
349 * library_precursor_mz - library precursor mz
350 * query_precursor_mz - query precursor mz
351 * library_precursor_ion_purity - library precursor ion purity
352 * query_precursor_ion_purity - query precursor ion purity
353 * library_accession - library accession value (unique string or number given to either MoNA or Massbank data entries)
354 * library_precursor_type - library precursor type (i.e. adduct)
355 * library_entry_name - Name given to the library spectra
356 * inchikey - inchikey of the matched library spectra
357 * library_source_name - source of the spectra (e.g. massbank, gnps)
358 * library_compound_name - name of compound spectra was obtained from
359
360 **matchedResults**
361
362 All matched results from the query spectra to the library spectra. Contains the same columns as above
363 without the XCMS details. This table is useful to observe spectral matching results
364 for all MS/MS spectra irrespective of whether they are linked to XCMS MS1 features.
365
366
367 .. _spectral_database_schema: https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-database-vignette.html
368 .. _larger_database: https://github.com/computational-metabolomics/msp2db/releases
369 .. _msp2db: https://github.com/computational-metabolomics/msp2db/releases
370 .. _msPurity_spectral_matching_vignette: https://www.bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html
371
372 ]]></help>
373
374 <expand macro="citations"> </expand>
375 </tool>