Mercurial > repos > tomnl > mspurity_spectralmatching
comparison spectralMatching.xml @ 0:21a19280e7ae draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2948ce35fa7fffe5a64711cb30be971031e79019-dirty
| author | tomnl |
|---|---|
| date | Fri, 24 May 2019 09:09:21 -0400 |
| parents | |
| children | 532739956f51 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:21a19280e7ae |
|---|---|
| 1 <tool id="mspurity_spectralmatching" name="msPurity.spectralMatching" version="0.2.0"> | |
| 2 <description> | |
| 3 Perform spectral matching to MS/MS spectral libraries | |
| 4 </description> | |
| 5 | |
| 6 <macros> | |
| 7 <import>macros.xml</import> | |
| 8 </macros> | |
| 9 | |
| 10 <expand macro="requirements"> | |
| 11 </expand> | |
| 12 | |
| 13 | |
| 14 <stdio> | |
| 15 <exit_code range="1:" /> | |
| 16 </stdio> | |
| 17 <command interpreter="Rscript"><![CDATA[ | |
| 18 spectralMatching.R | |
| 19 --outDir=. | |
| 20 --cores=\${GALAXY_SLOTS:-4} | |
| 21 | |
| 22 | |
| 23 #if $Query.q_dbPth_con.q_dbPth_select == 'userdb' | |
| 24 --q_dbPth=$Query.q_dbPth_con.q_dbPth | |
| 25 #else | |
| 26 --q_defaultDb | |
| 27 #end if | |
| 28 | |
| 29 #if $Library.l_dbPth_con.l_dbPth_select == 'userdb' | |
| 30 --l_dbPth=$Library.l_dbPth_con.l_dbPth | |
| 31 #else | |
| 32 --l_defaultDb | |
| 33 #end if | |
| 34 | |
| 35 --q_ppmPrec=$Query.q_filters.q_ppmPrec | |
| 36 --l_ppmPrec=$Library.l_filters.l_ppmPrec | |
| 37 | |
| 38 --q_ppmProd=$Query.q_filters.q_ppmProd | |
| 39 --l_ppmProd=$Library.l_filters.l_ppmProd | |
| 40 | |
| 41 | |
| 42 #if $Query.q_filters.q_raThres_cond.q_raThres_bool | |
| 43 --q_raThres=$Query.q_filters.q_raThres_cond.q_raThres | |
| 44 #end if | |
| 45 | |
| 46 #if $Library.l_filters.l_raThres_cond.l_raThres_bool | |
| 47 --l_raThres=$Library.l_filters.l_raThres_cond.l_raThres | |
| 48 #end if | |
| 49 | |
| 50 #if $Query.q_filters.q_polarity_cond.q_polarity_bool | |
| 51 --q_polarity=$Query.q_filters.q_polarity_cond.q_polarity | |
| 52 #end if | |
| 53 | |
| 54 #if $Library.l_filters.l_polarity_cond.l_polarity_bool | |
| 55 --l_polarity=$Library.l_filters.l_polarity_cond.l_polarity | |
| 56 #end if | |
| 57 | |
| 58 #if $Query.q_filters.q_purity_cond.q_purity_bool | |
| 59 --q_purity=$Query.q_filters.q_purity_cond.q_purity | |
| 60 #end if | |
| 61 | |
| 62 #if $Library.l_filters.l_purity_cond.l_purity_bool | |
| 63 --l_purity=$Library.l_filters.l_purity_cond.l_purity | |
| 64 #end if | |
| 65 | |
| 66 #if $Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups_bool | |
| 67 --q_xcmsGroups=$Query.q_filters.q_xcmsGroups_cond.q_xcmsGroups | |
| 68 #end if | |
| 69 | |
| 70 #if $Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups_bool | |
| 71 --l_xcmsGroups=$Library.l_filters.l_xcmsGroups_cond.l_xcmsGroups | |
| 72 #end if | |
| 73 | |
| 74 #if $Query.q_filters.q_pids_cond.q_pids_bool | |
| 75 --q_pids=$Query.q_filters.q_pids_cond.q_pids | |
| 76 #end if | |
| 77 | |
| 78 #if $Library.l_filters.l_pids_cond.l_pids_bool | |
| 79 --l_pids=$Library.l_filters.l_pids_cond.l_pids | |
| 80 #end if | |
| 81 | |
| 82 #if $Query.q_filters.q_rtrange_cond.q_rtrange_bool | |
| 83 --q_rtrangeMin=$Query.q_filters.q_rtrange_cond.q_rtrangeMin | |
| 84 --q_rtrangeMax=$Query.q_filters.q_rtrange_cond.q_rtrangeMax | |
| 85 #end if | |
| 86 | |
| 87 #if $Library.l_filters.l_rtrange_cond.l_rtrange_bool | |
| 88 --l_rtrangeMin=$Library.l_filters.l_rtrange_cond.l_rtrangeMin | |
| 89 --l_rtrangeMax=$Library.l_filters.l_rtrange_cond.l_rtrangeMax | |
| 90 #end if | |
| 91 | |
| 92 #if $Query.q_filters.q_accessions_cond.q_accessions_bool | |
| 93 --q_accessions=$Query.q_filters.q_accessions_cond.q_accessions | |
| 94 #end if | |
| 95 | |
| 96 #if $Library.l_filters.l_accessions_cond.l_accessions_bool | |
| 97 --l_accessions=$Library.l_filters.l_accessions_cond.l_accessions | |
| 98 #end if | |
| 99 | |
| 100 | |
| 101 #if $Query.q_filters.q_sources_cond.q_sources_bool | |
| 102 --q_sources=$Query.q_filters.q_sources_cond.q_sources | |
| 103 --q_sourcesUser=$Query.q_filters.q_sources_cond.q_sourcesUser | |
| 104 #end if | |
| 105 | |
| 106 #if $Library.l_filters.l_sources_cond.l_sources_bool | |
| 107 --l_sources=$Library.l_filters.l_sources_cond.l_sources | |
| 108 --l_sourcesUser=$Library.l_filters.l_sources_cond.l_sourcesUser | |
| 109 #end if | |
| 110 | |
| 111 #if $Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes_bool | |
| 112 --q_instrumentTypes=$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypes | |
| 113 --q_instrumentTypesUser=$Query.q_filters.q_instrumentTypes_cond.q_instrumentTypesUser | |
| 114 #end if | |
| 115 | |
| 116 #if $Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes_bool | |
| 117 --l_instrumentTypes=$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypes | |
| 118 --l_instrumentTypesUser=$Library.l_filters.l_instrumentTypes_cond.l_instrumentTypesUser | |
| 119 #end if | |
| 120 | |
| 121 #if $Query.q_filters.q_instruments_cond.q_instruments_bool | |
| 122 --q_instruments=$Query.q_filters.q_instruments_cond.q_instruments | |
| 123 #end if | |
| 124 | |
| 125 #if $Library.l_filters.l_instruments_cond.l_instruments_bool | |
| 126 --l_instruments=$Library.l_filters.l_instruments_cond.l_instruments | |
| 127 #end if | |
| 128 | |
| 129 #if $Query.q_filters.q_spectraTypes_cond.q_spectraTypes_bool | |
| 130 --q_spectraTypes=$Query.q_filters.q_spectraTypes_cond.q_spectraTypes | |
| 131 #end if | |
| 132 | |
| 133 #if $Library.l_filters.l_spectraTypes_cond.l_spectraTypes_bool | |
| 134 --l_spectraTypes=$Library.l_filters.l_spectraTypes_cond.l_spectraTypes | |
| 135 #end if | |
| 136 | |
| 137 #if $Query.q_filters.q_spectraFilter | |
| 138 --q_spectraFilter | |
| 139 #end if | |
| 140 | |
| 141 #if $Library.l_filters.l_spectraFilter | |
| 142 --l_spectraFilter | |
| 143 #end if | |
| 144 | |
| 145 #if $General.rttol_cond.rttol_bool | |
| 146 --rttol=$General.rttol_cond.rttol | |
| 147 #end if | |
| 148 | |
| 149 --raW=$General.raW | |
| 150 --mzW=$General.mzW | |
| 151 | |
| 152 #if $General.updateDb_cond.updateDb | |
| 153 --updateDb | |
| 154 #if $General.updateDb_cond.copyDb | |
| 155 --copyDb | |
| 156 #end if | |
| 157 #end if | |
| 158 | |
| 159 #if $General.usePrecursors | |
| 160 --usePrecursors | |
| 161 #end if | |
| 162 | |
| 163 | |
| 164 | |
| 165 ]]></command> | |
| 166 <inputs> | |
| 167 | |
| 168 | |
| 169 | |
| 170 <section name="Query" title="Query spectra input and filters" expanded="True"> | |
| 171 <expand macro="sm_input" ql='Query' ql_shrt = "q" user="True" mspuritydatalib="False" msp="False" | |
| 172 help="Query SQLite database - in the standard XCMS msPurity workflow - the output | |
| 173 of msPurity.createDatabase should be used here. However any SQLite database | |
| 174 following the msPurity spectral database schema can be used as input"/> | |
| 175 <expand macro="filters" ql="Query" ql_shrt="q"/> | |
| 176 </section> | |
| 177 | |
| 178 <section name="Library" title="Library spectra input and filters" expanded="True"> | |
| 179 <expand macro="sm_input" ql='Library' ql_shrt = "l" user="False" mspuritydatalib="True" msp="False" | |
| 180 help="Library SQLite database - in the standard XCMS msPurity workflow - a default | |
| 181 database of MassBank, HMDB, LipidBlast and GNPS is used. However any SQLite | |
| 182 database following the msPurity spectral database schema can be used as input"/> | |
| 183 <expand macro="filters" ql="Library" ql_shrt="l"/> | |
| 184 </section> | |
| 185 | |
| 186 <section name="General" title="General arguments" expanded="False"> | |
| 187 <conditional name="rttol_cond"> | |
| 188 <param name="rttol_bool" type="boolean" label="Filter on retention time match?" | |
| 189 help="" /> | |
| 190 <when value="true"> | |
| 191 <param name="rttol" type="float" value="30" min="0" | |
| 192 label="Retention time tolerance (seconds)" | |
| 193 help="Retention time tolerance in seconds to match precursors"/> | |
| 194 </when> | |
| 195 <when value="false"> | |
| 196 </when> | |
| 197 </conditional> | |
| 198 | |
| 199 | |
| 200 <param name="usePrecursors" type="boolean" checked="true" label="Filter on matching precursors?" | |
| 201 help="If True, spectra will be filtered by similarity of precursors based on | |
| 202 the library and query ppm defined tolerance" /> | |
| 203 | |
| 204 <param name="raW" label="Weighting for relative abundance" | |
| 205 type="float" value="0.5" | |
| 206 help="Relative abundance weight for spectra (default to 0.5 as determined by | |
| 207 massbank for ESI data)"/> | |
| 208 | |
| 209 <param name="mzW" label="Weighting for mz" | |
| 210 type="float" value="2" | |
| 211 help="mz weight for spectra (default to 2 as determined by massbank for ESI data)"/> | |
| 212 | |
| 213 <conditional name="updateDb_cond"> | |
| 214 <param name="updateDb" type="boolean" checked="true" | |
| 215 label="Update database with results?" help="" /> | |
| 216 <when value="true"> | |
| 217 <param name="copyDb" type="boolean" checked="true" | |
| 218 label="Make a copy of the database?" | |
| 219 help="A copy will be made of the input SQLite target database and the | |
| 220 results will be added to this copy. When False, the input SQLite | |
| 221 database will be updated with the matching results. Use False if | |
| 222 you want to reduce storage space being used."/> | |
| 223 </when> | |
| 224 <when value="false"> | |
| 225 </when> | |
| 226 </conditional> | |
| 227 | |
| 228 | |
| 229 | |
| 230 </section> | |
| 231 | |
| 232 | |
| 233 | |
| 234 | |
| 235 </inputs> | |
| 236 | |
| 237 <outputs> <!-- Datasets picked up from the job working directory via from_work_dir --> | |
| 238 <data name="sqlite_results" format="sqlite" label="${tool.name} on ${on_string}: SQLite results" | |
| 239 from_work_dir="db_with_spectral_matching.sqlite" > | |
| 240 <filter>create_new_database is True</filter> <!-- NOTE(review): no input named create_new_database is defined in the inputs visible in this file (macros.xml not checked); possibly meant to test updateDb/copyDb - confirm --> | |
| 241 </data> | |
| 242 <data name="matches" format="tsv" label="${tool.name} on ${on_string}: matches" | |
| 243 from_work_dir="matched_results.tsv" > | |
| 244 <filter>spectra_type_q == "scans"</filter> <!-- NOTE(review): no input named spectra_type_q is defined in the inputs visible in this file (macros.xml not checked); looks copied from another tool - confirm --> | |
| 245 </data> | |
| 246 <data name="xcms_matches" format="tsv" label="${tool.name} on ${on_string}: XCMS matches" | |
| 247 from_work_dir="xcms_matched_results.tsv" /> | |
| 248 </outputs> | |
| 249 <tests> | |
| 250 <test> <!-- User-supplied query and library SQLite databases, restricted to XCMS group 14 and library accession PR100037 --> | |
| 251 <param name="q_dbPth" value="createDatabase_output.sqlite" /> | |
| 252 <param name="l_dbPth_select" value="userdb" /> | |
| 253 <param name="l_dbPth" value="PR100037.sqlite" /> | |
| 254 | |
| 255 <param name="q_xcmsGroups_bool" value="true" /> | |
| 256 <param name="l_accessions_bool" value="true" /> | |
| 257 <param name="q_xcmsGroups" value="14" /> | |
| 258 <param name="l_accessions" value="PR100037" /> | |
| 259 <output name="matches" file="spectralMatching_matched_results.tsv" /> <!-- matches is collected from matched_results.tsv --> | |
| 260 <output name="xcms_matches" file="spectralMatching_xcms_matched_results.tsv" /> <!-- xcms_matches is collected from xcms_matched_results.tsv --> | |
| 261 <output name="sqlite_results" value="spectralMatching_db_with_spectral_matching.sqlite" ftype="sqlite" compare="sim_size"/> | |
| 262 </test> | |
| 263 </tests> | |
| 264 | |
| 265 | |
| 266 | |
| 267 <help><![CDATA[ | |
| 268 | |
| 269 ============================================================= | |
| 270 Spectral matching | |
| 271 ============================================================= | |
| 272 ----------- | |
| 273 General | |
| 274 ----------- | |
| 275 | |
| 276 | |
| 277 Perform spectral matching to spectral libraries for an LC-MS/MS dataset. | |
| 278 | |
| 279 The spectral matching is performed from a **Query** SQLite spectral-database against a **Library** SQLite spectral-database. | |
| 280 | |
| 281 The SQLite schema of the spectral database is described here: spectral_database_schema_ | |
| 282 | |
| 283 | |
| 284 The query spectral-database in most cases should be the "unknown" spectra database generated by the msPurity | |
| 285 function createDatabase as part of an msPurity-XCMS data processing workflow. | |
| 286 | |
| 287 The library spectral-database in most cases should contain the "known" spectra from either public or user generated resources. | |
| 288 The library SQLite database by default contains data from MoNA including Massbank, HMDB, LipidBlast and GNPS. | |
| 289 A larger_database_ can be downloaded from the msp2db GitHub repository and used instead. | |
| 290 | |
| 291 To create a user generated library SQLite database the following tool can be used to generate a SQLite database | |
| 292 from a collection of MSP files: msp2db_. | |
| 293 | |
| 294 It should be noted though, that as long as the schema of the spectral-database is as described here, then any database can be used | |
| 295 for either the library or query - even allowing for the same database to be used. | |
| 296 | |
| 297 The spectral matching functionality has four main components, spectral filtering, spectral alignment, spectral matching, | |
| 298 and summarising the results. | |
| 299 | |
| 300 Spectral filtering is simply filtering both the library and query spectra to be searched against (e.g. choosing | |
| 301 the library source, instrument, retention time, precursor PPM tolerance etc). | |
| 302 | |
| 303 The spectral alignment stage involves aligning the query peaks to the library peaks. The approach used is similar | |
| 304 to the modified pMatch algorithm described in Zhou et al 2015. | |
| 305 | |
| 306 The spectral matching of the aligned spectra is performed against a combined intensity and m/z weighted vector - created for both | |
| 307 the query and library spectra (wq and wl). See below: | |
| 308 | |
| 309 .. math:: | |
| 310 | |
| 311 w=intensity^x \cdot mz^y | |
| 312 | |
| 313 | |
| 314 Where x and y represent weight factors and can be adjusted with the parameters raW and mzW. | |
| 315 Defaults to x=0.5 and y=2 as per MassBank for ESI based mass spectrometry data. | |
| 316 | |
| 317 The aligned weighted vectors are then matched using dot product cosine, reverse dot product cosine and the composite dot product. | |
| 318 See below for dot product cosine equation. | |
| 319 | |
| 320 .. math:: | |
| 321 | |
| 322 dpc = \frac{ w_q \cdot w_l } { \sqrt{\sum{w_{q}{}^2} } \cdot \sqrt{\sum{w_{l}{}^2}}} | |
| 323 | |
| 324 | |
| 325 Full details of the matching approaches are described in the msPurity_spectral_matching_vignette_ | |
| 326 | |
| 327 -------------------------------------------- | |
| 328 Example LC-MS/MS processing workflow | |
| 329 -------------------------------------------- | |
| 330 | |
| 331 * Purity assessments | |
| 332 + (mzML files) -> purityA -> (pa) | |
| 333 * XCMS processing | |
| 334 + (mzML files) -> xcms.xcmsSet -> xcms.merge -> xcms.group -> xcms.retcor -> xcms.group -> (xset) | |
| 335 * Fragmentation processing | |
| 336 + (xset, pa) -> frag4feature -> filterFragSpectra -> averageAllFragSpectra -> createDatabase -> **spectralMatching** -> (sqlite spectral database) | |
| 337 | |
| 338 ----------- | |
| 339 Output | |
| 340 ----------- | |
| 341 | |
| 342 **Database** | |
| 343 | |
| 344 The updated query database (this will have been updated with the annotation results if the updateDb argument is used) | |
| 345 | |
| 346 **matchedResults** | |
| 347 | |
| 348 All matched results from the query spectra to the library spectra. Contains the following columns | |
| 349 | |
| 350 * dpc - dot product cosine of the match | |
| 351 * rdpc - reverse dot product cosine of the match | |
| 352 * cdpc - composite dot product cosine of the match | |
| 353 * mcount - number of matching peaks | |
| 354 * allcount - total number of peaks across both query and library spectra | |
| 355 * mpercent - percentage of matching peaks across both query and library spectra | |
| 356 * accession - accession of library match | |
| 357 * name - name of library match | |
| 358 * inchikey - inchikey of library match | |
| 359 * lpid - pid in database of library match | |
| 360 * qpid - pid in database of query match | |
| 361 * mid - id of the match | |
| 362 | |
| 363 **xcmsMatchedResults** | |
| 364 | |
| 365 If the query spectra had XCMS based chromatographic peaks tables (e.g. c_peak_groups, c_peaks) in the sqlite database - it will | |
| 366 be possible to summarise the matches for each XCMS grouped feature. The dataframe contains the following columns | |
| 367 | |
| 368 * pid - pid in database of query match | |
| 369 * grpid - grpid of the XCMS grouped feature for query match | |
| 370 * mz - derived from XCMS grouped feature | |
| 371 * mzmin - derived from XCMS grouped feature | |
| 372 * mzmax - derived from XCMS grouped feature | |
| 373 * rt - derived from XCMS grouped feature | |
| 374 * rtmin - derived from XCMS grouped feature | |
| 375 * rtmax - derived from XCMS grouped feature | |
| 376 * npeaks - derived from XCMS grouped feature | |
| 377 * grp_name - derived from XCMS grouped feature | |
| 378 * dpc - dot product cosine of the match | |
| 379 * rdpc - reverse dot product cosine of the match | |
| 380 * cdpc - composite dot product cosine of the match | |
| 381 * mcount - number of matching peaks | |
| 382 * allcount - total number of peaks across both query and library spectra | |
| 383 * mpercent - percentage of matching peaks across both query and library spectra | |
| 384 * accession - accession of library match | |
| 385 * name - name of library match | |
| 386 * inchikey - inchikey of library match | |
| 387 * lpid - pid in database of library match | |
| 388 * mid - id of the match | |
| 389 | |
| 390 | |
| 391 .. _spectral_database_schema: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-spectral-datatabase-schema.html | |
| 392 .. _larger_database: https://github.com/computational-metabolomics/msp2db/releases | |
| 393 .. _msp2db: https://github.com/computational-metabolomics/msp2db/releases | |
| 394 .. _msPurity_spectral_matching_vignette: https://bioconductor.org/packages/release/bioc/vignettes/msPurity/inst/doc/msPurity-lcmsms-data-processing-and-spectral-matching-vignette.html | |
| 395 | |
| 396 ]]></help> | |
| 397 | |
| 398 <expand macro="citations"> </expand> | |
| 399 </tool> |
