comparison spectralMatching.R @ 0:e9f8d23f6923 draft default tip

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:12:45 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e9f8d23f6923
1 library(msPurity)
2 library(msPurityData)
3 library(optparse)
4 print(sessionInfo())
5 # load in library spectra config
6 source_local <- function(fname) {
7 argv <- commandArgs(trailingOnly = FALSE)
8 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8))
9 source(paste(base_dir, fname, sep = "/"))
10 }
11 source_local("dbconfig.R")
12
13 option_list <- list(
14 make_option(c("-o", "--outDir"), type = "character"),
15 make_option("--q_dbPth", type = "character"),
16 make_option("--l_dbPth", type = "character"),
17
18 make_option("--q_dbType", type = "character", default = NA),
19 make_option("--l_dbType", type = "character", default = NA),
20
21 make_option("--q_msp", type = "character", default = NA),
22 make_option("--l_msp", type = "character", default = NA),
23
24 make_option("--q_defaultDb", action = "store_true"),
25 make_option("--l_defaultDb", action = "store_true"),
26
27 make_option("--q_ppmPrec", type = "double"),
28 make_option("--l_ppmPrec", type = "double"),
29
30 make_option("--q_ppmProd", type = "double"),
31 make_option("--l_ppmProd", type = "double"),
32
33 make_option("--q_raThres", type = "double", default = NA),
34 make_option("--l_raThres", type = "double", default = NA),
35
36 make_option("--q_polarity", type = "character", default = NA),
37 make_option("--l_polarity", type = "character", default = NA),
38
39 make_option("--q_purity", type = "double", default = NA),
40 make_option("--l_purity", type = "double", default = NA),
41
42 make_option("--q_xcmsGroups", type = "character", default = NA),
43 make_option("--l_xcmsGroups", type = "character", default = NA),
44
45 make_option("--q_pids", type = "character", default = NA),
46 make_option("--l_pids", type = "character", default = NA),
47
48 make_option("--q_rtrangeMin", type = "double", default = NA),
49 make_option("--l_rtrangeMin", type = "double", default = NA),
50
51 make_option("--q_rtrangeMax", type = "double", default = NA),
52 make_option("--l_rtrangeMax", type = "double", default = NA),
53
54 make_option("--q_accessions", type = "character", default = NA),
55 make_option("--l_accessions", type = "character", default = NA),
56
57 make_option("--q_sources", type = "character", default = NA),
58 make_option("--l_sources", type = "character", default = NA),
59
60 make_option("--q_sourcesUser", type = "character", default = NA),
61 make_option("--l_sourcesUser", type = "character", default = NA),
62
63 make_option("--q_instrumentTypes", type = "character", default = NA),
64 make_option("--l_instrumentTypes", type = "character", default = NA),
65
66 make_option("--q_instrumentTypesUser", type = "character", default = NA),
67 make_option("--l_instrumentTypesUser", type = "character", default = NA),
68
69 make_option("--q_instruments", type = "character", default = NA),
70 make_option("--l_instruments", type = "character", default = NA),
71
72 make_option("--q_spectraTypes", type = "character", default = NA),
73 make_option("--l_spectraTypes", type = "character", default = NA),
74
75 make_option("--q_spectraFilter", action = "store_true"),
76 make_option("--l_spectraFilter", action = "store_true"),
77
78 make_option("--usePrecursors", action = "store_true"),
79
80 make_option("--mzW", type = "double"),
81 make_option("--raW", type = "double"),
82
83 make_option("--rttol", type = "double", default = NA),
84
85 make_option("--updateDb", action = "store_true"),
86 make_option("--copyDb", action = "store_true"),
87 make_option("--cores", default = 1)
88
89
90 )
91
92 # store options
93 opt <- parse_args(OptionParser(option_list = option_list))
94
95 print(opt)
96
97 # check if the sqlite databases have any spectra
98 checkSPeakMeta <- function(dbPth, nme) {
99 if (is.null(dbPth)) {
100 return(TRUE)
101 }else if ((file.exists(dbPth)) & (file.info(dbPth)$size > 0)) {
102 con <- DBI::dbConnect(RSQLite::SQLite(), dbPth)
103 if (DBI::dbExistsTable(con, "s_peak_meta")) {
104 spm <- DBI::dbGetQuery(con, "SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1")
105 return(TRUE)
106 }else if (DBI::dbExistsTable(con, "library_spectra_meta")) {
107 spm <- DBI::dbGetQuery(con, "SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1")
108 return(TRUE)
109 }else{
110 print(paste("No spectra available for ", nme))
111 return(FALSE)
112 }
113 }else{
114 print(paste("file empty or does not exist for", nme))
115 return(FALSE)
116 }
117
118
119 }
120
121
122 addQueryNameColumn <- function(sm) {
123 if (is.null(sm$matchedResults) || length(sm$matchedResults) == 1 || nrow(sm$matchedResults) == 0) {
124 return(sm)
125 }
126
127 con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth)
128 if (DBI::dbExistsTable(con, "s_peak_meta")) {
129 spm <- DBI::dbGetQuery(con, "SELECT pid, name AS query_entry_name FROM s_peak_meta")
130 }else if (DBI::dbExistsTable(con, "library_spectra_meta")) {
131 spm <- DBI::dbGetQuery(con, "SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta")
132 }
133 print(sm$matchedResults)
134 if ("pid" %in% colnames(sm$matchedResults)) {
135 sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "pid", by.y = "pid")
136 }else{
137 sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "qpid", by.y = "pid")
138 }
139
140 print(sm$xcmsMatchedResults)
141 if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults) == 1 || nrow(sm$xcmsMatchedResults) == 0) {
142 return(sm)
143 }else{
144 if ("pid" %in% colnames(sm$xcmsMatchedResults)) {
145 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "pid", by.y = "pid")
146 }else{
147 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "qpid", by.y = "pid")
148 }
149 }
150
151 return(sm)
152
153 }
154
155
156 updateDbF <- function(q_con, l_con) {
157 message("Adding extra details to database")
158 q_con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth)
159 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")) {
160 l_s_peak_meta <- DBI::dbGetQuery(q_con, "SELECT * FROM l_s_peak_meta")
161 colnames(l_s_peak_meta)[1] <- "pid"
162 }
163
164 l_con <- DBI::dbConnect(RSQLite::SQLite(), l_dbPth)
165 if (DBI::dbExistsTable(l_con, "s_peaks")) {
166 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ",")))
167
168 }else if (DBI::dbExistsTable(l_con, "library_spectra")) {
169 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra
170 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ",")))
171 }else{
172 l_s_peaks <- NULL
173 }
174
175 if (DBI::dbExistsTable(l_con, "source")) {
176 l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM source")
177 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) {
178 l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM library_spectra_source")
179 }else{
180 l_source <- NULL
181 }
182
183 if (!is.null(l_s_peaks)) {
184 DBI::dbWriteTable(q_con, name = "l_s_peaks", value = l_s_peaks, row.names = FALSE, append = TRUE)
185 }
186
187 if (!is.null(l_source)) {
188 DBI::dbWriteTable(q_con, name = "l_source", value = l_source, row.names = FALSE, append = TRUE)
189 }
190
191 }
192
193
194 extractMultiple <- function(optParam) {
195 if (!is.na(optParam)) {
196 param <- trimws(strsplit(optParam, ",")[[1]])
197 param <- param[param != ""]
198 }else {
199 param <- NA
200 }
201 return(param)
202
203 }
204
205 if (!is.null(opt$q_defaultDb)) {
206 q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData")
207 q_dbType <- "sqlite"
208 }else if (!opt$q_dbType == "local_config") {
209 q_dbType <- opt$q_dbType
210 q_dbPth <- opt$q_dbPth
211 }
212
213 if (!is.null(opt$l_defaultDb)) {
214 l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData")
215 l_dbType <- "sqlite"
216 }else if (!opt$l_dbType == "local_config") {
217 l_dbType <- opt$l_dbType
218 l_dbPth <- opt$l_dbPth
219 }
220
221 q_spectraTypes <- extractMultiple(opt$q_spectraTypes)
222 l_spectraTypes <- extractMultiple(opt$l_spectraTypes)
223
224 q_polarity <- extractMultiple(opt$q_polarity)
225 l_polarity <- extractMultiple(opt$l_polarity)
226
227 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups)
228 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups)
229
230 q_pids <- extractMultiple(opt$q_pids)
231 l_pids <- extractMultiple(opt$l_pids)
232
233 q_sources <- extractMultiple(opt$q_sources)
234 l_sources <- extractMultiple(opt$l_sources)
235
236 q_sourcesUser <- extractMultiple(opt$q_sourcesUser)
237 l_sourcesUser <- extractMultiple(opt$l_sourcesUser)
238
239 q_sources <- c(q_sources, q_sourcesUser)
240 l_sources <- c(l_sources, l_sourcesUser)
241
242 q_instrumentTypes <- extractMultiple(opt$q_instrumentTypes)
243 l_instrumentTypes <- extractMultiple(opt$l_instrumentTypes)
244
245 q_instrumentTypes <- c(q_instrumentTypes, q_instrumentTypes)
246 l_instrumentTypes <- c(l_instrumentTypes, l_instrumentTypes)
247
248
249 if (!is.null(opt$l_spectraFilter)) {
250 l_spectraFilter <- TRUE
251 }else{
252 l_spectraFilter <- FALSE
253 }
254
255 if (!is.null(opt$q_spectraFilter)) {
256 q_spectraFilter <- TRUE
257 }else{
258 q_spectraFilter <- FALSE
259 }
260
261 if (!is.null(opt$updateDb)) {
262 updateDb <- TRUE
263 }else{
264 updateDb <- FALSE
265 }
266
267 if (!is.null(opt$copyDb)) {
268 copyDb <- TRUE
269 }else{
270 copyDb <- FALSE
271 }
272
273 if (!is.null(opt$l_rtrangeMax)) {
274 l_rtrangeMax <- opt$l_rtrangeMax
275 }else{
276 l_rtrangeMax <- NA
277 }
278
279 if (!is.null(opt$q_rtrangeMax)) {
280 q_rtrangeMax <- opt$q_rtrangeMax
281 }else{
282 q_rtrangeMax <- NA
283 }
284
285 if (!is.null(opt$l_rtrangeMin)) {
286 l_rtrangeMin <- opt$l_rtrangeMin
287 }else{
288 l_rtrangeMin <- NA
289 }
290
291 if (!is.null(opt$q_rtrangeMin)) {
292 q_rtrangeMin <- opt$q_rtrangeMin
293 }else{
294 q_rtrangeMin <- NA
295 }
296
297 q_check <- checkSPeakMeta(opt$q_dbPth, "query")
298 l_check <- checkSPeakMeta(opt$l_dbPth, "library")
299
300
301 if (q_check && l_check) {
302 sm <- msPurity::spectralMatching(
303 q_purity = opt$q_purity,
304 l_purity = opt$l_purity,
305
306 q_ppmProd = opt$q_ppmProd,
307 l_ppmProd = opt$l_ppmProd,
308
309 q_ppmPrec = opt$q_ppmPrec,
310 l_ppmPrec = opt$l_ppmPrec,
311
312 q_raThres = opt$q_raThres,
313 l_raThres = opt$l_raThres,
314
315 q_pol = q_polarity,
316 l_pol = l_polarity,
317
318 q_spectraFilter = q_spectraFilter,
319 l_spectraFilter = l_spectraFilter,
320
321 q_xcmsGroups = q_xcmsGroups,
322 l_xcmsGroups = l_xcmsGroups,
323
324 q_pids = q_pids,
325 l_pids = l_pids,
326
327 q_sources = q_sources,
328 l_sources = l_sources,
329
330 q_instrumentTypes = q_instrumentTypes,
331 l_instrumentTypes = l_instrumentTypes,
332
333 q_spectraTypes = q_spectraTypes,
334 l_spectraTypes = l_spectraTypes,
335
336 l_rtrange = c(l_rtrangeMin, l_rtrangeMax),
337 q_rtrange = c(q_rtrangeMin, q_rtrangeMax),
338
339 q_accessions = opt$q_accessions,
340 l_accessions = opt$l_accessions,
341
342 raW = opt$raW,
343 mzW = opt$mzW,
344 rttol = opt$rttol,
345 cores = opt$cores,
346
347 copyDb = copyDb,
348 updateDb = updateDb,
349 outPth = "db_with_spectral_matching.sqlite",
350
351 q_dbPth = q_dbPth,
352 q_dbType = q_dbType,
353 q_dbName = q_dbName,
354 q_dbHost = q_dbHost,
355 q_dbUser = q_dbUser,
356 q_dbPass = q_dbPass,
357 q_dbPort = q_dbPort,
358
359 l_dbPth = l_dbPth,
360 l_dbType = l_dbType,
361 l_dbName = l_dbName,
362 l_dbHost = l_dbHost,
363 l_dbUser = l_dbUser,
364 l_dbPass = l_dbPass,
365 l_dbPort = l_dbPort
366
367 )
368
369 sm <- addQueryNameColumn(sm)
370 # Get name of the query results (and merged with the data frames)
371 write.table(sm$matchedResults, "matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE)
372 write.table(sm$xcmsMatchedResults, "xcms_matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE)
373
374 if (updateDb) {
375 updateDbF(q_con, l_con)
376 }
377 }