Mercurial > repos > computational-metabolomics > mspurity_createdatabase
comparison spectralMatching.R @ 0:7dc0cde206d8 draft default tip
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:14:02 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7dc0cde206d8 |
---|---|
1 library(msPurity) | |
2 library(msPurityData) | |
3 library(optparse) | |
4 print(sessionInfo()) | |
5 # load in library spectra config | |
6 source_local <- function(fname) { | |
7 argv <- commandArgs(trailingOnly = FALSE) | |
8 base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)) | |
9 source(paste(base_dir, fname, sep = "/")) | |
10 } | |
11 source_local("dbconfig.R") | |
12 | |
13 option_list <- list( | |
14 make_option(c("-o", "--outDir"), type = "character"), | |
15 make_option("--q_dbPth", type = "character"), | |
16 make_option("--l_dbPth", type = "character"), | |
17 | |
18 make_option("--q_dbType", type = "character", default = NA), | |
19 make_option("--l_dbType", type = "character", default = NA), | |
20 | |
21 make_option("--q_msp", type = "character", default = NA), | |
22 make_option("--l_msp", type = "character", default = NA), | |
23 | |
24 make_option("--q_defaultDb", action = "store_true"), | |
25 make_option("--l_defaultDb", action = "store_true"), | |
26 | |
27 make_option("--q_ppmPrec", type = "double"), | |
28 make_option("--l_ppmPrec", type = "double"), | |
29 | |
30 make_option("--q_ppmProd", type = "double"), | |
31 make_option("--l_ppmProd", type = "double"), | |
32 | |
33 make_option("--q_raThres", type = "double", default = NA), | |
34 make_option("--l_raThres", type = "double", default = NA), | |
35 | |
36 make_option("--q_polarity", type = "character", default = NA), | |
37 make_option("--l_polarity", type = "character", default = NA), | |
38 | |
39 make_option("--q_purity", type = "double", default = NA), | |
40 make_option("--l_purity", type = "double", default = NA), | |
41 | |
42 make_option("--q_xcmsGroups", type = "character", default = NA), | |
43 make_option("--l_xcmsGroups", type = "character", default = NA), | |
44 | |
45 make_option("--q_pids", type = "character", default = NA), | |
46 make_option("--l_pids", type = "character", default = NA), | |
47 | |
48 make_option("--q_rtrangeMin", type = "double", default = NA), | |
49 make_option("--l_rtrangeMin", type = "double", default = NA), | |
50 | |
51 make_option("--q_rtrangeMax", type = "double", default = NA), | |
52 make_option("--l_rtrangeMax", type = "double", default = NA), | |
53 | |
54 make_option("--q_accessions", type = "character", default = NA), | |
55 make_option("--l_accessions", type = "character", default = NA), | |
56 | |
57 make_option("--q_sources", type = "character", default = NA), | |
58 make_option("--l_sources", type = "character", default = NA), | |
59 | |
60 make_option("--q_sourcesUser", type = "character", default = NA), | |
61 make_option("--l_sourcesUser", type = "character", default = NA), | |
62 | |
63 make_option("--q_instrumentTypes", type = "character", default = NA), | |
64 make_option("--l_instrumentTypes", type = "character", default = NA), | |
65 | |
66 make_option("--q_instrumentTypesUser", type = "character", default = NA), | |
67 make_option("--l_instrumentTypesUser", type = "character", default = NA), | |
68 | |
69 make_option("--q_instruments", type = "character", default = NA), | |
70 make_option("--l_instruments", type = "character", default = NA), | |
71 | |
72 make_option("--q_spectraTypes", type = "character", default = NA), | |
73 make_option("--l_spectraTypes", type = "character", default = NA), | |
74 | |
75 make_option("--q_spectraFilter", action = "store_true"), | |
76 make_option("--l_spectraFilter", action = "store_true"), | |
77 | |
78 make_option("--usePrecursors", action = "store_true"), | |
79 | |
80 make_option("--mzW", type = "double"), | |
81 make_option("--raW", type = "double"), | |
82 | |
83 make_option("--rttol", type = "double", default = NA), | |
84 | |
85 make_option("--updateDb", action = "store_true"), | |
86 make_option("--copyDb", action = "store_true"), | |
87 make_option("--cores", default = 1) | |
88 | |
89 | |
90 ) | |
91 | |
92 # store options | |
93 opt <- parse_args(OptionParser(option_list = option_list)) | |
94 | |
95 print(opt) | |
96 | |
97 # check if the sqlite databases have any spectra | |
98 checkSPeakMeta <- function(dbPth, nme) { | |
99 if (is.null(dbPth)) { | |
100 return(TRUE) | |
101 }else if ((file.exists(dbPth)) & (file.info(dbPth)$size > 0)) { | |
102 con <- DBI::dbConnect(RSQLite::SQLite(), dbPth) | |
103 if (DBI::dbExistsTable(con, "s_peak_meta")) { | |
104 spm <- DBI::dbGetQuery(con, "SELECT * FROM s_peak_meta ORDER BY ROWID ASC LIMIT 1") | |
105 return(TRUE) | |
106 }else if (DBI::dbExistsTable(con, "library_spectra_meta")) { | |
107 spm <- DBI::dbGetQuery(con, "SELECT * FROM library_spectra_meta ORDER BY ROWID ASC LIMIT 1") | |
108 return(TRUE) | |
109 }else{ | |
110 print(paste("No spectra available for ", nme)) | |
111 return(FALSE) | |
112 } | |
113 }else{ | |
114 print(paste("file empty or does not exist for", nme)) | |
115 return(FALSE) | |
116 } | |
117 | |
118 | |
119 } | |
120 | |
121 | |
122 addQueryNameColumn <- function(sm) { | |
123 if (is.null(sm$matchedResults) || length(sm$matchedResults) == 1 || nrow(sm$matchedResults) == 0) { | |
124 return(sm) | |
125 } | |
126 | |
127 con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth) | |
128 if (DBI::dbExistsTable(con, "s_peak_meta")) { | |
129 spm <- DBI::dbGetQuery(con, "SELECT pid, name AS query_entry_name FROM s_peak_meta") | |
130 }else if (DBI::dbExistsTable(con, "library_spectra_meta")) { | |
131 spm <- DBI::dbGetQuery(con, "SELECT id AS pid, name AS query_entry_name FROM library_spectra_meta") | |
132 } | |
133 print(sm$matchedResults) | |
134 if ("pid" %in% colnames(sm$matchedResults)) { | |
135 sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "pid", by.y = "pid") | |
136 }else{ | |
137 sm$matchedResults <- merge(sm$matchedResults, spm, by.x = "qpid", by.y = "pid") | |
138 } | |
139 | |
140 print(sm$xcmsMatchedResults) | |
141 if (is.null(sm$xcmsMatchedResults) || length(sm$xcmsMatchedResults) == 1 || nrow(sm$xcmsMatchedResults) == 0) { | |
142 return(sm) | |
143 }else{ | |
144 if ("pid" %in% colnames(sm$xcmsMatchedResults)) { | |
145 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "pid", by.y = "pid") | |
146 }else{ | |
147 sm$xcmsMatchedResults <- merge(sm$xcmsMatchedResults, spm, by.x = "qpid", by.y = "pid") | |
148 } | |
149 } | |
150 | |
151 return(sm) | |
152 | |
153 } | |
154 | |
155 | |
156 updateDbF <- function(q_con, l_con) { | |
157 message("Adding extra details to database") | |
158 q_con <- DBI::dbConnect(RSQLite::SQLite(), sm$q_dbPth) | |
159 if (DBI::dbExistsTable(q_con, "l_s_peak_meta")) { | |
160 l_s_peak_meta <- DBI::dbGetQuery(q_con, "SELECT * FROM l_s_peak_meta") | |
161 colnames(l_s_peak_meta)[1] <- "pid" | |
162 } | |
163 | |
164 l_con <- DBI::dbConnect(RSQLite::SQLite(), l_dbPth) | |
165 if (DBI::dbExistsTable(l_con, "s_peaks")) { | |
166 l_s_peaks <- DBI::dbGetQuery(q_con, sprintf("SELECT * FROM s_peaks WHERE pid in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ","))) | |
167 | |
168 }else if (DBI::dbExistsTable(l_con, "library_spectra")) { | |
169 l_s_peaks <- DBI::dbGetQuery(l_con, sprintf("SELECT * FROM library_spectra | |
170 WHERE library_spectra_meta_id in (%s)", paste(unique(l_s_peak_meta$pid), collapse = ","))) | |
171 }else{ | |
172 l_s_peaks <- NULL | |
173 } | |
174 | |
175 if (DBI::dbExistsTable(l_con, "source")) { | |
176 l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM source") | |
177 }else if (DBI::dbExistsTable(l_con, "library_spectra_source")) { | |
178 l_source <- DBI::dbGetQuery(l_con, "SELECT * FROM library_spectra_source") | |
179 }else{ | |
180 l_source <- NULL | |
181 } | |
182 | |
183 if (!is.null(l_s_peaks)) { | |
184 DBI::dbWriteTable(q_con, name = "l_s_peaks", value = l_s_peaks, row.names = FALSE, append = TRUE) | |
185 } | |
186 | |
187 if (!is.null(l_source)) { | |
188 DBI::dbWriteTable(q_con, name = "l_source", value = l_source, row.names = FALSE, append = TRUE) | |
189 } | |
190 | |
191 } | |
192 | |
193 | |
194 extractMultiple <- function(optParam) { | |
195 if (!is.na(optParam)) { | |
196 param <- trimws(strsplit(optParam, ",")[[1]]) | |
197 param <- param[param != ""] | |
198 }else { | |
199 param <- NA | |
200 } | |
201 return(param) | |
202 | |
203 } | |
204 | |
205 if (!is.null(opt$q_defaultDb)) { | |
206 q_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData") | |
207 q_dbType <- "sqlite" | |
208 }else if (!opt$q_dbType == "local_config") { | |
209 q_dbType <- opt$q_dbType | |
210 q_dbPth <- opt$q_dbPth | |
211 } | |
212 | |
213 if (!is.null(opt$l_defaultDb)) { | |
214 l_dbPth <- system.file("extdata", "library_spectra", "library_spectra.db", package = "msPurityData") | |
215 l_dbType <- "sqlite" | |
216 }else if (!opt$l_dbType == "local_config") { | |
217 l_dbType <- opt$l_dbType | |
218 l_dbPth <- opt$l_dbPth | |
219 } | |
220 | |
221 q_spectraTypes <- extractMultiple(opt$q_spectraTypes) | |
222 l_spectraTypes <- extractMultiple(opt$l_spectraTypes) | |
223 | |
224 q_polarity <- extractMultiple(opt$q_polarity) | |
225 l_polarity <- extractMultiple(opt$l_polarity) | |
226 | |
227 q_xcmsGroups <- extractMultiple(opt$q_xcmsGroups) | |
228 l_xcmsGroups <- extractMultiple(opt$l_xcmsGroups) | |
229 | |
230 q_pids <- extractMultiple(opt$q_pids) | |
231 l_pids <- extractMultiple(opt$l_pids) | |
232 | |
233 q_sources <- extractMultiple(opt$q_sources) | |
234 l_sources <- extractMultiple(opt$l_sources) | |
235 | |
236 q_sourcesUser <- extractMultiple(opt$q_sourcesUser) | |
237 l_sourcesUser <- extractMultiple(opt$l_sourcesUser) | |
238 | |
239 q_sources <- c(q_sources, q_sourcesUser) | |
240 l_sources <- c(l_sources, l_sourcesUser) | |
241 | |
242 q_instrumentTypes <- extractMultiple(opt$q_instrumentTypes) | |
243 l_instrumentTypes <- extractMultiple(opt$l_instrumentTypes) | |
244 | |
245 q_instrumentTypes <- c(q_instrumentTypes, q_instrumentTypes) | |
246 l_instrumentTypes <- c(l_instrumentTypes, l_instrumentTypes) | |
247 | |
248 | |
249 if (!is.null(opt$l_spectraFilter)) { | |
250 l_spectraFilter <- TRUE | |
251 }else{ | |
252 l_spectraFilter <- FALSE | |
253 } | |
254 | |
255 if (!is.null(opt$q_spectraFilter)) { | |
256 q_spectraFilter <- TRUE | |
257 }else{ | |
258 q_spectraFilter <- FALSE | |
259 } | |
260 | |
261 if (!is.null(opt$updateDb)) { | |
262 updateDb <- TRUE | |
263 }else{ | |
264 updateDb <- FALSE | |
265 } | |
266 | |
267 if (!is.null(opt$copyDb)) { | |
268 copyDb <- TRUE | |
269 }else{ | |
270 copyDb <- FALSE | |
271 } | |
272 | |
273 if (!is.null(opt$l_rtrangeMax)) { | |
274 l_rtrangeMax <- opt$l_rtrangeMax | |
275 }else{ | |
276 l_rtrangeMax <- NA | |
277 } | |
278 | |
279 if (!is.null(opt$q_rtrangeMax)) { | |
280 q_rtrangeMax <- opt$q_rtrangeMax | |
281 }else{ | |
282 q_rtrangeMax <- NA | |
283 } | |
284 | |
285 if (!is.null(opt$l_rtrangeMin)) { | |
286 l_rtrangeMin <- opt$l_rtrangeMin | |
287 }else{ | |
288 l_rtrangeMin <- NA | |
289 } | |
290 | |
291 if (!is.null(opt$q_rtrangeMin)) { | |
292 q_rtrangeMin <- opt$q_rtrangeMin | |
293 }else{ | |
294 q_rtrangeMin <- NA | |
295 } | |
296 | |
297 q_check <- checkSPeakMeta(opt$q_dbPth, "query") | |
298 l_check <- checkSPeakMeta(opt$l_dbPth, "library") | |
299 | |
300 | |
301 if (q_check && l_check) { | |
302 sm <- msPurity::spectralMatching( | |
303 q_purity = opt$q_purity, | |
304 l_purity = opt$l_purity, | |
305 | |
306 q_ppmProd = opt$q_ppmProd, | |
307 l_ppmProd = opt$l_ppmProd, | |
308 | |
309 q_ppmPrec = opt$q_ppmPrec, | |
310 l_ppmPrec = opt$l_ppmPrec, | |
311 | |
312 q_raThres = opt$q_raThres, | |
313 l_raThres = opt$l_raThres, | |
314 | |
315 q_pol = q_polarity, | |
316 l_pol = l_polarity, | |
317 | |
318 q_spectraFilter = q_spectraFilter, | |
319 l_spectraFilter = l_spectraFilter, | |
320 | |
321 q_xcmsGroups = q_xcmsGroups, | |
322 l_xcmsGroups = l_xcmsGroups, | |
323 | |
324 q_pids = q_pids, | |
325 l_pids = l_pids, | |
326 | |
327 q_sources = q_sources, | |
328 l_sources = l_sources, | |
329 | |
330 q_instrumentTypes = q_instrumentTypes, | |
331 l_instrumentTypes = l_instrumentTypes, | |
332 | |
333 q_spectraTypes = q_spectraTypes, | |
334 l_spectraTypes = l_spectraTypes, | |
335 | |
336 l_rtrange = c(l_rtrangeMin, l_rtrangeMax), | |
337 q_rtrange = c(q_rtrangeMin, q_rtrangeMax), | |
338 | |
339 q_accessions = opt$q_accessions, | |
340 l_accessions = opt$l_accessions, | |
341 | |
342 raW = opt$raW, | |
343 mzW = opt$mzW, | |
344 rttol = opt$rttol, | |
345 cores = opt$cores, | |
346 | |
347 copyDb = copyDb, | |
348 updateDb = updateDb, | |
349 outPth = "db_with_spectral_matching.sqlite", | |
350 | |
351 q_dbPth = q_dbPth, | |
352 q_dbType = q_dbType, | |
353 q_dbName = q_dbName, | |
354 q_dbHost = q_dbHost, | |
355 q_dbUser = q_dbUser, | |
356 q_dbPass = q_dbPass, | |
357 q_dbPort = q_dbPort, | |
358 | |
359 l_dbPth = l_dbPth, | |
360 l_dbType = l_dbType, | |
361 l_dbName = l_dbName, | |
362 l_dbHost = l_dbHost, | |
363 l_dbUser = l_dbUser, | |
364 l_dbPass = l_dbPass, | |
365 l_dbPort = l_dbPort | |
366 | |
367 ) | |
368 | |
369 sm <- addQueryNameColumn(sm) | |
370 # Get name of the query results (and merged with the data frames) | |
371 write.table(sm$matchedResults, "matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) | |
372 write.table(sm$xcmsMatchedResults, "xcms_matched_results.tsv", sep = "\t", row.names = FALSE, col.names = TRUE) | |
373 | |
374 if (updateDb) { | |
375 updateDbF(q_con, l_con) | |
376 } | |
377 } |