Mercurial > repos > computational-metabolomics > mspurity_frag4feature
comparison dimsPredictPuritySingle.R @ 0:85bc606fd219 draft default tip
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:22:55 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:85bc606fd219 |
---|---|
1 library(msPurity) | |
2 library(optparse) | |
3 print(sessionInfo()) | |
4 | |
# Command-line options accepted by this script. Options declared without a
# `type` take their type from the supplied default; the `store_true` flags are
# only present in the parsed result when given on the command line.
option_list <- list(
  # mzML input: a single path, or comma-separated paths plus matching names
  make_option("--mzML_file", type = "character"),
  make_option("--mzML_files", type = "character"),
  make_option("--mzML_filename", type = "character", default = ""),
  make_option("--mzML_galaxy_names", type = "character", default = ""),
  # peak table and output directory
  make_option("--peaks_file", type = "character"),
  make_option(c("-o", "--out_dir"), type = "character"),
  # numeric settings forwarded to the purity prediction
  make_option("--minoffset", default = 0.5),
  make_option("--maxoffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--ppm", default = 4),
  # flags and remaining settings
  make_option("--dimspy", action = "store_true"),
  make_option("--sim", action = "store_true"),
  make_option("--remove_nas", action = "store_true"),
  make_option("--iwNorm", default = "none", type = "character"),
  make_option("--file_num_dimspy", default = 1),
  make_option("--exclude_isotopes", action = "store_true"),
  make_option("--isotope_matrix", type = "character")
)

# Parse the command line into a named list of option values
parser <- OptionParser(option_list = option_list)
opt <- parse_args(parser)

# Log the environment and the parsed options for debugging
print(sessionInfo())
print(opt)

print(opt$mzML_files)
print(opt$mzML_galaxy_names)
33 | |
# Split a comma-separated string into a character vector.
#
# x: a character string; only its first element is split.
#
# The input is echoed to stdout, each piece has surrounding whitespace removed,
# and empty pieces are dropped. Returns the remaining pieces.
str_to_vec <- function(x) {
  print(x)
  pieces <- strsplit(x, ",")[[1]]
  pieces <- trimws(pieces)
  pieces[nzchar(pieces)]
}
39 | |
# Look up the path of an mzML file by its display name.
#
# mzML_files:    comma-separated string of file paths.
# galaxy_names:  comma-separated string of names; entry i names entry i of
#                mzML_files (the lookup is purely positional).
# mzML_filename: name to look up; surrounding whitespace is ignored.
#
# Returns the element(s) of mzML_files at the position(s) where galaxy_names
# equals mzML_filename. Stops when the two lists differ in length (a positional
# lookup would otherwise return the wrong path or NA) or when the name is not
# present.
find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename) {
  mzML_filename <- trimws(mzML_filename)
  mzML_files <- str_to_vec(mzML_files)
  galaxy_names <- str_to_vec(galaxy_names)

  # Guard the positional pairing before indexing one vector with the other
  if (length(mzML_files) != length(galaxy_names)) {
    stop(paste0(
      "mzML file list and name list differ in length (",
      length(mzML_files), " files vs ", length(galaxy_names), " names)"
    ))
  }

  if (!(mzML_filename %in% galaxy_names)) {
    stop(paste("mzML file not found - ", mzML_filename))
  }
  mzML_files[which(galaxy_names == mzML_filename)]
}
50 | |
51 | |
# Decide which mzML file to process.
#
# opt:      parsed option list.
# filename: name used for the lookup in --mzML_files, or appended to
#           --mzML_file when that path does not exist.
#
# Resolution order: an existing --mzML_file path; otherwise a lookup by name
# in --mzML_files; otherwise --mzML_file joined with `filename`. Stops when
# neither --mzML_file nor --mzML_files was supplied.
resolve_mzml_file <- function(opt, filename) {
  # Use `[[` (exact matching): `$` on a list matches partially, so when
  # --mzML_file is absent `opt$mzML_file` can resolve to the mzML_filename
  # entry, or to NULL, which makes `if (file.exists(...))` fail outright.
  single_path <- opt[["mzML_file"]]
  if (!is.null(single_path) && file.exists(single_path)) {
    return(single_path)
  }
  if (!is.null(opt[["mzML_files"]])) {
    return(find_mzml_file(
      opt[["mzML_files"]], opt[["mzML_galaxy_names"]], filename
    ))
  }
  if (is.null(single_path)) {
    stop("No mzML input supplied: use --mzML_file or --mzML_files",
         call. = FALSE)
  }
  file.path(single_path, filename)
}

if (is.null(opt$dimspy)) {
  # Plain tab-separated peak table
  df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
  mzML_file <- resolve_mzml_file(opt, opt$mzML_filename)
} else {
  indf <- read.table(opt$peaks_file,
                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)

  # Candidate file columns start at column 8; pick the requested one
  sample_cols <- colnames(indf)[-seq_len(7)]
  filename <- sample_cols[opt$file_num_dimspy]
  if (length(filename) != 1 || is.na(filename)) {
    stop("--file_num_dimspy (", opt$file_num_dimspy,
         ") does not select a column of the peaks file", call. = FALSE)
  }
  print(filename)

  # check if the data file is mzML or RAW (can only use mzML currently) so
  # we expect an mzML file of the same name in the same folder
  indf$i <- indf[[filename]]
  indf[[filename]] <- as.numeric(indf[[filename]])

  filename <- sub("raw", "mzML", filename, ignore.case = TRUE)
  print(filename)

  mzML_file <- resolve_mzml_file(opt, filename)

  # Update the dimspy output with the correct information: drop the first
  # three rows (negative indexing stays correct for tables of < 4 rows)
  df <- indf[-seq_len(3), , drop = FALSE]
  if ("blank_flag" %in% colnames(df)) {
    # which() drops NA flags instead of turning them into all-NA rows
    df <- df[which(df$blank_flag == 1), , drop = FALSE]
  }
  colnames(df)[colnames(df) == "m.z"] <- "mz"

  # %in% never yields NA, so this is safe when mz already contains NA
  df$mz[df$mz %in% "nan"] <- NA
  df$mz <- as.numeric(df$mz)
}
96 | |
# Drop peaks without an m/z value when --remove_nas was given
if (!is.null(opt$remove_nas)) {
  has_mz <- !is.na(df$mz)
  df <- df[has_mz, ]
}

# Optional isotope matrix: stays NULL unless a file was supplied
im <- NULL
if (!is.null(opt$isotope_matrix)) {
  im <- read.table(opt$isotope_matrix,
                   header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

# store_true flags are absent (NULL) unless passed on the command line
isotopes <- !is.null(opt$exclude_isotopes)
sim <- !is.null(opt$sim)
119 | |
# Offsets are forwarded to the purity prediction and, for the gauss and
# rcosine choices, to the normalisation function constructors.
minOffset <- as.numeric(opt$minoffset)
maxOffset <- as.numeric(opt$maxoffset)

# Build the normalisation function selected by --iwNorm. An unsupported value
# stops here with a clear message; previously iwNorm/iwNormFun were simply
# left undefined and the script failed later with "object not found".
iwNormFun <- switch(
  opt$iwNorm,
  none = NULL,
  gauss = msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset),
  rcosine = msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset),
  QE5 = msPurity::iwNormQE.5(),
  stop("Unsupported --iwNorm value '", opt$iwNorm,
       "' (expected one of: none, gauss, rcosine, QE5)", call. = FALSE)
)
# Normalisation is enabled for every choice except "none"
iwNorm <- opt$iwNorm != "none"
136 | |
# Log the inputs that go into the purity prediction
print("FIRST ROWS OF PEAK FILE")
print(head(df))
print(mzML_file)

# Predict the purity for every m/z value in the peak table
purity <- msPurity::dimsPredictPuritySingle(
  df$mz,
  filepth = mzML_file,
  minOffset = minOffset,
  maxOffset = maxOffset,
  ppm = opt$ppm,
  mzML = TRUE,
  sim = sim,
  ilim = opt$ilim,
  isotopes = isotopes,
  im = im,
  iwNorm = iwNorm,
  iwNormFun = iwNormFun
)

# Put the prediction columns next to the original peak columns
predicted <- cbind(df, purity)
print(head(predicted))

# Write the combined table as a tab-separated file in the output directory
out_file <- file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")
print(out_file)

write.table(predicted, out_file, row.names = FALSE, sep = "\t")