Mercurial > repos > computational-metabolomics > mspurity_flagremove
comparison flagRemove.R @ 0:353d48ece635 draft default tip
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:17:52 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:353d48ece635 |
---|---|
1 library(msPurity) | |
2 library(optparse) | |
3 print(sessionInfo()) | |
4 option_list <- list( | |
5 make_option(c("-o", "--out_dir"), type = "character", default = getwd(), | |
6 help = "Output folder for resulting files [default = %default]" | |
7 ), | |
8 make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"), | |
9 help = "The path to the xcmsSet object [default = %default]" | |
10 ), | |
11 make_option("--polarity", default = NA, | |
12 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" | |
13 ), | |
14 make_option("--rsd_i_blank", default = 100, | |
15 help = "RSD threshold for the blank [default = %default]" | |
16 ), | |
17 make_option("--minfrac_blank", default = 0.5, | |
18 help = "minimum fraction of files for features needed for the blank [default = %default]" | |
19 ), | |
20 make_option("--rsd_rt_blank", default = 100, | |
21 help = "RSD threshold for the RT of the blank [default = %default]" | |
22 ), | |
23 | |
24 make_option("--ithres_blank", default = 0, | |
25 help = "Intensity threshold for the blank [default = %default]" | |
26 ), | |
27 make_option("--s2b", default = 10, | |
28 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. | |
29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. | |
30 1000/10 = 100, so sample has fold change higher than the threshold and the peak | |
31 is not considered a blank [default = %default]" | |
32 ), | |
33 make_option("--blank_class", default = "blank", type = "character", | |
34 help = "A string representing the class that will be used for the blank.[default = %default]" | |
35 ), | |
36 make_option("--egauss_thr", default = NA, | |
37 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works | |
38 if the 'verbose columns' and 'fit gauss' was used with xcms | |
39 [default = %default]" | |
40 ), | |
41 make_option("--rsd_i_sample", default = 100, | |
42 help = "RSD threshold for the samples [default = %default]" | |
43 ), | |
44 make_option("--minfrac_sample", default = 0.8, | |
45 help = "minimum fraction of files for features needed for the samples [default = %default]" | |
46 ), | |
47 make_option("--rsd_rt_sample", default = 100, | |
48 help = "RSD threshold for the RT of the samples [default = %default]" | |
49 ), | |
50 make_option("--ithres_sample", default = 5000, | |
51 help = "Intensity threshold for the sample [default = %default]" | |
52 ), | |
53 make_option("--grp_rm_ids", default = NA, | |
54 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) | |
55 [default = %default]" | |
56 ), | |
57 make_option("--remove_spectra", action = "store_true", | |
58 help = "TRUE if flagged spectra is to be removed [default = %default]" | |
59 ), | |
60 make_option("--minfrac_xcms", default = 0.5, | |
61 help = "minfrac for xcms grouping [default = %default]" | |
62 ), | |
63 make_option("--mzwid", default = 0.001, | |
64 help = "mzwid for xcms grouping [default = %default]" | |
65 ), | |
66 make_option("--bw", default = 5, | |
67 help = "bw for xcms grouping [default = %default]" | |
68 ), | |
69 | |
70 make_option("--temp_save", action = "store_true", | |
71 help = "Assign True if files for each step saved (for testing purposes) [default = %default]" | |
72 ), | |
73 | |
74 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") | |
75 | |
76 ) | |
77 | |
78 # nolint start | |
79 # make_option("--multilist", action="store_true" | |
80 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" | |
81 # ), | |
82 # nolint end | |
83 | |
84 # store options | |
85 opt <- parse_args(OptionParser(option_list = option_list)) | |
86 | |
87 opt <- replace(opt, opt == "NA", NA) | |
88 | |
89 if (is.null(opt$temp_save)) { | |
90 temp_save <- FALSE | |
91 }else{ | |
92 temp_save <- TRUE | |
93 } | |
94 | |
95 if (is.null(opt$remove_spectra)) { | |
96 remove_spectra <- FALSE | |
97 }else{ | |
98 remove_spectra <- TRUE | |
99 } | |
100 | |
101 | |
102 print(opt) | |
103 | |
104 getxcmsSetObject <- function(xobject) { | |
105 # XCMS 1.x | |
106 if (class(xobject) == "xcmsSet") | |
107 return(xobject) | |
108 # XCMS 3.x | |
109 if (class(xobject) == "XCMSnExp") { | |
110 # Get the legacy xcmsSet object | |
111 suppressWarnings(xset <- as(xobject, "xcmsSet")) | |
112 xcms::sampclass(xset) <- xset@phenoData$sample_group | |
113 return(xset) | |
114 } | |
115 } | |
116 | |
117 | |
118 loadRData <- function(rdata_path, name) { | |
119 #loads an RData file, and returns the named xset object if it is there | |
120 load(rdata_path) | |
121 return(get(ls()[ls() %in% name])) | |
122 } | |
123 | |
124 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) | |
125 | |
126 print(xset) | |
127 if (is.null(opt$samplelist)) { | |
128 blank_class <- opt$blank_class | |
129 }else{ | |
130 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) | |
131 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) | |
132 | |
133 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] | |
134 if (length(chosen_blank) > 1) { | |
135 print("ERROR: only 1 blank is currently allowed to be used with this tool") | |
136 quit() | |
137 } | |
138 blank_class <- as.character(chosen_blank) | |
139 print(blank_class) | |
140 } | |
141 | |
142 | |
143 if (is.null(opt$multilist)) { | |
144 ffrm_out <- flag_remove(xset, | |
145 pol = opt$polarity, | |
146 rsd_i_blank = opt$rsd_i_blank, | |
147 minfrac_blank = opt$minfrac_blank, | |
148 rsd_rt_blank = opt$rsd_rt_blank, | |
149 ithres_blank = opt$ithres_blank, | |
150 s2b = opt$s2b, | |
151 ref.class = blank_class, | |
152 egauss_thr = opt$egauss_thr, | |
153 rsd_i_sample = opt$rsd_i_sample, | |
154 minfrac_sample = opt$minfrac_sample, | |
155 rsd_rt_sample = opt$rsd_rt_sample, | |
156 ithres_sample = opt$ithres_sample, | |
157 minfrac_xcms = opt$minfrac_xcms, | |
158 mzwid = opt$mzwid, | |
159 bw = opt$bw, | |
160 out_dir = opt$out_dir, | |
161 temp_save = temp_save, | |
162 remove_spectra = remove_spectra, | |
163 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]) | |
164 print("flag remove finished") | |
165 xset <- ffrm_out[[1]] | |
166 grp_peaklist <- ffrm_out[[2]] | |
167 removed_peaks <- ffrm_out[[3]] | |
168 | |
169 save.image(file = file.path(opt$out_dir, "xset_filtered.RData"), version = 2) | |
170 | |
171 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) | |
172 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") | |
173 print(peak_pth) | |
174 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), | |
175 peak_pth, row.names = FALSE, sep = "\t") | |
176 | |
177 removed_peaks <- data.frame(removed_peaks) | |
178 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), | |
179 file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t") | |
180 | |
181 }else{ | |
182 | |
183 # nolint start | |
184 # TODO | |
185 #xsets <- split(xset, multilist_df$multlist) | |
186 # | |
187 #mult_grps <- unique(multilist_df$multlist) | |
188 # | |
189 #for (mgrp in mult_grps){ | |
190 # xset_i <- xsets[mgrp] | |
191 # xcms::group(xset_i, | |
192 # | |
193 # } | |
194 # nolint end | |
195 | |
196 | |
197 } |