comparison flagRemove.R @ 0:7dc0cde206d8 draft default tip

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:14:02 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7dc0cde206d8
1 library(msPurity)
2 library(optparse)
3 print(sessionInfo())
4 option_list <- list(
5 make_option(c("-o", "--out_dir"), type = "character", default = getwd(),
6 help = "Output folder for resulting files [default = %default]"
7 ),
8 make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"),
9 help = "The path to the xcmsSet object [default = %default]"
10 ),
11 make_option("--polarity", default = NA,
12 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"
13 ),
14 make_option("--rsd_i_blank", default = 100,
15 help = "RSD threshold for the blank [default = %default]"
16 ),
17 make_option("--minfrac_blank", default = 0.5,
18 help = "minimum fraction of files for features needed for the blank [default = %default]"
19 ),
20 make_option("--rsd_rt_blank", default = 100,
21 help = "RSD threshold for the RT of the blank [default = %default]"
22 ),
23
24 make_option("--ithres_blank", default = 0,
25 help = "Intensity threshold for the blank [default = %default]"
26 ),
27 make_option("--s2b", default = 10,
28 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g.
29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10.
30 1000/10 = 100, so sample has fold change higher than the threshold and the peak
31 is not considered a blank [default = %default]"
32 ),
33 make_option("--blank_class", default = "blank", type = "character",
34 help = "A string representing the class that will be used for the blank.[default = %default]"
35 ),
36 make_option("--egauss_thr", default = NA,
37 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works
38 if the 'verbose columns' and 'fit gauss' was used with xcms
39 [default = %default]"
40 ),
41 make_option("--rsd_i_sample", default = 100,
42 help = "RSD threshold for the samples [default = %default]"
43 ),
44 make_option("--minfrac_sample", default = 0.8,
45 help = "minimum fraction of files for features needed for the samples [default = %default]"
46 ),
47 make_option("--rsd_rt_sample", default = 100,
48 help = "RSD threshold for the RT of the samples [default = %default]"
49 ),
50 make_option("--ithres_sample", default = 5000,
51 help = "Intensity threshold for the sample [default = %default]"
52 ),
53 make_option("--grp_rm_ids", default = NA,
54 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output)
55 [default = %default]"
56 ),
57 make_option("--remove_spectra", action = "store_true",
58 help = "TRUE if flagged spectra is to be removed [default = %default]"
59 ),
60 make_option("--minfrac_xcms", default = 0.5,
61 help = "minfrac for xcms grouping [default = %default]"
62 ),
63 make_option("--mzwid", default = 0.001,
64 help = "mzwid for xcms grouping [default = %default]"
65 ),
66 make_option("--bw", default = 5,
67 help = "bw for xcms grouping [default = %default]"
68 ),
69
70 make_option("--temp_save", action = "store_true",
71 help = "Assign True if files for each step saved (for testing purposes) [default = %default]"
72 ),
73
74 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class")
75
76 )
77
78 # nolint start
79 # make_option("--multilist", action="store_true"
80 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided"
81 # ),
82 # nolint end
83
84 # store options
85 opt <- parse_args(OptionParser(option_list = option_list))
86
87 opt <- replace(opt, opt == "NA", NA)
88
89 if (is.null(opt$temp_save)) {
90 temp_save <- FALSE
91 }else{
92 temp_save <- TRUE
93 }
94
95 if (is.null(opt$remove_spectra)) {
96 remove_spectra <- FALSE
97 }else{
98 remove_spectra <- TRUE
99 }
100
101
102 print(opt)
103
104 getxcmsSetObject <- function(xobject) {
105 # XCMS 1.x
106 if (class(xobject) == "xcmsSet")
107 return(xobject)
108 # XCMS 3.x
109 if (class(xobject) == "XCMSnExp") {
110 # Get the legacy xcmsSet object
111 suppressWarnings(xset <- as(xobject, "xcmsSet"))
112 xcms::sampclass(xset) <- xset@phenoData$sample_group
113 return(xset)
114 }
115 }
116
117
118 loadRData <- function(rdata_path, name) {
119 #loads an RData file, and returns the named xset object if it is there
120 load(rdata_path)
121 return(get(ls()[ls() %in% name]))
122 }
123
124 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata")))
125
126 print(xset)
127 if (is.null(opt$samplelist)) {
128 blank_class <- opt$blank_class
129 }else{
130 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE)
131 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"])
132
133 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class]
134 if (length(chosen_blank) > 1) {
135 print("ERROR: only 1 blank is currently allowed to be used with this tool")
136 quit()
137 }
138 blank_class <- as.character(chosen_blank)
139 print(blank_class)
140 }
141
142
143 if (is.null(opt$multilist)) {
144 ffrm_out <- flag_remove(xset,
145 pol = opt$polarity,
146 rsd_i_blank = opt$rsd_i_blank,
147 minfrac_blank = opt$minfrac_blank,
148 rsd_rt_blank = opt$rsd_rt_blank,
149 ithres_blank = opt$ithres_blank,
150 s2b = opt$s2b,
151 ref.class = blank_class,
152 egauss_thr = opt$egauss_thr,
153 rsd_i_sample = opt$rsd_i_sample,
154 minfrac_sample = opt$minfrac_sample,
155 rsd_rt_sample = opt$rsd_rt_sample,
156 ithres_sample = opt$ithres_sample,
157 minfrac_xcms = opt$minfrac_xcms,
158 mzwid = opt$mzwid,
159 bw = opt$bw,
160 out_dir = opt$out_dir,
161 temp_save = temp_save,
162 remove_spectra = remove_spectra,
163 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]])
164 print("flag remove finished")
165 xset <- ffrm_out[[1]]
166 grp_peaklist <- ffrm_out[[2]]
167 removed_peaks <- ffrm_out[[3]]
168
169 save.image(file = file.path(opt$out_dir, "xset_filtered.RData"), version = 2)
170
171 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up)
172 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv")
173 print(peak_pth)
174 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist),
175 peak_pth, row.names = FALSE, sep = "\t")
176
177 removed_peaks <- data.frame(removed_peaks)
178 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks),
179 file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t")
180
181 }else{
182
183 # nolint start
184 # TODO
185 #xsets <- split(xset, multilist_df$multlist)
186 #
187 #mult_grps <- unique(multilist_df$multlist)
188 #
189 #for (mgrp in mult_grps){
190 # xset_i <- xsets[mgrp]
191 # xcms::group(xset_i,
192 #
193 # }
194 # nolint end
195
196
197 }