comparison lib.r @ 0:ae9ef9219aae draft default tip

"planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 2f3f29dbaaa8568b40818d3476159c384f1a21d6-dirty"
author eschen42
date Fri, 12 Feb 2021 18:05:29 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ae9ef9219aae
1 #@authors ABiMS TEAM, Y. Guitton
2 # lib.r for Galaxy Workflow4Metabolomics xcms tools
3
4 #@author G. Le Corguille
5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE","FALSE"))
10 args[key] = as.logical(args[key])
11 }
12 return(args)
13 }
14
15 #@author G. Le Corguille
16 # This function will
17 # - load the packages
18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) {
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
21
22 sessioninfo = sessionInfo()
23 cat(sessioninfo$R.version$version.string,"\n")
24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
26 cat("Other loaded packages:\n")
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
28 }
29
30 #@author G. Le Corguille
31 # This function merge several chromBPI or chromTIC into one.
32 mergeChrom <- function(chrom_merged, chrom) {
33 if (is.null(chrom_merged)) return(NULL)
34 chrom_merged@.Data <- cbind(chrom_merged@.Data, chrom@.Data)
35 return(chrom_merged)
36 }
37
38 #@author G. Le Corguille
39 # This function merge several xdata into one.
40 mergeXData <- function(args) {
41 chromTIC <- NULL
42 chromBPI <- NULL
43 chromTIC_adjusted <- NULL
44 chromBPI_adjusted <- NULL
45 md5sumList <- NULL
46 for(image in args$images) {
47
48 load(image)
49 # Handle infiles
50 if (!exists("singlefile")) singlefile <- NULL
51 if (!exists("zipfile")) zipfile <- NULL
52 rawFilePath <- retrieveRawfileInTheWorkingDirectory(singlefile, zipfile, args)
53 zipfile <- rawFilePath$zipfile
54 singlefile <- rawFilePath$singlefile
55
56 if (exists("raw_data")) xdata <- raw_data
57 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
58
59 cat(sampleNamesList$sampleNamesOrigin,"\n")
60
61 if (!exists("xdata_merged")) {
62 xdata_merged <- xdata
63 singlefile_merged <- singlefile
64 md5sumList_merged <- md5sumList
65 sampleNamesList_merged <- sampleNamesList
66 chromTIC_merged <- chromTIC
67 chromBPI_merged <- chromBPI
68 chromTIC_adjusted_merged <- chromTIC_adjusted
69 chromBPI_adjusted_merged <- chromBPI_adjusted
70 } else {
71 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata)
72 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- xcms:::.concatenate_OnDiskMSnExp(xdata_merged,xdata)
73 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
74
75 singlefile_merged <- c(singlefile_merged,singlefile)
76 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin)
77 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin)
78 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames)
79 chromTIC_merged <- mergeChrom(chromTIC_merged, chromTIC)
80 chromBPI_merged <- mergeChrom(chromBPI_merged, chromBPI)
81 chromTIC_adjusted_merged <- mergeChrom(chromTIC_adjusted_merged, chromTIC_adjusted)
82 chromBPI_adjusted_merged <- mergeChrom(chromBPI_adjusted_merged, chromBPI_adjusted)
83 }
84 }
85 rm(image)
86 xdata <- xdata_merged; rm(xdata_merged)
87 singlefile <- singlefile_merged; rm(singlefile_merged)
88 md5sumList <- md5sumList_merged; rm(md5sumList_merged)
89 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged)
90
91 if (!is.null(args$sampleMetadata)) {
92 cat("\tXSET PHENODATA SETTING...\n")
93 sampleMetadataFile <- args$sampleMetadata
94 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F)
95 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)]
96
97 if (any(is.na(pData(xdata)$sample_group))) {
98 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
99 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" "))
100 print(error_message)
101 stop(error_message)
102 }
103 }
104
105 if (!is.null(chromTIC_merged)) { chromTIC <- chromTIC_merged; chromTIC@phenoData <- xdata@phenoData }
106 if (!is.null(chromBPI_merged)) { chromBPI <- chromBPI_merged; chromBPI@phenoData <- xdata@phenoData }
107 if (!is.null(chromTIC_adjusted_merged)) { chromTIC_adjusted <- chromTIC_adjusted_merged; chromTIC_adjusted@phenoData <- xdata@phenoData }
108 if (!is.null(chromBPI_adjusted_merged)) { chromBPI_adjusted <- chromBPI_adjusted_merged; chromBPI_adjusted@phenoData <- xdata@phenoData }
109
110 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList, "chromTIC"=chromTIC, "chromBPI"=chromBPI, "chromTIC_adjusted"=chromTIC_adjusted, "chromBPI_adjusted"=chromBPI_adjusted))
111 }
112
113 #@author G. Le Corguille
114 # This function convert if it is required the Retention Time in minutes
115 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
116 if (convertRTMinute){
117 #converting the retention times (seconds) into minutes
118 print("converting the retention times into minutes in the variableMetadata")
119 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60
120 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60
121 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60
122 }
123 return (variableMetadata)
124 }
125
126 #@author G. Le Corguille
127 # This function format ions identifiers
128 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
129 splitDeco <- strsplit(as.character(variableMetadata$name),"_")
130 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
131 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
132 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
133 return(variableMetadata)
134 }
135
136 #@author G. Le Corguille
137 # This function convert the remain NA to 0 in the dataMatrix
138 naTOzeroDataMatrix <- function(dataMatrix, naTOzero) {
139 if (naTOzero){
140 dataMatrix[is.na(dataMatrix)] <- 0
141 }
142 return (dataMatrix)
143 }
144
145 #@author G. Le Corguille
146 # Draw the plotChromPeakDensity 3 per page in a pdf file
147 getPlotChromPeakDensity <- function(xdata, param = NULL, mzdigit=4) {
148 pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
149
150 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
151
152 if(length(unique(xdata$sample_group))<10){
153 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
154 }else{
155 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3")
156 }
157 names(group_colors) <- unique(xdata$sample_group)
158 col_per_samp <- as.character(xdata$sample_group)
159 for(i in 1:length(group_colors)){col_per_samp[col_per_samp==(names(group_colors)[i])]<-group_colors[i]}
160
161 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
162 for (i in 1:nrow(featureDefinitions(xdata))) {
163 mzmin = featureDefinitions(xdata)[i,]$mzmin
164 mzmax = featureDefinitions(xdata)[i,]$mzmax
165 plotChromPeakDensity(xdata, param = param, mz=c(mzmin,mzmax), col=col_per_samp, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit)))
166 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
167 }
168
169 dev.off()
170 }
171
172 #@author G. Le Corguille
173 # Draw the plotChromPeakDensity 3 per page in a pdf file
174 getPlotAdjustedRtime <- function(xdata) {
175
176 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
177
178 # Color by group
179 if(length(unique(xdata$sample_group))<10){
180 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
181 }else{
182 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3")
183 }
184 if (length(group_colors) > 1) {
185 names(group_colors) <- unique(xdata$sample_group)
186 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
187 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
188 }
189
190 # Color by sample
191 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
192 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
193
194 dev.off()
195 }
196
197 #@author G. Le Corguille
198 # value: intensity values to be used into, maxo or intb
199 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, naTOzero=T, variableMetadataOutput, dataMatrixOutput, sampleNamesList) {
200 dataMatrix <- featureValues(xdata, method="medret", value=intval)
201 colnames(dataMatrix) <- make.names(tools::file_path_sans_ext(colnames(dataMatrix)))
202 dataMatrix = cbind(name=groupnames(xdata), dataMatrix)
203 variableMetadata <- featureDefinitions(xdata)
204 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt"
205 variableMetadata = data.frame(name=groupnames(xdata), variableMetadata)
206
207 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
208 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
209 dataMatrix <- naTOzeroDataMatrix(dataMatrix, naTOzero)
210
211 # FIX: issue when the vector at peakidx is too long and is written in a new line during the export
212 variableMetadata[,"peakidx"] <- vapply(variableMetadata[,"peakidx"], FUN = paste, FUN.VALUE = character(1), collapse = ",")
213
214 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F)
215 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
216
217 }
218
219 #@author G. Le Corguille
220 # It allow different of field separators
221 getDataFrameFromFile <- function(filename, header=T) {
222 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F)
223 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F)
224 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F)
225 if (ncol(myDataFrame) < 2) {
226 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
227 print(error_message)
228 stop(error_message)
229 }
230 return(myDataFrame)
231 }
232
233 #@author G. Le Corguille
234 # Draw the BPI and TIC graphics
235 # colored by sample names or class names
236 getPlotChromatogram <- function(chrom, xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") {
237
238 if (aggregationFun == "sum")
239 type="Total Ion Chromatograms"
240 else
241 type="Base Peak Intensity Chromatograms"
242
243 adjusted="Raw"
244 if (hasAdjustedRtime(xdata))
245 adjusted="Adjusted"
246
247 main <- paste(type,":",adjusted,"data")
248
249 pdf(pdfname, width=16, height=10)
250
251 # Color by group
252 if(length(unique(xdata$sample_group))<10){
253 group_colors <- brewer.pal(length(unique(xdata$sample_group)), "Set1")
254 }else{
255 group_colors <- hcl.colors(length(unique(xdata$sample_group)), palette="Dark 3")
256 }
257 if (length(group_colors) > 1) {
258 names(group_colors) <- unique(xdata$sample_group)
259 plot(chrom, col = group_colors[chrom$sample_group], main=main, peakType = "none")
260 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
261 }
262
263 # Color by sample
264 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main, peakType = "none")
265 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
266
267 dev.off()
268 }
269
270
271 # Get the polarities from all the samples of a condition
272 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
273 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
274 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
275 cat("Creating the sampleMetadata file...\n")
276
277 #Create the sampleMetada dataframe
278 sampleMetadata <- xdata@phenoData@data
279 rownames(sampleMetadata) <- NULL
280 colnames(sampleMetadata) <- c("sample_name", "class")
281
282 sampleNamesOrigin <- sampleMetadata$sample_name
283 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
284
285 if (any(duplicated(sampleNamesMakeNames))) {
286 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
287 for (sampleName in sampleNamesOrigin) {
288 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
289 }
290 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
291 }
292
293 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
294 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
295 for (sampleName in sampleNamesOrigin) {
296 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
297 }
298 }
299
300 sampleMetadata$sample_name <- sampleNamesMakeNames
301
302
303 #For each sample file, the following actions are done
304 for (fileIdx in 1:length(fileNames(xdata))) {
305 #Check if the file is in the CDF format
306 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
307
308 # If the column isn't exist, with add one filled with NA
309 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
310
311 #Extract the polarity (a list of polarities)
312 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"]
313 #Verify if all the scans have the same polarity
314 uniq_list <- unique(polarity)
315 if (length(uniq_list)>1){
316 polarity <- "mixed"
317 } else {
318 polarity <- as.character(uniq_list)
319 }
320
321 #Set the polarity attribute
322 sampleMetadata$polarity[fileIdx] <- polarity
323 }
324
325 }
326
327 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)
328
329 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames))
330
331 }
332
333
334 # This function will compute MD5 checksum to check the data integrity
335 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
336 getMd5sum <- function (files) {
337 cat("Compute md5 checksum...\n")
338 library(tools)
339 return(as.matrix(md5sum(files)))
340 }
341
342 # This function retrieve the raw file in the working directory
343 # - if zipfile: unzip the file with its directory tree
344 # - if singlefiles: set symlink with the good filename
345 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
346 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile, args, prefix="") {
347
348 if (!(prefix %in% c("","Positive","Negative","MS1","MS2"))) stop("prefix must be either '', 'Positive', 'Negative', 'MS1' or 'MS2'")
349
350 # single - if the file are passed in the command arguments -> refresh singlefile
351 if (!is.null(args[[paste0("singlefile_galaxyPath",prefix)]])) {
352 singlefile_galaxyPaths <- unlist(strsplit(args[[paste0("singlefile_galaxyPath",prefix)]],"\\|"))
353 singlefile_sampleNames <- unlist(strsplit(args[[paste0("singlefile_sampleName",prefix)]],"\\|"))
354
355 singlefile <- NULL
356 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
357 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
358 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
359 # In case, an url is used to import data within Galaxy
360 singlefile_sampleName <- tail(unlist(strsplit(singlefile_sampleName,"/")), n=1)
361 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
362 }
363 }
364 # zipfile - if the file are passed in the command arguments -> refresh zipfile
365 if (!is.null(args[[paste0("zipfile",prefix)]]))
366 zipfile <- args[[paste0("zipfile",prefix)]]
367
368 # single
369 if(!is.null(singlefile) && (length("singlefile")>0)) {
370 files <- vector()
371 for (singlefile_sampleName in names(singlefile)) {
372 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
373 if(!file.exists(singlefile_galaxyPath)){
374 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
375 print(error_message); stop(error_message)
376 }
377
378 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
379 file.copy(singlefile_galaxyPath, singlefile_sampleName)
380 files <- c(files, singlefile_sampleName)
381 }
382 }
383 # zipfile
384 if(!is.null(zipfile) && (zipfile != "")) {
385 if(!file.exists(zipfile)){
386 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
387 print(error_message)
388 stop(error_message)
389 }
390 suppressWarnings(unzip(zipfile, unzip="unzip"))
391
392 #get the directory name
393 suppressWarnings(filesInZip <- unzip(zipfile, list=T))
394 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
395 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
396 directory <- "."
397 if (length(directories) == 1) directory <- directories
398
399 cat("files_root_directory\t",directory,"\n")
400
401 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
402 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
403 info <- file.info(directory)
404 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
405 files <- c(directory[!info$isdir], listed)
406 exists <- file.exists(files)
407 files <- files[exists]
408
409 }
410 return(list(zipfile=zipfile, singlefile=singlefile, files=files))
411
412 }
413
414
415 # This function retrieve a xset like object
416 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
417 getxcmsSetObject <- function(xobject) {
418 # XCMS 1.x
419 if (class(xobject) == "xcmsSet")
420 return (xobject)
421 # XCMS 3.x
422 if (class(xobject) == "XCMSnExp") {
423 # Get the legacy xcmsSet object
424 suppressWarnings(xset <- as(xobject, 'xcmsSet'))
425 if (!is.null(xset@phenoData$sample_group))
426 sampclass(xset) <- xset@phenoData$sample_group
427 else
428 sampclass(xset) <- "."
429 return (xset)
430 }
431 }