comparison multivariate_wrapper.R @ 0:b2b02fb81a0a draft default tip

planemo upload for repository https://github.com/eschen42/multivariate/tree/dump_rdata forked from https://github.com/workflow4metabolomics/multivariate.git commit 2ace6612c83223925e25d38bce9530f90f20a602-dirty
author eschen42
date Mon, 14 Aug 2017 20:57:59 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:b2b02fb81a0a
1 #!/usr/bin/env Rscript
2
3 library(batch) ## parseCommandArgs
4
5 # Constants
6 argv <- commandArgs(trailingOnly = FALSE)
7 script.path <- sub("--file=","",argv[grep("--file=",argv)])
8 prog.name <- basename(script.path)
9
10 # Print help
11 if (length(grep('-h', argv)) >0) {
12 cat("Usage:", prog.name,
13 "dataMatrix_in myDataMatrix.tsv",
14 "sampleMetadata_in mySampleData.tsv",
15 "variableMetadata_in myVariableMetadata.tsv",
16 "respC ...",
17 "predI ...",
18 "orthoI ...",
19 "testL ...",
20 "typeC ...",
21 "parAsColC ...",
22 "parCexN ...",
23 "parPc1I ...",
24 "parPc2I ...",
25 "parMahalC ...",
26 "parLabVc ...",
27 "algoC ...",
28 "crossvalI ...",
29 "log10L ...",
30 "permI ...",
31 "scaleC ...",
32 "sampleMetadata_out mySampleMetadata_out.tsv",
33 "variableMetadata_out myVariableMetadata_out.tsv",
34 "figure figure.pdf",
35 "information information.txt",
36 "\n")
37 quit(status = 0)
38 }
39
40 ########
41 # MAIN #
42 ########
43
44 argVc <- unlist(parseCommandArgs(evaluate=FALSE))
45
46 ##------------------------------
47 ## Initializing
48 ##------------------------------
49
50 ## options
51 ##--------
52
53 strAsFacL <- options()$stringsAsFactors
54 options(stringsAsFactors = FALSE)
55
56 ## libraries
57 ##----------
58
59 suppressMessages(library(ropls))
60
61 if(packageVersion("ropls") < "1.4.0")
62 stop("Please use 'ropls' versions of 1.4.0 and above")
63
64 ## constants
65 ##----------
66
67 modNamC <- "Multivariate" ## module name
68
69 topEnvC <- environment()
70 flgC <- "\n"
71
72 ## functions
73 ##----------
74
75 flgF <- function(tesC,
76 envC = topEnvC,
77 txtC = NA) { ## management of warning and error messages
78
79 tesL <- eval(parse(text = tesC), envir = envC)
80
81 if(!tesL) {
82
83 sink()
84 stpTxtC <- ifelse(is.na(txtC),
85 paste0(tesC, " is FALSE"),
86 txtC)
87
88 stop(stpTxtC,
89 call. = FALSE)
90
91 }
92
93 } ## flgF
94
95
96 ## log file
97 ##---------
98
99 sink(argVc["information"])
100
101 cat("\nStart of the '", modNamC, "' Galaxy module call: ",
102 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="")
103
104
105 ## arguments
106 ##----------
107
108 xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"],
109 check.names = FALSE,
110 header = TRUE,
111 row.names = 1,
112 sep = "\t",
113 comment.char = "")))
114
115 samDF <- read.table(argVc["sampleMetadata_in"],
116 check.names = FALSE,
117 header = TRUE,
118 row.names = 1,
119 sep = "\t",
120 comment.char = "")
121 flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
122
123 varDF <- read.table(argVc["variableMetadata_in"],
124 check.names = FALSE,
125 header = TRUE,
126 row.names = 1,
127 sep = "\t",
128 comment.char = "")
129 flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section")
130
131 flgF("argVc['respC'] == 'none' || (argVc['respC'] %in% colnames(samDF))",
132 txtC = paste0("Y Response argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata"))
133 if(argVc["respC"] != "none") {
134 yMCN <- matrix(samDF[, argVc['respC']], ncol = 1, dimnames = list(rownames(xMN), argVc['respC']))
135 } else
136 yMCN <- NULL
137
138 if(argVc["testL"] == "TRUE") {
139 flgF("!is.null(yMCN)",
140 txtC = "Predictions cannot be peformed with PCA models")
141 flgF("'test.' %in% colnames(samDF)",
142 txtC = "No 'test.' column found in the sample metadata")
143 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))",
144 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else")
145 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))",
146 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else")
147 flgF("!any(is.na(yMCN[samDF[, 'test.'] == 'no', ]))",
148 txtC = "samples for model training (i.e. 'no' value in the 'test.' column) should not contain NA in the response")
149 tesVl <- samDF[, "test."] == "yes"
150 xTesMN <- xMN[tesVl, , drop = FALSE]
151 xMN <- xMN[!tesVl, , drop = FALSE]
152 yMCN <- yMCN[!tesVl, , drop = FALSE]
153 } else
154 tesVl <- NULL
155
156 if(!('parAsColC' %in% names(argVc)))
157 argVc["parAsColC"] <- "none"
158 flgF("argVc['parAsColC'] == 'none' || argVc['parAsColC'] %in% colnames(samDF)", txtC = paste0("Sample color argument (", argVc['parAsColC'], ") must be either none or one of the column names (first row) of your sample metadata"))
159 if(argVc["parAsColC"] != "none") {
160 parAsColFcVn <- samDF[, argVc['parAsColC']]
161 if(is.character(parAsColFcVn))
162 parAsColFcVn <- factor(parAsColFcVn)
163 } else
164 parAsColFcVn <- NA
165
166 if(!('parMahalC' %in% names(argVc)) || argVc["parMahalC"] == "NA") {
167 if(!is.null(yMCN) && ncol(yMCN) == 1 && mode(yMCN) == "character")
168 argVc["parMahalC"] <- argVc["respC"]
169 else
170 argVc["parMahalC"] <- "none"
171 }
172 flgF("argVc['parMahalC'] == 'none' || (argVc['parMahalC'] %in% colnames(samDF))",
173 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must be either 'NA', 'none' or one of the column names (first row) of your sample metadata"))
174 if(argVc["parMahalC"] == "none") {
175 parEllipsesL <- FALSE
176 } else {
177 if(is.null(yMCN)) { ## PCA case
178 flgF("mode(samDF[, argVc['parMahalC']]) == 'character'",
179 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must correspond to a column of characters in your sampleMetadata"))
180 parAsColFcVn <- factor(samDF[, argVc["parMahalC"]])
181 parEllipsesL <- TRUE
182 } else { ## (O)PLS-DA case
183 flgF("identical(as.character(argVc['respC']), as.character(argVc['parMahalC']))",
184 txtC = paste0("The Mahalanobis argument (", argVc['parMahalC'], ") must be identical to the Y response argument (", argVc['respC'], ")"))
185 parEllipsesL <- TRUE
186 }
187 }
188
189 if(!('parLabVc' %in% names(argVc)))
190 argVc["parLabVc"] <- "none"
191 flgF("argVc['parLabVc'] == 'none' || (argVc['parLabVc'] %in% colnames(samDF))",
192 txtC = paste0("Sample labels argument (", argVc['parLabVc'], ") must be either none or one of the column names (first row) of your sample metadata"))
193 if('parLabVc' %in% names(argVc))
194 if(argVc["parLabVc"] != "none") {
195 flgF("mode(samDF[, argVc['parLabVc']]) == 'character'",
196 txtC = paste0("The sample label argument (", argVc['parLabVc'], ") must correspond to a sample metadata column of characters (not numerics)"))
197 parLabVc <- samDF[, argVc['parLabVc']]
198 } else
199 parLabVc <- NA
200
201 if('parPc1I' %in% names(argVc)) {
202 parCompVi <- as.numeric(c(argVc["parPc1I"], argVc["parPc2I"]))
203 } else
204 parCompVi <- c(1, 2)
205
206
207 ## checking
208 ##---------
209
210
211 flgF("argVc['predI'] == 'NA' || argVc['orthoI'] == 'NA' || as.numeric(argVc['orthoI']) > 0 || parCompVi[2] <= as.numeric(argVc['predI'])",
212 txtC = paste0("The highest component to display (", parCompVi[2], ") must not exceed the number of predictive components of the model (", argVc['predI'], ")"))
213
214
215 if(argVc["orthoI"] == "NA" || argVc["orthoI"] != "0")
216 if(argVc["predI"] == "NA" || argVc["predI"] != "0") {
217 argVc["predI"] <- "1"
218 cat("\nWarning: OPLS: number of predictive components ('predI' argument) set to 1\n", sep = "")
219 }
220
221 if(argVc["predI"] != "NA")
222 if(as.numeric(argVc["predI"]) > min(nrow(xMN), ncol(xMN))) {
223 argVc["predI"] <- as.character(min(nrow(xMN), ncol(xMN)))
224 cat("\nWarning: 'predI' set to the minimum of the dataMatrix dimensions: ", as.numeric(argVc["predI"]), "\n", sep = "")
225 }
226
227 if("algoC" %in% names(argVc) && argVc["algoC"] == "svd" && length(which(is.na(c(xMN)))) > 0) {
228 minN <- min(c(xMN[!is.na(xMN)])) / 2
229 cat("\nWarning: Missing values set to ", round(minN, 1), " (half minimum value) for 'svd' algorithm to be used\n", sep = "")
230 }
231
232
233 ##------------------------------
234 ## Computation and plot
235 ##------------------------------
236
237
238 sink()
239
240 optWrnN <- options()$warn
241 options(warn = -1)
242
243 ropLs <- opls(x = xMN,
244 y = yMCN,
245 predI = ifelse(argVc["predI"] == "NA", NA, as.numeric(argVc["predI"])),
246 orthoI = ifelse(argVc["orthoI"] == "NA", NA, as.numeric(argVc["orthoI"])),
247 algoC = ifelse('algoC' %in% names(argVc), argVc["algoC"], "default"),
248 crossvalI = ifelse('crossvalI' %in% names(argVc), as.numeric(argVc["crossvalI"]), 7),
249 log10L = ifelse('log10L' %in% names(argVc), as.logical(argVc["log10L"]), FALSE),
250 permI = ifelse('permI' %in% names(argVc), as.numeric(argVc["permI"]), 20),
251 scaleC = ifelse('scaleC' %in% names(argVc), argVc["scaleC"], "standard"),
252 subset = NULL,
253 printL = FALSE,
254 plotL = FALSE,
255 .sinkC = argVc['information'])
256
257 modC <- ropLs@typeC
258 sumDF <- getSummaryDF(ropLs)
259 desMC <- ropLs@descriptionMC
260 scoreMN <- getScoreMN(ropLs)
261 loadingMN <- getLoadingMN(ropLs)
262
263 vipVn <- coeMN <- orthoScoreMN <- orthoLoadingMN <- orthoVipVn <- NULL
264
265 if(grepl("PLS", modC)) {
266
267 vipVn <- getVipVn(ropLs)
268 coeMN <- coef(ropLs)
269
270 if(grepl("OPLS", modC)) {
271 orthoScoreMN <- getScoreMN(ropLs, orthoL = TRUE)
272 orthoLoadingMN <- getLoadingMN(ropLs, orthoL = TRUE)
273 orthoVipVn <- getVipVn(ropLs, orthoL = TRUE)
274 }
275
276 }
277
278 ploC <- ifelse('typeC' %in% names(argVc), argVc["typeC"], "summary")
279
280 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) {
281 if(!(ploC %in% c("permutation", "overview"))) {
282 ploC <- "summary"
283 plotWarnL <- TRUE
284 }
285 } else
286 plotWarnL <- FALSE
287
288 plot(ropLs,
289 typeVc = ploC,
290 parAsColFcVn = parAsColFcVn,
291 parCexN = ifelse('parCexN' %in% names(argVc), as.numeric(argVc["parCexN"]), 0.8),
292 parCompVi = parCompVi,
293 parEllipsesL = parEllipsesL,
294 parLabVc = parLabVc,
295 file.pdfC = argVc['figure'],
296 .sinkC = argVc['information'])
297
298 options(warn = optWrnN)
299
300
301 ##------------------------------
302 ## Print
303 ##------------------------------
304
305
306 sink(argVc["information"], append = TRUE)
307
308 if(plotWarnL)
309 cat("\nWarning: For single component models, only 'overview' (and 'permutation' in case of single response (O)PLS(-DA)) plot(s) are available\n", sep = "")
310
311
312 cat("\n", modC, "\n", sep = "")
313
314 cat("\n", desMC["samples", ],
315 " samples x ",
316 desMC["X_variables", ],
317 " variables",
318 ifelse(modC != "PCA",
319 " and 1 response",
320 ""),
321 "\n", sep = "")
322
323 cat("\n", ropLs@suppLs[["scaleC"]], " scaling of dataMatrix",
324 ifelse(modC == "PCA",
325 "",
326 paste0(" and ",
327 ifelse(mode(ropLs@suppLs[["yMCN"]]) == "character" && ropLs@suppLs[["scaleC"]] != "standard",
328 "standard scaling of ",
329 ""),
330 "response\n")), sep = "")
331
332 if(substr(desMC["missing_values", ], 1, 1) != "0")
333 cat("\n", desMC["missing_values", ], " NAs\n", sep = "")
334
335 if(substr(desMC["near_zero_excluded_X_variables", ], 1, 1) != "0")
336 cat("\n", desMC["near_zero_excluded_X_variables", ],
337 " excluded variables during model building (because of near zero variance)\n", sep = "")
338
339 cat("\n")
340
341 optDigN <- options()[["digits"]]
342 options(digits = 3)
343 print(ropLs@modelDF)
344 options(digits = optDigN)
345
346
347 ##------------------------------
348 ## Ending
349 ##------------------------------
350
351
352 ## Saving
353 ##-------
354
355
356 rspModC <- gsub("-", "", modC)
357 if(rspModC != "PCA")
358 rspModC <- paste0(make.names(argVc['respC']), "_", rspModC)
359
360 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) {
361
362 tCompMN <- scoreMN
363 pCompMN <- loadingMN
364
365 } else {
366
367 if(sumDF[, "ort"] > 0) {
368 if(parCompVi[2] > sumDF[, "ort"] + 1)
369 stop("Selected orthogonal component for plotting (ordinate) exceeds the total number of orthogonal components of the model", call. = FALSE)
370 tCompMN <- cbind(scoreMN[, 1], orthoScoreMN[, parCompVi[2] - 1])
371 pCompMN <- cbind(loadingMN[, 1], orthoLoadingMN[, parCompVi[2] - 1])
372 colnames(pCompMN) <- colnames(tCompMN) <- c("h1", paste("o", parCompVi[2] - 1, sep = ""))
373 } else {
374 if(max(parCompVi) > sumDF[, "pre"])
375 stop("Selected component for plotting as ordinate exceeds the total number of predictive components of the model", call. = FALSE)
376 tCompMN <- scoreMN[, parCompVi, drop = FALSE]
377 pCompMN <- loadingMN[, parCompVi, drop = FALSE]
378 }
379
380 }
381
382 ## x-scores and prediction
383
384 colnames(tCompMN) <- paste0(rspModC, "_XSCOR-", colnames(tCompMN))
385 tCompDF <- as.data.frame(tCompMN)[rownames(samDF), , drop = FALSE]
386
387 if(modC != "PCA") {
388
389 if(!is.null(tesVl)) {
390 tCompFulMN <- matrix(NA,
391 nrow = nrow(samDF),
392 ncol = ncol(tCompMN),
393 dimnames = list(rownames(samDF), colnames(tCompMN)))
394 mode(tCompFulMN) <- "numeric"
395 tCompFulMN[rownames(tCompMN), ] <- tCompMN
396 tCompMN <- tCompFulMN
397
398 fitMCN <- fitted(ropLs)
399 fitFulMCN <- matrix(NA,
400 nrow = nrow(samDF),
401 ncol = 1,
402 dimnames = list(rownames(samDF), NULL))
403 mode(fitFulMCN) <- mode(fitMCN)
404 fitFulMCN[rownames(fitMCN), ] <- fitMCN
405 yPreMCN <- predict(ropLs, newdata = as.data.frame(xTesMN))
406 fitFulMCN[rownames(yPreMCN), ] <- yPreMCN
407 fitMCN <- fitFulMCN
408
409 } else
410 fitMCN <- fitted(ropLs)
411
412 colnames(fitMCN) <- paste0(rspModC,
413 "_predictions")
414 fitDF <- as.data.frame(fitMCN)[rownames(samDF), , drop = FALSE]
415
416 tCompDF <- cbind.data.frame(tCompDF, fitDF)
417 }
418
419 samDF <- cbind.data.frame(samDF, tCompDF)
420
421 ## x-loadings and VIP
422
423 colnames(pCompMN) <- paste0(rspModC, "_XLOAD-", colnames(pCompMN))
424 if(!is.null(vipVn)) {
425 pCompMN <- cbind(pCompMN, vipVn)
426 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC,
427 "_VIP",
428 ifelse(!is.null(orthoVipVn),
429 "_pred",
430 ""))
431 if(!is.null(orthoVipVn)) {
432 pCompMN <- cbind(pCompMN, orthoVipVn)
433 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC,
434 "_VIP_ortho")
435 }
436 }
437 if(!is.null(coeMN)) {
438 pCompMN <- cbind(pCompMN, coeMN)
439 if(ncol(coeMN) == 1)
440 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, "_COEFF")
441 else
442 colnames(pCompMN)[(ncol(pCompMN) - ncol(coeMN) + 1):ncol(pCompMN)] <- paste0(rspModC, "_", colnames(coeMN), "-COEFF")
443 }
444 pCompDF <- as.data.frame(pCompMN)[rownames(varDF), , drop = FALSE]
445 varDF <- cbind.data.frame(varDF, pCompDF)
446
447 ## sampleMetadata
448
449 samDF <- cbind.data.frame(sampleMetadata = rownames(samDF),
450 samDF)
451 write.table(samDF,
452 file = argVc["sampleMetadata_out"],
453 quote = FALSE,
454 row.names = FALSE,
455 sep = "\t")
456
457 ## variableMetadata
458
459 varDF <- cbind.data.frame(variableMetadata = rownames(varDF),
460 varDF)
461 write.table(varDF,
462 file = argVc["variableMetadata_out"],
463 quote = FALSE,
464 row.names = FALSE,
465 sep = "\t")
466
467 # Output ropLs
468 if (!is.null(argVc['ropls_out']) && !is.na(argVc['ropls_out']))
469 save(ropLs, file = argVc['ropls_out'])
470
471 ## Closing
472 ##--------
473
474 cat("\nEnd of '", modNamC, "' Galaxy module call: ",
475 as.character(Sys.time()), "\n", sep = "")
476
477 cat("\n\n\n============================================================================")
478 cat("\nAdditional information about the call:\n")
479 cat("\n1) Parameters:\n")
480 print(cbind(value = argVc))
481
482 cat("\n2) Session Info:\n")
483 sessioninfo <- sessionInfo()
484 cat(sessioninfo$R.version$version.string,"\n")
485 cat("Main packages:\n")
486 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
487 cat("Other loaded packages:\n")
488 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
489
490 cat("============================================================================\n")
491
492 sink()
493
494 options(stringsAsFactors = strAsFacL)
495
496 rm(list = ls())