Mercurial > repos > eschen42 > multivariate
comparison multivariate_wrapper.R @ 0:b2b02fb81a0a draft default tip
planemo upload for repository https://github.com/eschen42/multivariate/tree/dump_rdata forked from https://github.com/workflow4metabolomics/multivariate.git commit 2ace6612c83223925e25d38bce9530f90f20a602-dirty
author | eschen42 |
---|---|
date | Mon, 14 Aug 2017 20:57:59 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:b2b02fb81a0a |
---|---|
1 #!/usr/bin/env Rscript | |
2 | |
3 library(batch) ## parseCommandArgs | |
4 | |
5 # Constants | |
6 argv <- commandArgs(trailingOnly = FALSE) | |
7 script.path <- sub("--file=","",argv[grep("--file=",argv)]) | |
8 prog.name <- basename(script.path) | |
9 | |
10 # Print help | |
11 if (length(grep('-h', argv)) >0) { | |
12 cat("Usage:", prog.name, | |
13 "dataMatrix_in myDataMatrix.tsv", | |
14 "sampleMetadata_in mySampleData.tsv", | |
15 "variableMetadata_in myVariableMetadata.tsv", | |
16 "respC ...", | |
17 "predI ...", | |
18 "orthoI ...", | |
19 "testL ...", | |
20 "typeC ...", | |
21 "parAsColC ...", | |
22 "parCexN ...", | |
23 "parPc1I ...", | |
24 "parPc2I ...", | |
25 "parMahalC ...", | |
26 "parLabVc ...", | |
27 "algoC ...", | |
28 "crossvalI ...", | |
29 "log10L ...", | |
30 "permI ...", | |
31 "scaleC ...", | |
32 "sampleMetadata_out mySampleMetadata_out.tsv", | |
33 "variableMetadata_out myVariableMetadata_out.tsv", | |
34 "figure figure.pdf", | |
35 "information information.txt", | |
36 "\n") | |
37 quit(status = 0) | |
38 } | |
39 | |
40 ######## | |
41 # MAIN # | |
42 ######## | |
43 | |
44 argVc <- unlist(parseCommandArgs(evaluate=FALSE)) | |
45 | |
46 ##------------------------------ | |
47 ## Initializing | |
48 ##------------------------------ | |
49 | |
50 ## options | |
51 ##-------- | |
52 | |
53 strAsFacL <- options()$stringsAsFactors | |
54 options(stringsAsFactors = FALSE) | |
55 | |
56 ## libraries | |
57 ##---------- | |
58 | |
59 suppressMessages(library(ropls)) | |
60 | |
61 if(packageVersion("ropls") < "1.4.0") | |
62 stop("Please use 'ropls' versions of 1.4.0 and above") | |
63 | |
64 ## constants | |
65 ##---------- | |
66 | |
67 modNamC <- "Multivariate" ## module name | |
68 | |
69 topEnvC <- environment() | |
70 flgC <- "\n" | |
71 | |
72 ## functions | |
73 ##---------- | |
74 | |
75 flgF <- function(tesC, | |
76 envC = topEnvC, | |
77 txtC = NA) { ## management of warning and error messages | |
78 | |
79 tesL <- eval(parse(text = tesC), envir = envC) | |
80 | |
81 if(!tesL) { | |
82 | |
83 sink() | |
84 stpTxtC <- ifelse(is.na(txtC), | |
85 paste0(tesC, " is FALSE"), | |
86 txtC) | |
87 | |
88 stop(stpTxtC, | |
89 call. = FALSE) | |
90 | |
91 } | |
92 | |
93 } ## flgF | |
94 | |
95 | |
96 ## log file | |
97 ##--------- | |
98 | |
99 sink(argVc["information"]) | |
100 | |
101 cat("\nStart of the '", modNamC, "' Galaxy module call: ", | |
102 format(Sys.time(), "%a %d %b %Y %X"), "\n", sep="") | |
103 | |
104 | |
105 ## arguments | |
106 ##---------- | |
107 | |
108 xMN <- t(as.matrix(read.table(argVc["dataMatrix_in"], | |
109 check.names = FALSE, | |
110 header = TRUE, | |
111 row.names = 1, | |
112 sep = "\t", | |
113 comment.char = ""))) | |
114 | |
115 samDF <- read.table(argVc["sampleMetadata_in"], | |
116 check.names = FALSE, | |
117 header = TRUE, | |
118 row.names = 1, | |
119 sep = "\t", | |
120 comment.char = "") | |
121 flgF("identical(rownames(xMN), rownames(samDF))", txtC = "Sample names (or number) in the data matrix (first row) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section") | |
122 | |
123 varDF <- read.table(argVc["variableMetadata_in"], | |
124 check.names = FALSE, | |
125 header = TRUE, | |
126 row.names = 1, | |
127 sep = "\t", | |
128 comment.char = "") | |
129 flgF("identical(colnames(xMN), rownames(varDF))", txtC = "Variable names (or number) in the data matrix (first column) and sample metadata (first column) are not identical; use the 'Check Format' module in the 'Quality Control' section") | |
130 | |
131 flgF("argVc['respC'] == 'none' || (argVc['respC'] %in% colnames(samDF))", | |
132 txtC = paste0("Y Response argument (", argVc['respC'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
133 if(argVc["respC"] != "none") { | |
134 yMCN <- matrix(samDF[, argVc['respC']], ncol = 1, dimnames = list(rownames(xMN), argVc['respC'])) | |
135 } else | |
136 yMCN <- NULL | |
137 | |
138 if(argVc["testL"] == "TRUE") { | |
139 flgF("!is.null(yMCN)", | |
140 txtC = "Predictions cannot be peformed with PCA models") | |
141 flgF("'test.' %in% colnames(samDF)", | |
142 txtC = "No 'test.' column found in the sample metadata") | |
143 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))", | |
144 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else") | |
145 flgF("identical(sort(unique(samDF[, 'test.'])), c('no', 'yes'))", | |
146 txtC = "'test.' column of sample metadata must contain both 'yes' (tested samples) and 'no' (samples to be used for model training) values, and nothing else") | |
147 flgF("!any(is.na(yMCN[samDF[, 'test.'] == 'no', ]))", | |
148 txtC = "samples for model training (i.e. 'no' value in the 'test.' column) should not contain NA in the response") | |
149 tesVl <- samDF[, "test."] == "yes" | |
150 xTesMN <- xMN[tesVl, , drop = FALSE] | |
151 xMN <- xMN[!tesVl, , drop = FALSE] | |
152 yMCN <- yMCN[!tesVl, , drop = FALSE] | |
153 } else | |
154 tesVl <- NULL | |
155 | |
156 if(!('parAsColC' %in% names(argVc))) | |
157 argVc["parAsColC"] <- "none" | |
158 flgF("argVc['parAsColC'] == 'none' || argVc['parAsColC'] %in% colnames(samDF)", txtC = paste0("Sample color argument (", argVc['parAsColC'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
159 if(argVc["parAsColC"] != "none") { | |
160 parAsColFcVn <- samDF[, argVc['parAsColC']] | |
161 if(is.character(parAsColFcVn)) | |
162 parAsColFcVn <- factor(parAsColFcVn) | |
163 } else | |
164 parAsColFcVn <- NA | |
165 | |
166 if(!('parMahalC' %in% names(argVc)) || argVc["parMahalC"] == "NA") { | |
167 if(!is.null(yMCN) && ncol(yMCN) == 1 && mode(yMCN) == "character") | |
168 argVc["parMahalC"] <- argVc["respC"] | |
169 else | |
170 argVc["parMahalC"] <- "none" | |
171 } | |
172 flgF("argVc['parMahalC'] == 'none' || (argVc['parMahalC'] %in% colnames(samDF))", | |
173 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must be either 'NA', 'none' or one of the column names (first row) of your sample metadata")) | |
174 if(argVc["parMahalC"] == "none") { | |
175 parEllipsesL <- FALSE | |
176 } else { | |
177 if(is.null(yMCN)) { ## PCA case | |
178 flgF("mode(samDF[, argVc['parMahalC']]) == 'character'", | |
179 txtC = paste0("Mahalanobis argument (", argVc['parMahalC'], ") must correspond to a column of characters in your sampleMetadata")) | |
180 parAsColFcVn <- factor(samDF[, argVc["parMahalC"]]) | |
181 parEllipsesL <- TRUE | |
182 } else { ## (O)PLS-DA case | |
183 flgF("identical(as.character(argVc['respC']), as.character(argVc['parMahalC']))", | |
184 txtC = paste0("The Mahalanobis argument (", argVc['parMahalC'], ") must be identical to the Y response argument (", argVc['respC'], ")")) | |
185 parEllipsesL <- TRUE | |
186 } | |
187 } | |
188 | |
189 if(!('parLabVc' %in% names(argVc))) | |
190 argVc["parLabVc"] <- "none" | |
191 flgF("argVc['parLabVc'] == 'none' || (argVc['parLabVc'] %in% colnames(samDF))", | |
192 txtC = paste0("Sample labels argument (", argVc['parLabVc'], ") must be either none or one of the column names (first row) of your sample metadata")) | |
193 if('parLabVc' %in% names(argVc)) | |
194 if(argVc["parLabVc"] != "none") { | |
195 flgF("mode(samDF[, argVc['parLabVc']]) == 'character'", | |
196 txtC = paste0("The sample label argument (", argVc['parLabVc'], ") must correspond to a sample metadata column of characters (not numerics)")) | |
197 parLabVc <- samDF[, argVc['parLabVc']] | |
198 } else | |
199 parLabVc <- NA | |
200 | |
201 if('parPc1I' %in% names(argVc)) { | |
202 parCompVi <- as.numeric(c(argVc["parPc1I"], argVc["parPc2I"])) | |
203 } else | |
204 parCompVi <- c(1, 2) | |
205 | |
206 | |
207 ## checking | |
208 ##--------- | |
209 | |
210 | |
211 flgF("argVc['predI'] == 'NA' || argVc['orthoI'] == 'NA' || as.numeric(argVc['orthoI']) > 0 || parCompVi[2] <= as.numeric(argVc['predI'])", | |
212 txtC = paste0("The highest component to display (", parCompVi[2], ") must not exceed the number of predictive components of the model (", argVc['predI'], ")")) | |
213 | |
214 | |
215 if(argVc["orthoI"] == "NA" || argVc["orthoI"] != "0") | |
216 if(argVc["predI"] == "NA" || argVc["predI"] != "0") { | |
217 argVc["predI"] <- "1" | |
218 cat("\nWarning: OPLS: number of predictive components ('predI' argument) set to 1\n", sep = "") | |
219 } | |
220 | |
221 if(argVc["predI"] != "NA") | |
222 if(as.numeric(argVc["predI"]) > min(nrow(xMN), ncol(xMN))) { | |
223 argVc["predI"] <- as.character(min(nrow(xMN), ncol(xMN))) | |
224 cat("\nWarning: 'predI' set to the minimum of the dataMatrix dimensions: ", as.numeric(argVc["predI"]), "\n", sep = "") | |
225 } | |
226 | |
227 if("algoC" %in% names(argVc) && argVc["algoC"] == "svd" && length(which(is.na(c(xMN)))) > 0) { | |
228 minN <- min(c(xMN[!is.na(xMN)])) / 2 | |
229 cat("\nWarning: Missing values set to ", round(minN, 1), " (half minimum value) for 'svd' algorithm to be used\n", sep = "") | |
230 } | |
231 | |
232 | |
233 ##------------------------------ | |
234 ## Computation and plot | |
235 ##------------------------------ | |
236 | |
237 | |
238 sink() | |
239 | |
240 optWrnN <- options()$warn | |
241 options(warn = -1) | |
242 | |
243 ropLs <- opls(x = xMN, | |
244 y = yMCN, | |
245 predI = ifelse(argVc["predI"] == "NA", NA, as.numeric(argVc["predI"])), | |
246 orthoI = ifelse(argVc["orthoI"] == "NA", NA, as.numeric(argVc["orthoI"])), | |
247 algoC = ifelse('algoC' %in% names(argVc), argVc["algoC"], "default"), | |
248 crossvalI = ifelse('crossvalI' %in% names(argVc), as.numeric(argVc["crossvalI"]), 7), | |
249 log10L = ifelse('log10L' %in% names(argVc), as.logical(argVc["log10L"]), FALSE), | |
250 permI = ifelse('permI' %in% names(argVc), as.numeric(argVc["permI"]), 20), | |
251 scaleC = ifelse('scaleC' %in% names(argVc), argVc["scaleC"], "standard"), | |
252 subset = NULL, | |
253 printL = FALSE, | |
254 plotL = FALSE, | |
255 .sinkC = argVc['information']) | |
256 | |
257 modC <- ropLs@typeC | |
258 sumDF <- getSummaryDF(ropLs) | |
259 desMC <- ropLs@descriptionMC | |
260 scoreMN <- getScoreMN(ropLs) | |
261 loadingMN <- getLoadingMN(ropLs) | |
262 | |
263 vipVn <- coeMN <- orthoScoreMN <- orthoLoadingMN <- orthoVipVn <- NULL | |
264 | |
265 if(grepl("PLS", modC)) { | |
266 | |
267 vipVn <- getVipVn(ropLs) | |
268 coeMN <- coef(ropLs) | |
269 | |
270 if(grepl("OPLS", modC)) { | |
271 orthoScoreMN <- getScoreMN(ropLs, orthoL = TRUE) | |
272 orthoLoadingMN <- getLoadingMN(ropLs, orthoL = TRUE) | |
273 orthoVipVn <- getVipVn(ropLs, orthoL = TRUE) | |
274 } | |
275 | |
276 } | |
277 | |
278 ploC <- ifelse('typeC' %in% names(argVc), argVc["typeC"], "summary") | |
279 | |
280 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) { | |
281 if(!(ploC %in% c("permutation", "overview"))) { | |
282 ploC <- "summary" | |
283 plotWarnL <- TRUE | |
284 } | |
285 } else | |
286 plotWarnL <- FALSE | |
287 | |
288 plot(ropLs, | |
289 typeVc = ploC, | |
290 parAsColFcVn = parAsColFcVn, | |
291 parCexN = ifelse('parCexN' %in% names(argVc), as.numeric(argVc["parCexN"]), 0.8), | |
292 parCompVi = parCompVi, | |
293 parEllipsesL = parEllipsesL, | |
294 parLabVc = parLabVc, | |
295 file.pdfC = argVc['figure'], | |
296 .sinkC = argVc['information']) | |
297 | |
298 options(warn = optWrnN) | |
299 | |
300 | |
301 ##------------------------------ | |
302 ## Print | |
303 ##------------------------------ | |
304 | |
305 | |
306 sink(argVc["information"], append = TRUE) | |
307 | |
308 if(plotWarnL) | |
309 cat("\nWarning: For single component models, only 'overview' (and 'permutation' in case of single response (O)PLS(-DA)) plot(s) are available\n", sep = "") | |
310 | |
311 | |
312 cat("\n", modC, "\n", sep = "") | |
313 | |
314 cat("\n", desMC["samples", ], | |
315 " samples x ", | |
316 desMC["X_variables", ], | |
317 " variables", | |
318 ifelse(modC != "PCA", | |
319 " and 1 response", | |
320 ""), | |
321 "\n", sep = "") | |
322 | |
323 cat("\n", ropLs@suppLs[["scaleC"]], " scaling of dataMatrix", | |
324 ifelse(modC == "PCA", | |
325 "", | |
326 paste0(" and ", | |
327 ifelse(mode(ropLs@suppLs[["yMCN"]]) == "character" && ropLs@suppLs[["scaleC"]] != "standard", | |
328 "standard scaling of ", | |
329 ""), | |
330 "response\n")), sep = "") | |
331 | |
332 if(substr(desMC["missing_values", ], 1, 1) != "0") | |
333 cat("\n", desMC["missing_values", ], " NAs\n", sep = "") | |
334 | |
335 if(substr(desMC["near_zero_excluded_X_variables", ], 1, 1) != "0") | |
336 cat("\n", desMC["near_zero_excluded_X_variables", ], | |
337 " excluded variables during model building (because of near zero variance)\n", sep = "") | |
338 | |
339 cat("\n") | |
340 | |
341 optDigN <- options()[["digits"]] | |
342 options(digits = 3) | |
343 print(ropLs@modelDF) | |
344 options(digits = optDigN) | |
345 | |
346 | |
347 ##------------------------------ | |
348 ## Ending | |
349 ##------------------------------ | |
350 | |
351 | |
352 ## Saving | |
353 ##------- | |
354 | |
355 | |
356 rspModC <- gsub("-", "", modC) | |
357 if(rspModC != "PCA") | |
358 rspModC <- paste0(make.names(argVc['respC']), "_", rspModC) | |
359 | |
360 if(sumDF[, "pre"] + sumDF[, "ort"] < 2) { | |
361 | |
362 tCompMN <- scoreMN | |
363 pCompMN <- loadingMN | |
364 | |
365 } else { | |
366 | |
367 if(sumDF[, "ort"] > 0) { | |
368 if(parCompVi[2] > sumDF[, "ort"] + 1) | |
369 stop("Selected orthogonal component for plotting (ordinate) exceeds the total number of orthogonal components of the model", call. = FALSE) | |
370 tCompMN <- cbind(scoreMN[, 1], orthoScoreMN[, parCompVi[2] - 1]) | |
371 pCompMN <- cbind(loadingMN[, 1], orthoLoadingMN[, parCompVi[2] - 1]) | |
372 colnames(pCompMN) <- colnames(tCompMN) <- c("h1", paste("o", parCompVi[2] - 1, sep = "")) | |
373 } else { | |
374 if(max(parCompVi) > sumDF[, "pre"]) | |
375 stop("Selected component for plotting as ordinate exceeds the total number of predictive components of the model", call. = FALSE) | |
376 tCompMN <- scoreMN[, parCompVi, drop = FALSE] | |
377 pCompMN <- loadingMN[, parCompVi, drop = FALSE] | |
378 } | |
379 | |
380 } | |
381 | |
382 ## x-scores and prediction | |
383 | |
384 colnames(tCompMN) <- paste0(rspModC, "_XSCOR-", colnames(tCompMN)) | |
385 tCompDF <- as.data.frame(tCompMN)[rownames(samDF), , drop = FALSE] | |
386 | |
387 if(modC != "PCA") { | |
388 | |
389 if(!is.null(tesVl)) { | |
390 tCompFulMN <- matrix(NA, | |
391 nrow = nrow(samDF), | |
392 ncol = ncol(tCompMN), | |
393 dimnames = list(rownames(samDF), colnames(tCompMN))) | |
394 mode(tCompFulMN) <- "numeric" | |
395 tCompFulMN[rownames(tCompMN), ] <- tCompMN | |
396 tCompMN <- tCompFulMN | |
397 | |
398 fitMCN <- fitted(ropLs) | |
399 fitFulMCN <- matrix(NA, | |
400 nrow = nrow(samDF), | |
401 ncol = 1, | |
402 dimnames = list(rownames(samDF), NULL)) | |
403 mode(fitFulMCN) <- mode(fitMCN) | |
404 fitFulMCN[rownames(fitMCN), ] <- fitMCN | |
405 yPreMCN <- predict(ropLs, newdata = as.data.frame(xTesMN)) | |
406 fitFulMCN[rownames(yPreMCN), ] <- yPreMCN | |
407 fitMCN <- fitFulMCN | |
408 | |
409 } else | |
410 fitMCN <- fitted(ropLs) | |
411 | |
412 colnames(fitMCN) <- paste0(rspModC, | |
413 "_predictions") | |
414 fitDF <- as.data.frame(fitMCN)[rownames(samDF), , drop = FALSE] | |
415 | |
416 tCompDF <- cbind.data.frame(tCompDF, fitDF) | |
417 } | |
418 | |
419 samDF <- cbind.data.frame(samDF, tCompDF) | |
420 | |
421 ## x-loadings and VIP | |
422 | |
423 colnames(pCompMN) <- paste0(rspModC, "_XLOAD-", colnames(pCompMN)) | |
424 if(!is.null(vipVn)) { | |
425 pCompMN <- cbind(pCompMN, vipVn) | |
426 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, | |
427 "_VIP", | |
428 ifelse(!is.null(orthoVipVn), | |
429 "_pred", | |
430 "")) | |
431 if(!is.null(orthoVipVn)) { | |
432 pCompMN <- cbind(pCompMN, orthoVipVn) | |
433 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, | |
434 "_VIP_ortho") | |
435 } | |
436 } | |
437 if(!is.null(coeMN)) { | |
438 pCompMN <- cbind(pCompMN, coeMN) | |
439 if(ncol(coeMN) == 1) | |
440 colnames(pCompMN)[ncol(pCompMN)] <- paste0(rspModC, "_COEFF") | |
441 else | |
442 colnames(pCompMN)[(ncol(pCompMN) - ncol(coeMN) + 1):ncol(pCompMN)] <- paste0(rspModC, "_", colnames(coeMN), "-COEFF") | |
443 } | |
444 pCompDF <- as.data.frame(pCompMN)[rownames(varDF), , drop = FALSE] | |
445 varDF <- cbind.data.frame(varDF, pCompDF) | |
446 | |
447 ## sampleMetadata | |
448 | |
449 samDF <- cbind.data.frame(sampleMetadata = rownames(samDF), | |
450 samDF) | |
451 write.table(samDF, | |
452 file = argVc["sampleMetadata_out"], | |
453 quote = FALSE, | |
454 row.names = FALSE, | |
455 sep = "\t") | |
456 | |
457 ## variableMetadata | |
458 | |
459 varDF <- cbind.data.frame(variableMetadata = rownames(varDF), | |
460 varDF) | |
461 write.table(varDF, | |
462 file = argVc["variableMetadata_out"], | |
463 quote = FALSE, | |
464 row.names = FALSE, | |
465 sep = "\t") | |
466 | |
467 # Output ropLs | |
468 if (!is.null(argVc['ropls_out']) && !is.na(argVc['ropls_out'])) | |
469 save(ropLs, file = argVc['ropls_out']) | |
470 | |
471 ## Closing | |
472 ##-------- | |
473 | |
474 cat("\nEnd of '", modNamC, "' Galaxy module call: ", | |
475 as.character(Sys.time()), "\n", sep = "") | |
476 | |
477 cat("\n\n\n============================================================================") | |
478 cat("\nAdditional information about the call:\n") | |
479 cat("\n1) Parameters:\n") | |
480 print(cbind(value = argVc)) | |
481 | |
482 cat("\n2) Session Info:\n") | |
483 sessioninfo <- sessionInfo() | |
484 cat(sessioninfo$R.version$version.string,"\n") | |
485 cat("Main packages:\n") | |
486 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
487 cat("Other loaded packages:\n") | |
488 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
489 | |
490 cat("============================================================================\n") | |
491 | |
492 sink() | |
493 | |
494 options(stringsAsFactors = strAsFacL) | |
495 | |
496 rm(list = ls()) |