# HG changeset patch # User eschen42 # Date 1536189887 14400 # Node ID 0b49916c5c5248ba1fba014fd158898a95dcc66a # Parent 1d046f648b47d4e330303416e514010e5a02fcd2 planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 4428e3252d54c8a8e0e5d85e8eaaeb13e9b21de7 diff -r 1d046f648b47 -r 0b49916c5c52 w4mcorcov.xml --- a/w4mcorcov.xml Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov.xml Wed Sep 05 19:24:47 2018 -0400 @@ -1,104 +1,160 @@ - - - OPLS-DA Contrasts of Univariate Results - - - r-batch - bioconductor-ropls - - - - - - - - + OPLS-DA Contrasts of Univariate Results + + + + + + + + + + + + + + + + r-base + r-batch + bioconductor-ropls + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - + - + - + - + @@ -130,7 +186,6 @@ - @@ -146,22 +201,16 @@ - - - - + + + + - - - - - - - - - - + + + + @@ -186,6 +235,7 @@ + @@ -194,7 +244,6 @@ - @@ -209,21 +258,11 @@ - - - - - - - - - - - - - - - + + + + + @@ -248,15 +287,14 @@ + - - @@ -271,24 +309,19 @@ - - - - + + + + - - - - - - - - - - + + + + + @@ -311,10 +344,150 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + - éééééé + 10.5281/zenodo.1034784 10.1002/cem.2627 @@ -711,6 +851,6 @@ 10.1021/ac0713510 diff -r 1d046f648b47 -r 0b49916c5c52 w4mcorcov_calc.R --- a/w4mcorcov_calc.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_calc.R Wed Sep 05 19:24:47 2018 -0400 @@ -7,9 +7,23 @@ #### OPLS-DA algoC <- "nipals" -do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) { +do_detail_plot <- function( + x_dataMatrix +, x_predictor +, x_is_match +, x_algorithm +, x_prefix +, x_show_labels +, x_progress = print +, x_env +, x_crossval_i +) { off <- function(x) if (x_show_labels == "0") 0 else x - if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { + if ( x_is_match + && ncol(x_dataMatrix) > 0 + && length(unique(x_predictor))> 1 + && x_crossval_i < nrow(x_dataMatrix) + ) { my_oplsda <- opls( x = x_dataMatrix , y = x_predictor @@ -19,86 +33,270 @@ , printL = FALSE , plotL = FALSE , crossvalI = x_crossval_i + , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. ) + # strip out variables having negligible variance + x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) + # x_progress(strF(x_dataMatrix)) + # x_progress(strF(my_oplsda)) + #x_progress(head(my_oplsda_suppLs_y_levels)) + #x_progress(unique(my_oplsda_suppLs_y_levels)) fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] - my_cor_vs_cov <- cor_vs_cov( - matrix_x = x_dataMatrix - , ropls_x = my_oplsda + do_s_plot <- function( + x_env + , predictor_projection_x = TRUE + , cplot_x = FALSE + , cor_vs_cov_x = NULL ) - with( - my_cor_vs_cov - , { - min_x <- min(covariance) - max_x <- max(covariance) - lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) - covariance <- covariance / lim_x - lim_x <- 1.2 - main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) - main_cex <- min(1.0, 46.0/nchar(main_label)) - # "It is generally accepted that a variable should be selected if vj>1, [27–29], - # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." - # (Mehmood 2012 doi:10.1186/1748-7188-6-27) - vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) - alpha <- 0.1 + 0.4 * vipco - red <- as.numeric(correlation > 0) * vipco - blue <- as.numeric(correlation < 0) * vipco - plus_cor <- correlation - plus_cov <- covariance - cex <- 0.75 - plot( - y = plus_cor - , x = plus_cov - , type="p" - , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) ) - , ylim=c( -1.0 - off(0.2), 1.0 + off(0.2) ) - , xlab = sprintf("relative covariance(feature,t1)") - , ylab = sprintf("correlation(feature,t1)") - , main = main_label - , cex.main = main_cex - , cex = cex - , pch = 16 - , col = rgb(blue = blue, red = red, green = 0, alpha = alpha) - ) - low_x <- -0.7 * lim_x - high_x <- 0.7 * lim_x - text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") - text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") - if ( x_show_labels != "0" ) { - my_loadp <- loadp - my_loado <- loado - names(my_loadp) <- tsv1$featureID - names(my_loado) <- tsv1$featureID - if ( x_show_labels == "ALL" ) { - n_labels <- length(loadp) - } else { - n_labels <- as.numeric(x_show_labels) + { + #print(ls(x_env)) # "cplot_y" etc + #print(str(x_env$cplot_y)) # chr "covariance" + if (cplot_x) { + #print(x_env$cplot_y) # "covariance" + cplot_y_correlation <- (x_env$cplot_y == "correlation") + #print(cplot_y_correlation) # FALSE + } + if (is.null(cor_vs_cov_x)) { + my_cor_vs_cov <- cor_vs_cov( + matrix_x = x_dataMatrix + , ropls_x = my_oplsda + , predictor_projection_x = predictor_projection_x + , x_progress + ) + } else { + my_cor_vs_cov <- cor_vs_cov_x + } + # print("str(my_cor_vs_cov)") + # str(my_cor_vs_cov) + if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { + if (is.null(cor_vs_cov_x)) { + x_progress("No cor_vs_cov data produced") + } + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="too few covariance data") + return(my_cor_vs_cov) + } + with( + my_cor_vs_cov + , { + min_x <- min(covariance, na.rm = TRUE) + max_x <- max(covariance, na.rm = TRUE) + lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) + covariance <- covariance / lim_x + lim_x <- 1.2 + # "It is generally accepted that a variable should be selected if vj>1, [27–29], + # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." + # (Mehmood 2012 doi:10.1186/1748-7188-6-27) + plus_cor <- correlation + plus_cov <- covariance + cex <- 0.65 + which_projection <- if (projection == 1) "t1" else "o1" + which_loading <- if (projection == 1) "parallel" else "orthogonal" + if (projection == 1) { # predictor-projection + vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) + if (!cplot_x) { # S-plot predictor-projection + my_xlab <- "relative covariance(feature,t1)" + my_x <- plus_cov + my_ylab <- "correlation(feature,t1)" + my_y <- plus_cor + } else { # C-plot predictor-projection + my_xlab <- "variable importance in predictor-projection" + my_x <- vip4p + if (cplot_y_correlation) { + my_ylab <- "correlation(feature,t1)" + my_y <- plus_cor + } else { + my_ylab <- "relative covariance(feature,t1)" + my_y <- plus_cov + } + } + if (cplot_x) { + lim_x <- max(my_x, na.rm = TRUE) * 1.1 + my_xlim <- c( 0, lim_x + off(0.2) ) + } else { + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + } + my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) + my_load_distal <- loadp + my_load_proximal <- loado + red <- as.numeric(correlation > 0) * vipcp + blue <- as.numeric(correlation < 0) * vipcp + alpha <- 0.1 + 0.4 * vipcp + red[is.na(red)] <- 0 + blue[is.na(blue)] <- 0 + alpha[is.na(alpha)] <- 0 + my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) + main_label <- sprintf("%s for level %s versus %s" + , x_prefix, fctr_lvl_1, fctr_lvl_2) + } else { # orthogonal projection + vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) + if (!cplot_x) { + my_xlab <- "relative covariance(feature,to1)" + my_x <- -plus_cov + } else { + my_xlab <- "variable importance in orthogonal projection" + my_x <- vip4o + } + if (!cplot_x) { # S-plot orthogonal projection + my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) + my_ylab <- "correlation(feature,to1)" + my_y <- plus_cor + } else { # C-plot orthogonal projection + lim_x <- max(my_x, na.rm = TRUE) * 1.1 + my_xlim <- c( 0, lim_x + off(0.2) ) + if (cplot_y_correlation) { + my_ylab <- "correlation(feature,to1)" + my_y <- plus_cor + } else { + my_ylab <- "relative covariance(feature,to1)" + my_y <- plus_cov + } + } + my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) + my_load_distal <- loado + my_load_proximal <- loadp + alpha <- 0.1 + 0.4 * vipco + alpha[is.na(alpha)] <- 0 + my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) + main_label <- sprintf( + "Features influencing orthogonal projection for %s versus %s" + , fctr_lvl_1, fctr_lvl_2) } - n_labels <- min( n_labels, (1 + length(loadp)) / 2 ) - labels_to_show <- c( - names(head(sort(my_loadp),n = n_labels)) - , names(tail(sort(my_loadp),n = n_labels)) + main_cex <- min(1.0, 46.0/nchar(main_label)) + my_feature_label_slant <- -30 # slant feature labels 30 degrees downward + plot( + y = my_y + , x = my_x + , type = "p" + , xlim = my_xlim + , ylim = my_ylim + , xlab = my_xlab + , ylab = my_ylab + , main = main_label + , cex.main = main_cex + , cex = cex + , pch = 16 + , col = my_col ) - if ( x_show_loado_labels ) { - labels_to_show <- c( - labels_to_show - , names(head(sort(my_loado),n = n_labels)) - , names(tail(sort(my_loado),n = n_labels)) - ) + low_x <- -0.7 * lim_x + high_x <- 0.7 * lim_x + if (projection == 1 && !cplot_x) { + text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") + text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") } - labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) - text( - y = plus_cor - 0.013 - , x = plus_cov + 0.020 - , cex = 0.4 - , labels = labels - , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha) - , srt = -30 # slant 30 degrees downward - , adj = 0 # left-justified - ) + if ( x_show_labels != "0" ) { + names(my_load_distal) <- tsv1$featureID + names(my_load_proximal) <- tsv1$featureID + if ( x_show_labels == "ALL" ) { + n_labels <- length(my_load_distal) + } else { + n_labels <- as.numeric(x_show_labels) + } + n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) + labels_to_show <- c( + names(head(sort(my_load_distal),n = n_labels)) + , names(tail(sort(my_load_distal),n = n_labels)) + ) + labels <- unname( + sapply( + X = tsv1$featureID + , FUN = function(x) if( x %in% labels_to_show ) x else "" + ) + ) + x_text_offset <- 0.024 + y_text_off <- 0.017 + if (!cplot_x) { # S-plot + y_text_offset <- if (projection == 1) -y_text_off else y_text_off + } else { # C-plot + y_text_offset <- + sapply( + X = (my_y > 0) + , FUN = function(x) { if (x) y_text_off else -y_text_off } + ) + } + label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { + # print("str(x_arg)") + # print(str(x_arg)) + # print("str(y_arg)") + # print(str(y_arg)) + # print("str(labels_arg)") + # print(str(labels_arg)) + if (length(labels_arg) > 0) { + unique_slant <- unique(slant_arg) + if (length(unique_slant) == 1) { + text( + y = y_arg + , x = x_arg + x_text_offset + , cex = 0.4 + , labels = labels_arg + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant_arg + , adj = 0 # left-justified + ) + } else { + for (slant in unique_slant) { + text( + y = y_arg[slant_arg == slant] + , x = x_arg[slant_arg == slant] + x_text_offset + , cex = 0.4 + , labels = labels_arg[slant_arg == slant] + , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels + , srt = slant + , adj = 0 # left-justified + ) + } + } + } + } + if (!cplot_x) { + my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant + } else { + my_slant <- sapply( + X = (my_y > 0) + , FUN = function(x) if (x) 2 else -2 + ) * my_feature_label_slant + } + if (length(my_x) > 1) { + label_features( + x_arg = my_x [my_x > 0] + , y_arg = my_y [my_x > 0] - y_text_offset + , labels_arg = labels[my_x > 0] + , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) + ) + if (!cplot_x) { + label_features( + x_arg = my_x [my_x < 0] + , y_arg = my_y [my_x < 0] + y_text_offset + , labels_arg = labels[my_x < 0] + , slant_arg = my_slant + ) + } + } else { + if (!cplot_x) { + my_slant <- (if (my_x > 1) -1 else 1) * my_slant + my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset + } else { + my_slant <- (if (my_y > 1) -1 else 1) * my_slant + my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset + } + label_features( + x_arg = my_x + , y_arg = my_y_arg + , labels_arg = labels + , slant_arg = my_slant + ) + } + } } - } + ) + return (my_cor_vs_cov) + } + my_cor_vs_cov <- do_s_plot( + x_env = x_env + , predictor_projection_x = TRUE + , cplot_x = FALSE ) typeVc <- c("correlation", # 1 "outlier", # 2 @@ -118,36 +316,93 @@ } else { my_typevc <- c("(dummy)","overview","(dummy)") } + my_ortho_cor_vs_cov_exists <- FALSE for (my_type in my_typevc) { if (my_type %in% typeVc) { - # print(sprintf("plotting type %s", my_type)) tryCatch({ - plot( - x = my_oplsda - , typeVc = my_type - , parCexN = 0.4 - , parDevNewL = FALSE - , parLayL = TRUE - , parEllipsesL = TRUE - ) + if ( my_type != "x-loading" ) { + plot( + x = my_oplsda + , typeVc = my_type + , parCexN = 0.4 + , parDevNewL = FALSE + , parLayL = TRUE + , parEllipsesL = TRUE + ) + if (my_type == "overview") { + sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) + title(sub = sub_label) + } + } else { + my_ortho_cor_vs_cov <- do_s_plot( + x_env = x_env + , predictor_projection_x = FALSE + , cplot_x = FALSE + ) + my_ortho_cor_vs_cov_exists <- TRUE + } }, error = function(e) { - x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2)) + x_progress( + sprintf( + "factor level %s or %s may have only one sample - %s" + , fctr_lvl_1 + , fctr_lvl_2 + , e$message + ) + ) }) } else { - # print("plotting dummy graph") plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") text(x=1, y=1, labels="no orthogonal projection is possible") } } + cplot_p <- x_env$cplot_p + cplot_o <- x_env$cplot_o + if (cplot_p || cplot_o) { + if (cplot_p) { + do_s_plot( + x_env = x_env + , predictor_projection_x = TRUE + , cplot_x = TRUE + , cor_vs_cov_x = my_cor_vs_cov + ) + did_plots <- 1 + } else { + did_plots <- 0 + } + if (cplot_o) { + if (my_ortho_cor_vs_cov_exists) { + do_s_plot( + x_env = x_env + , predictor_projection_x = FALSE + , cplot_x = TRUE + , cor_vs_cov_x = my_ortho_cor_vs_cov + ) + } else { + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") + text(x=1, y=1, labels="no orthogonal projection is possible") + } + did_plots <- 1 + did_plots + } + if (did_plots == 1) { + plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white") + } + } return (my_cor_vs_cov) } else { - # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor)))) return (NULL) } } # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 -corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) { +corcov_calc <- function( + calc_env +, failure_action = stop +, progress_action = function(x) { } +, corcov_tsv_action = function(t) { } +, salience_tsv_action = function(t) { } +, extra_plots = c() +) { if ( ! is.environment(calc_env) ) { failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") return ( FALSE ) @@ -174,22 +429,23 @@ # matchingC is one of { "none", "wildcard", "regex" } matchingC <- calc_env$matchingC labelFeatures <- calc_env$labelFeatures - labelOrthoFeatures <- calc_env$labelOrthoFeatures # arg/env checking if (!(facC %in% names(smpl_metadata))) { - failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) + failure_action( + sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" + , facC)) return ( FALSE ) } mz <- vrbl_metadata$mz names(mz) <- vrbl_metadata$variableMetadata mz_lookup <- function(feature) unname(mz[feature]) - + rt <- vrbl_metadata$rt names(rt) <- vrbl_metadata$variableMetadata rt_lookup <- function(feature) unname(rt[feature]) - + # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) salience_df <- calc_env$salience_df <- w4msalience( data_matrix = data_matrix @@ -236,12 +492,9 @@ } # transpose matrix because ropls matrix is the transpose of XCMS matrix + tdm <- t(data_matrix) # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot - # center - cdm <- center_colmeans(t(data_matrix)) - # pareto-scale - my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE)) - scdm <- sweep(cdm, 2, my_scale, "/") + # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2. # pattern to match column names like k10_kruskal_k4.k3_sig col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) @@ -265,7 +518,10 @@ # for each column name, extract the parts of the name matched by 'col_pattern', if any the_colnames <- colnames(vrbl_metadata) if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { - failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) + failure_action( + sprintf( + "bad parameter! variableMetadata must contain results of W4M Univariate test '%s'." + , tesC)) return ( FALSE ) } col_matches <- lapply( @@ -292,34 +548,52 @@ } } level_union <- unique(sort(level_union)) - overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) + overall_significant <- 1 == ( + if (intersample_sig_col %in% colnames(vrbl_metadata)) { + vrbl_metadata[,intersample_sig_col] + } else { + 1 + } + ) if ( length(level_union) > 2 ) { chosen_samples <- smpl_metadata_facC %in% level_union chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) col_selector <- vrbl_metadata_names[ overall_significant ] - my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] + my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] plot_action <- function(fctr_lvl_1, fctr_lvl_2) { - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) - predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) + progress_action( + sprintf("calculating/plotting contrast of %s vs. %s" + , fctr_lvl_1, fctr_lvl_2) + ) + predictor <- sapply( + X = chosen_facC + , FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 + ) my_cor_cov <- do_detail_plot( x_dataMatrix = my_matrix , x_predictor = predictor - , x_is_match = is_match + , x_is_match = TRUE , x_algorithm = algoC - , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" + , x_prefix = if (pairSigFeatOnly) { + "Significantly contrasting features" + } else { + "Significant features" + } , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures , x_progress = progress_action , x_crossval_i = min(7, length(chosen_samples)) , x_env = calc_env ) if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT.") + progress_action("NOTHING TO PLOT") } else { my_tsv <- my_cor_cov$tsv1 my_tsv$mz <- mz_lookup(my_tsv$featureID) my_tsv$rt <- rt_lookup(my_tsv$featureID) - my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] + my_tsv["level1Level2Sig"] <- vrbl_metadata[ + match(my_tsv$featureID, vrbl_metadata_names) + , vrbl_metadata_col + ] tsv <<- my_tsv corcov_tsv_action(tsv) did_plot <<- TRUE @@ -348,50 +622,79 @@ fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) - # TODO delete next line displaying currently-processed column - # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match)) - # choose only samples with one of the two factors for this column - chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) - predictor <- smpl_metadata_facC[chosen_samples] - # extract only the significantly-varying features and the chosen samples - fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) - col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] - my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] - my_cor_cov <- do_detail_plot( - x_dataMatrix = my_matrix - , x_predictor = predictor - , x_is_match = is_match - , x_algorithm = algoC - , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" - , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures - , x_progress = progress_action - , x_crossval_i = min(7, length(chosen_samples)) - , x_env = calc_env - ) - if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT.") + if (is_match) { + progress_action( + sprintf("calculating/plotting contrast of %s vs. %s." + , fctr_lvl_1, fctr_lvl_2 + ) + ) + # choose only samples with one of the two factors for this column + chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) + predictor <- smpl_metadata_facC[chosen_samples] + # extract only the significantly-varying features and the chosen samples + fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * + ( if (intersample_sig_col %in% colnames(vrbl_metadata)) { + vrbl_metadata[,intersample_sig_col] + } else { + 1 + } + ) + col_selector <- vrbl_metadata_names[ + if ( pairSigFeatOnly ) fully_significant else overall_significant + ] + my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = if (pairSigFeatOnly) { + "Significantly contrasting features" + } else { + "Significant features" + } + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_crossval_i = min(7, length(chosen_samples)) + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT.") + } else { + tsv <- my_cor_cov$tsv1 + tsv$mz <- mz_lookup(tsv$featureID) + tsv$rt <- rt_lookup(tsv$featureID) + tsv["level1Level2Sig"] <- vrbl_metadata[ + match(tsv$featureID, vrbl_metadata_names) + , vrbl_metadata_col + ] + corcov_tsv_action(tsv) + did_plot <- TRUE + } } else { - tsv <- my_cor_cov$tsv1 - tsv$mz <- mz_lookup(tsv$featureID) - tsv$rt <- rt_lookup(tsv$featureID) - tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] - corcov_tsv_action(tsv) - did_plot <- TRUE + progress_action( + sprintf("skipping contrast of %s vs. %s." + , fctr_lvl_1, fctr_lvl_2 + ) + ) } } } } } else { # tesC == "none" + # find all the levels for factor facC in sampleMetadata level_union <- unique(sort(smpl_metadata_facC)) + # identify the selected levels for factor facC from sampleMetadata + level_include <- sapply(X = level_union, FUN = isLevelSelected) + # discard the non-selected levels for factor facC + level_union <- level_union[level_include] if ( length(level_union) > 1 ) { if ( length(level_union) > 2 ) { ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ## completed <- c() lapply( X = level_union - , FUN = function(x) { + , FUN = function(x) { fctr_lvl_1 <- x[1] fctr_lvl_2 <- { if ( fctr_lvl_1 %in% completed ) @@ -402,39 +705,50 @@ } chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) fctr_lvl_2 <- "other" - # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { - progress_action("NOTHING TO PLOT...") + progress_action( + sprintf("Skipping contrast of %s vs. %s; there are no chosen samples." + , fctr_lvl_1, fctr_lvl_2) + ) } else { chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) - predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) - my_matrix <- scdm[ chosen_samples, , drop = FALSE ] + predictor <- sapply( + X = chosen_facC + , FUN = function(fac) { + if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 + } + ) + my_matrix <- tdm[ chosen_samples, , drop = FALSE ] # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) - my_cor_cov <- do_detail_plot( - x_dataMatrix = my_matrix - , x_predictor = predictor - , x_is_match = is_match - , x_algorithm = algoC - , x_prefix = "Features" - , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures - , x_progress = progress_action - , x_crossval_i = min(7, length(chosen_samples)) - , x_env = calc_env - ) - if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT") + if (is_match) { + progress_action( + sprintf("Calculating/plotting contrast of %s vs. %s" + , fctr_lvl_1, fctr_lvl_2) + ) + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = "Features" + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_crossval_i = min(7, length(chosen_samples)) + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT...") + } else { + tsv <- my_cor_cov$tsv1 + tsv$mz <- mz_lookup(tsv$featureID) + tsv$rt <- rt_lookup(tsv$featureID) + corcov_tsv_action(tsv) + did_plot <<- TRUE + } } else { - tsv <- my_cor_cov$tsv1 - tsv$mz <- mz_lookup(tsv$featureID) - tsv$rt <- rt_lookup(tsv$featureID) - corcov_tsv_action(tsv) - did_plot <<- TRUE } } - #print("baz") "dummy" # need to return a value; otherwise combn fails with an error } ) @@ -444,52 +758,67 @@ utils::combn( x = level_union , m = 2 - , FUN = function(x) { + , FUN = function(x) { fctr_lvl_1 <- x[1] fctr_lvl_2 <- x[2] chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) - # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) if (length(unique(chosen_samples)) < 1) { - progress_action("NOTHING TO PLOT...") + progress_action( + sprintf("Skipping contrast of %s vs. %s. - There are no chosen samples." + , fctr_lvl_1, fctr_lvl_2 + ) + ) } else { chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) predictor <- chosen_facC - my_matrix <- scdm[ chosen_samples, , drop = FALSE ] + my_matrix <- tdm[ chosen_samples, , drop = FALSE ] # only process this column if both factors are members of lvlCSV is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) - my_cor_cov <- do_detail_plot( - x_dataMatrix = my_matrix - , x_predictor = predictor - , x_is_match = is_match - , x_algorithm = algoC - , x_prefix = "Features" - , x_show_labels = labelFeatures - , x_show_loado_labels = labelOrthoFeatures - , x_progress = progress_action - , x_crossval_i = min(7, length(chosen_samples)) - , x_env = calc_env - ) - if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT") + if (is_match) { + progress_action( + sprintf("Calculating/plotting contrast of %s vs. %s." + , fctr_lvl_1, fctr_lvl_2) + ) + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = "Features" + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_crossval_i = min(7, length(chosen_samples)) + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT.....") + } else { + tsv <- my_cor_cov$tsv1 + tsv$mz <- mz_lookup(tsv$featureID) + tsv$rt <- rt_lookup(tsv$featureID) + corcov_tsv_action(tsv) + did_plot <<- TRUE + } } else { - tsv <- my_cor_cov$tsv1 - tsv$mz <- mz_lookup(tsv$featureID) - tsv$rt <- rt_lookup(tsv$featureID) - corcov_tsv_action(tsv) - did_plot <<- TRUE + progress_action( + sprintf("Skipping contrast of %s vs. %s." + , fctr_lvl_1, fctr_lvl_2 + ) + ) } } - #print("baz") "dummy" # need to return a value; otherwise combn fails with an error } ) } else { - progress_action("NOTHING TO PLOT....") + progress_action("NOTHING TO PLOT......") } } if (!did_plot) { - failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) + failure_action( + sprintf( + "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" + , facC, originalLevCSV)) return ( FALSE ) } return ( TRUE ) @@ -500,27 +829,67 @@ # Wiklund_2008 doi:10.1021/ac0713510 # Galindo_Prieto_2014 doi:10.1002/cem.2627 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R -cor_vs_cov <- function(matrix_x, ropls_x) { +cor_vs_cov <- function( + matrix_x +, ropls_x +, predictor_projection_x = TRUE +, x_progress = print +) { + tryCatch({ + return( + cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) + ) + }, error = function(e) { + x_progress( + sprintf( + "cor_vs_cov fatal error - %s" + , as.character(e) # e$message + ) + ) + return ( NULL ) + }) +} + +cor_vs_cov_try <- function( + matrix_x +, ropls_x +, predictor_projection_x = TRUE +, x_progress = print +) { x_class <- class(ropls_x) - if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) - stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) + if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) + stop( + paste( + "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " + , as.character(x_class) + ) + ) } result <- list() + result$projection <- projection <- if (predictor_projection_x) 1 else 2 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS if ( ropls_x@suppLs$algoC == "nipals") { # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) - score_matrix <- ropls_x@scoreMN + if (predictor_projection_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) score_matrix_magnitude <- mag(score_matrix) - result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) - result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) + result$covariance <- + score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) + result$correlation <- + score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) } else { # WARNING - untested code - I don't have test data to exercise this branch # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F - score_matrix <- ropls_x@scoreMN + if (predictor_projection_x) + score_matrix <- ropls_x@scoreMN + else + score_matrix <- ropls_x@orthoScoreMN score_matrix_transposed <- t(score_matrix) cov_divisor <- nrow(matrix_x) - 1 result$covariance <- sapply( @@ -540,8 +909,8 @@ } ) } - result$correlation <- result$correlation[1,,drop = TRUE] - result$covariance <- result$covariance[1,,drop = TRUE] + result$correlation <- result$correlation[ 1, , drop = TRUE ] + result$covariance <- result$covariance [ 1, , drop = TRUE ] # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) @@ -556,15 +925,42 @@ feature_count <- length(ropls_x@vipVn) result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) - # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation))) superresult <- list() if (length(result$vip4o) == 0) result$vip4o <- NA - greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) - superresult$tsv1 <- data.frame( - featureID = names(ropls_x@vipVn) + greaterLevel <- sapply( + X = result$correlation + , FUN = function(my_corr) + tryCatch({ + if ( is.nan( my_corr ) ) { + print("my_corr is NaN") + NA + } else { + if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 + } + }, error = function(e) { + x_progress( + sprintf( + "cor_vs_cov -> sapply: error - substituting NA - %s" + , as.character(e) + ) + ) + NA + }) + ) + + # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 + featureID <- names(ropls_x@vipVn) + greaterLevel <- greaterLevel[featureID] + result$correlation <- result$correlation[featureID] + result$covariance <- result$covariance[featureID] + # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 + + tsv1 <- data.frame( + featureID = featureID , factorLevel1 = result$level1 , factorLevel2 = result$level2 , greaterLevel = greaterLevel + , projection = result$projection , correlation = result$correlation , covariance = result$covariance , vip4p = result$vip4p @@ -573,7 +969,11 @@ , loado = result$loado , row.names = NULL ) - rownames(superresult$tsv1) <- superresult$tsv1$featureID + tsv1 <- tsv1[!is.na(tsv1$correlation),] + tsv1 <- tsv1[!is.na(tsv1$covariance),] + superresult$tsv1 <- tsv1 + rownames(superresult$tsv1) <- tsv1$featureID + superresult$projection <- result$projection superresult$covariance <- result$covariance superresult$correlation <- result$correlation superresult$vip4p <- result$vip4p @@ -581,12 +981,11 @@ superresult$loadp <- result$loadp superresult$loado <- result$loado superresult$details <- result - # #print(superresult$tsv1) result$superresult <- superresult # Include thise in case future consumers of this routine want to use it in currently unanticipated ways - result$oplsda <- ropls_x + result$oplsda <- ropls_x result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways return (superresult) } - +# vim: sw=2 ts=2 et : diff -r 1d046f648b47 -r 0b49916c5c52 w4mcorcov_input.R --- a/w4mcorcov_input.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_input.R Wed Sep 05 19:24:47 2018 -0400 @@ -1,6 +1,6 @@ # read_data_frame - read a w4m data frame, with error handling # e.g., data_matrix_input_env <- read_data_frame(dataMatrix_in, "data matrix input") -read_data_frame <- function(file_path, kind_string, failure_action = failure_action) { +read_data_frame <- function(file_path, kind_string, rdf_failure_action = failure_action) { my.env <- new.env() my.env$success <- FALSE my.env$msg <- sprintf("no message reading %s", kind_string) @@ -14,7 +14,7 @@ } ) if (!my.env$success) { - failure_action(my.env$msg) + rdf_failure_action(my.env$msg) return ( FALSE ) } return (my.env) @@ -36,7 +36,7 @@ my_failure_action( sprintf("bad parameter xcms_data_type '%s'", xcms_data_type) ) return ( FALSE ) } - if ( is.character(xcms_data_in) ){ + if ( is.character(xcms_data_in) ) { # case: xcms_data_in is a path to a file xcms_data_input_env <- read_data_frame( xcms_data_in, sprintf("%s input", xcms_data_type) ) if (!xcms_data_input_env$success) { @@ -44,30 +44,6 @@ return ( FALSE ) } return ( xcms_data_input_env$data ) - # commenting out pasted code that is not tested here - # } else if ( is.data.frame(xcms_data_in) || is.matrix(xcms_data_in) ) { - # # case: xcms_data_in is a data.frame or matrix - # return(xcms_data_in) - # } else if ( is.list(xcms_data_in) || is.environment(xcms_data_in) ) { - # # NOTE WELL: is.list succeeds for data.frame, so the is.data.frame test must appear before the is.list test - # # case: xcms_data_in is a list - # if ( ! exists(xcms_data_type, where = xcms_data_in) ) { - # my_failure_action(sprintf("%s xcms_data_in is missing member '%s'"), ifelse(is.environment(xcms_data_in),"environment","list"), xcms_data_type) - # return ( FALSE ) - # } - # prospect <- getElement(name = xcms_data_type, object = xcms_data_in) - # if ( ! is.data.frame(prospect) && ! is.matrix(prospect) ) { - # utils::str("list - str(prospect)") - # utils::str(prospect) - # if ( is.list(xcms_data_in) ) { - # my_failure_action(sprintf("the first member of xcms_data_in['%s'] is neither a data.frame nor a matrix but is a %s", xcms_data_type, typeof(prospect))) - # } else { - # my_failure_action(sprintf("the first member of xcms_data_in$%s is neither a data.frame nor a matrix but is a %s", xcms_data_type, typeof(prospect))) - # } - # return ( prospect ) - # } - # # stop("stopping here for a snapshot") - # return ( prospect ) } else { # case: xcms_data_in is invalid my_failure_action( sprintf("xcms_data_in has unexpected type %s", typeof(xcms_data_in)) ) @@ -189,6 +165,10 @@ data_matrix <- as.matrix(data_matrix) } + # Omit any feature not found in variableMetadata and any sample not found in sampleMetadata + # For something more elaborate, see https://github.com/HegemanLab/w4mclassfilter + data_matrix <- data_matrix[rownames(data_matrix) %in% rownames(vrbl_metadata),colnames(data_matrix) %in% rownames(smpl_metadata)] + input_env$data_matrix <- data_matrix # ... } else { diff -r 1d046f648b47 -r 0b49916c5c52 w4mcorcov_lib.R --- a/w4mcorcov_lib.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_lib.R Wed Sep 05 19:24:47 2018 -0400 @@ -1,3 +1,3 @@ suppressMessages(library(batch)) -# suppressMessages(library(foreach)) suppressMessages(library(ropls)) +suppressMessages(library(methods)) diff -r 1d046f648b47 -r 0b49916c5c52 w4mcorcov_output.R --- a/w4mcorcov_output.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_output.R Wed Sep 05 19:24:47 2018 -0400 @@ -0,0 +1,77 @@ + +# turn off all plotting devices +dev.off.all <- function() { + while (!is.null(dev.list())) { dev.off() } +} + +# capture plot and write to PDF; then close any devices opened in the process +plot2pdf <- function( + file.name +, plot.function +, width = 12 +, height = 12 +) { + # capture plot and write to PDF + cur.dev <- dev.list() + filename <- file.name + pdf(file = filename, width = width, height = height) + plot.function() + # close any devices opened in the process + dev.off() + if (is.null(cur.dev)) { + dev.off.all() + } else { + while ( length(dev.list()) > length(cur.dev) ) { dev.off() } + } +} + +# print and capture plot and write to PDF; then close any devices opened in the process +# This is needed for ggplot which does not print the plot when invoked within a function. +print2pdf <- function( + file.name +, plot.function +, width = 12 +, height = 12 +) { + plot2pdf( + file.name = file.name + , width = width + , height = height + , plot.function = function() { + print(plot.function()) + } + ) +} + +iso8601.znow <- function() +{ + strftime(as.POSIXlt(Sys.time(), "UTC"), "%Y-%m-%dT%H:%M:%SZ") +} + +# pdf.name <- function(name) +# { +# paste0(name, "_", iso8601.filename.fragment(), ".pdf") +# } +# +# tsv.name <- function(name) +# { +# paste0(name, "_", iso8601.filename.fragment(), ".tsv") +# } +# + +tsv_action_factory <- function(file, colnames, append) { + return ( + function(tsv) { + write.table( + x = tsv + , file = file + , sep = "\t" + , quote = FALSE + , row.names = FALSE + , col.names = colnames + , append = append + ) + } + ) +} + diff -r 1d046f648b47 -r 0b49916c5c52 w4mcorcov_salience.R --- a/w4mcorcov_salience.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_salience.R Wed Sep 05 19:24:47 2018 -0400 @@ -61,8 +61,8 @@ ) rcvOfFeatureBySampleClassLevel[is.nan(rcvOfFeatureBySampleClassLevel)] <- max(9999,max(rcvOfFeatureBySampleClassLevel, na.rm = TRUE)) - # "For each feature, 'select max(median_feature_intensity) from feature'." - maxApplyMedianOfFeatureBySampleClassLevel <- sapply( + # "For each feature, 'select max(max_feature_intensity) from feature'." + maxApplyMaxOfFeatureBySampleClassLevel <- sapply( X = 1:n_features , FUN = function(i) { match( @@ -84,19 +84,19 @@ # the feature name feature = features # the name (or factor-level) of the class-level with the highest median intensity for the feature - , max_level = medianOfFeatureBySampleClassLevel[maxApplyMedianOfFeatureBySampleClassLevel,1] + , max_level = medianOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel,1] # the median intensity for the feature and the level max_level , max_median = sapply( X = 1:n_features , FUN = function(i) { - maxOfFeatureBySampleClassLevel[maxApplyMedianOfFeatureBySampleClassLevel[i], 1 + i] + maxOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel[i], 1 + i] } ) # the coefficient of variation (expressed as a proportion) for the intensity for the feature and the level max_level , max_rcv = sapply( X = 1:n_features , FUN = function(i) { - rcvOfFeatureBySampleClassLevel[maxApplyMedianOfFeatureBySampleClassLevel[i], i] + rcvOfFeatureBySampleClassLevel[maxApplyMaxOfFeatureBySampleClassLevel[i], i] } ) # the mean of the medians of intensity for all class-levels for the feature diff -r 1d046f648b47 -r 0b49916c5c52 w4mcorcov_util.R --- a/w4mcorcov_util.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_util.R Wed Sep 05 19:24:47 2018 -0400 @@ -21,68 +21,54 @@ return (retval) } -# turn off all plotting devices -dev.off.all <- function() { - while (!is.null(dev.list())) { dev.off() } +errorSink <- function(which_function, ...) { + var_args <- "..." + tryCatch( + var_args <<- (deparse(..., width.cutoff = 60)) + , error = function(e) {print(e$message)} + ) + if (var_args == "...") + return + # format error for logging + format_error <- function(e) { + sprintf( + "Error\n{ message: %s\n, arguments: %s\n}\n" + , e$message + , Reduce(f = paste, x = var_args) + ) + } + format_warning <- function(e) { + sprintf( + "Warning\n{ message: %s\n, arguments: %s\n}\n" + , e$message + , Reduce(f = paste, x = var_args) + ) + } + sink_number <- sink.number() + sink(stderr()) + tryCatch( + var_args <- (deparse(..., width.cutoff = 60)) + , expr = { + retval <- which_function(...) + } + , error = function(e) cat(format_error(e), file = stderr()) + , warning = function(w) cat(format_warning(w), file = stderr()) + ) + while (sink.number() > sink_number) { + sink() + } } - -# capture plot and write to PDF; then close any devices opened in the process -plot2pdf <- function( - file.name -, plot.function -, width = 12 -, height = 12 -) { - # capture plot and write to PDF - cur.dev <- dev.list() - filename <- file.name - pdf(file = filename, width = width, height = height) - plot.function() - # close any devices opened in the process - dev.off() - if (is.null(cur.dev)) { - dev.off.all() - } else { - while ( length(dev.list()) > length(cur.dev) ) { dev.off() } - } +errorPrint <- function(...) { + errorSink(which_function = print, ...) +} +errorCat <- function(...) { + errorSink(which_function = cat, ..., "\n") } -# print and capture plot and write to PDF; then close any devices opened in the process -# This is needed for ggplot which does not print the plot when invoked within a function. -print2pdf <- function( - file.name -, plot.function -, width = 12 -, height = 12 -) { - plot2pdf( - file.name = file.name - , width = width - , height = height - , plot.function = function() { - print(plot.function()) - } - ) -} -iso8601.znow <- function() -{ - strftime(as.POSIXlt(Sys.time(), "UTC"), "%Y-%m-%dT%H:%M:%SZ") -} - -# pdf.name <- function(name) -# { -# paste0(name, "_", iso8601.filename.fragment(), ".pdf") -# } -# -# tsv.name <- function(name) -# { -# paste0(name, "_", iso8601.filename.fragment(), ".tsv") -# } -# -# # pseudo-inverse - computational inverse non-square matrix a +# # pseudo-inverse - computational inverse of non-square matrix a # p.i <- function(a) { # solve(t(a) %*% a) %*% t(a) # } - +# vim: sw=2 ts=2 et ai : diff -r 1d046f648b47 -r 0b49916c5c52 w4mcorcov_wrapper.R --- a/w4mcorcov_wrapper.R Fri Mar 02 08:26:36 2018 -0500 +++ b/w4mcorcov_wrapper.R Wed Sep 05 19:24:47 2018 -0400 @@ -22,18 +22,45 @@ ## subroutines ##---------- -source("w4mcorcov_lib.R") -source("w4mcorcov_util.R") -source("w4mcorcov_input.R") -source("w4mcorcov_salience.R") -source("w4mcorcov_calc.R") -source("w4mcorcov_output.R") +# from: https://github.com/molgenis/molgenis-pipelines/wiki/How-to-source-another_file.R-from-within-your-R-script +LocationOfThisScript = function() # Function LocationOfThisScript returns the location of this .R script (may be needed to source other files in same dir) +{ + this.file = NULL + # This file may be 'sourced' + for (i in -(1:sys.nframe())) { + if (identical(sys.function(i), base::source)) this.file = (normalizePath(sys.frame(i)$ofile)) + } + + if (!is.null(this.file)) return(dirname(this.file)) + + # But it may also be called from the command line + cmd.args = commandArgs(trailingOnly = FALSE) + cmd.args.trailing = commandArgs(trailingOnly = TRUE) + cmd.args = cmd.args[seq.int(from=1, length.out=length(cmd.args) - length(cmd.args.trailing))] + res = gsub("^(?:--file=(.*)|.*)$", "\\1", cmd.args) + + # If multiple --file arguments are given, R uses the last one + res = tail(res[res != ""], 1) + if (0 < length(res)) return(dirname(res)) + + # Both are not the case. Maybe we are in an R GUI? + return(NULL) +} + +script.dir <- LocationOfThisScript() + +source(paste(script.dir, "w4mcorcov_lib.R", sep="/")) +source(paste(script.dir, "w4mcorcov_util.R", sep="/")) +source(paste(script.dir, "w4mcorcov_input.R", sep="/")) +source(paste(script.dir, "w4mcorcov_salience.R", sep="/")) +source(paste(script.dir, "w4mcorcov_calc.R", sep="/")) +source(paste(script.dir, "w4mcorcov_output.R", sep="/")) ## log file ##--------- my_log <- function(x, ...) { cat(paste(iso8601.znow(), " ", x, ..., nl, sep=""))} -my_fatal <- function(x, ...) { +my_fatal <- function(x, ...) { my_log("ERROR: ", x, ...) quit(save = "no", status = 11, runLast = TRUE) } @@ -45,7 +72,12 @@ # MAIN # ######## +errorPrint(sessionInfo()) + argVc <- unlist(parseCommandArgs(evaluate=FALSE)) +errorCat("\n\n---\n\nArguments that were passed to R are as follows:\n") +errorPrint(argVc) + my_env <- new.env() ##------------------------------ @@ -63,7 +95,6 @@ my_env$contrast_detail <- as.character(argVc["contrast_detail"]) my_env$contrast_corcov <- as.character(argVc["contrast_corcov"]) my_env$contrast_salience <- as.character(argVc["contrast_salience"]) -# print(sprintf("contrast_salience: %s", my_env$contrast_salience)) # other parameters @@ -73,7 +104,9 @@ my_env$levCSV <- as.character(argVc["levCSV"]) my_env$matchingC <- as.character(argVc["matchingC"]) my_env$labelFeatures <- as.character(argVc["labelFeatures"]) # number of features to label at each extreme of the loadings or 'ALL' -my_env$labelOrthoFeatures <- as.logical(argVc["labelOrthoFeatures"]) +my_env$cplot_o <- as.logical(argVc["cplot_o"]) # TRUE if orthogonal C-plot is requested +my_env$cplot_p <- as.logical(argVc["cplot_p"]) # TRUE if parallel C-plot is requested +my_env$cplot_y <- as.character(argVc["cplot_y"]) # Choice of covariance/correlation for Y-axis on C-plot label_features <- my_env$labelFeatures labelfeatures_check <- TRUE @@ -93,22 +126,6 @@ quit(save = "no", status = 10, runLast = TRUE) } -tsv_action_factory <- function(file, colnames, append) { - return ( - function(tsv) { - write.table( - x = tsv - , file = file - , sep = "\t" - , quote = FALSE - , row.names = FALSE - , col.names = colnames - , append = append - ) - } - ) -} - corcov_tsv_colnames <- TRUE corcov_tsv_append <- FALSE corcov_tsv_action <- function(tsv) { @@ -146,24 +163,70 @@ # compute and plot the correlation_vs_covariance details plot # The parameter settings here are generally taken from bioconductor ropls::plot.opls source. - marVn <- c(4.6, 4.1, 2.6, 1.6) - old_par <- par( - font = 2 # bold font face - , font.axis = 2 # bold font face for axis - , font.lab = 2 # bold font face for x and y labels - , lwd = 2 # line-width - interpretation is device spcific - , mar = marVn # margins - , pch = 18 # black diamond plot-character, see help for graphics::points - # , mfrow = c(2,2) # two rows by two columns - , pty = "s" # force plots to be square - ) + if ( my_env$cplot_p || my_env$cplot_o ) { + old_par <- par( + font = 2 # bold font face + , font.axis = 2 # bold font face for axis + , font.lab = 2 # bold font face for x and y labels + , lwd = 2 # line-width - interpretation is device spcific + , pch = 18 # black diamond plot-character, see help for graphics::points + , pty = "m" # do not force plots to be square + , no.readonly = TRUE # only save writable parameters + ) + pdf_height <- 12 + pdf_width <- 8 + my_layout <- function() { + # lay out 2 columns by 3 rows with extra width at the margin of individual plots + layout( + matrix( + # blank row plot 1 & 2 blank row plot 3 & 4 blank row plot 5 & 6 blank row + c(0,0,0,0,0, 0,1,0,2,0, 0,0,0,0,0, 0,3,0,4,0, 0,0,0,0,0, 0,5,0,6,0, 0,0,0,0,0) + , nrow = 7 + , ncol = 5 + , byrow = TRUE + ) + # slim columns 1, 3, and 5 + , widths = c(0.1, 0.9, 0.1, 0.9, 0.1) + # slim rows 1, 3, 5, and 7 + , heights = c(0.1, 0.9, 0.1, 0.9, 0.1, 0.9, 0.1) + ) + } + } else { + old_par <- par( + font = 2 # bold font face + , font.axis = 2 # bold font face for axis + , font.lab = 2 # bold font face for x and y labels + , lwd = 2 # line-width - interpretation is device spcific + , pch = 18 # black diamond plot-character, see help for graphics::points + , pty = "m" # do not force plots to be square + , no.readonly = TRUE # only save writable parameters + ) + pdf_height <- 8 + pdf_width <- 8 + my_layout <- function() { + # lay out 2 columns by 2 rows with extra width at the margin of individual plots + layout( + matrix( + # blank row plot 1 & 2 blank row plot 3 & 4 blank row + c(0,0,0,0,0, 0,1,0,2,0, 0,0,0,0,0, 0,3,0,4,0, 0,0,0,0,0) + , nrow = 5 + , ncol = 5 + , byrow = TRUE + ) + # slim columns 1, 3, and 5 + , widths = c(0.1, 0.9, 0.1, 0.9, 0.1) + # slim rows 1, 3, and 5 + , heights = c(0.1, 0.9, 0.1, 0.9, 0.1) + ) + } + } plot2pdf( file.name = my_env$contrast_detail - , width = 8 - , height = 8 + , width = pdf_width + , height = pdf_height , plot.function = function() { - # plot layout four plots per page - layout(matrix(1:4, byrow = TRUE, nrow = 2)) + # plot layout four or six plots per page + my_layout() my_result <<- corcov_calc( calc_env = my_env , failure_action = my_fatal @@ -174,7 +237,7 @@ } ) par(old_par) - + my_log( "-------------------------- Finished data processing --------------------------") }