w4mcorcov: w4mcorcov_calc.R comparison

comparison w4mcorcov_calc.R @ 6:0b49916c5c52 draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 4428e3252d54c8a8e0e5d85e8eaaeb13e9b21de7

author	eschen42
date	Wed, 05 Sep 2018 19:24:47 -0400
parents	1d046f648b47
children	ca9938f2eb6a

comparison

equal deleted inserted replaced

-:1d046f648b47
+:0b49916c5c52
 }
 #### OPLS-DA
 algoC <- "nipals"
-do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) {
+do_detail_plot <- function(
+x_dataMatrix
+, x_predictor
+, x_is_match
+, x_algorithm
+, x_prefix
+, x_show_labels
+, x_progress = print
+, x_env
+, x_crossval_i
+) {
 off <- function(x) if (x_show_labels == "0") 0 else x
-if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) {
+if ( x_is_match
+&& ncol(x_dataMatrix) > 0
+&& length(unique(x_predictor))> 1
+&& x_crossval_i < nrow(x_dataMatrix)
+) {
 my_oplsda <- opls(
 x      = x_dataMatrix
 , y      = x_predictor
 , algoC  = x_algorithm
 , predI  = 1
 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0
 , printL = FALSE
 , plotL  = FALSE
 , crossvalI = x_crossval_i
+, scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
 )
+# strip out variables having negligible variance
+x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE]
 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
+# x_progress(strF(x_dataMatrix))
+# x_progress(strF(my_oplsda))
+#x_progress(head(my_oplsda_suppLs_y_levels))
+#x_progress(unique(my_oplsda_suppLs_y_levels))
 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
-my_cor_vs_cov <- cor_vs_cov(
+do_s_plot <- function(
-matrix_x = x_dataMatrix
+x_env
-, ropls_x  = my_oplsda
+, predictor_projection_x   = TRUE
+, cplot_x      = FALSE
+, cor_vs_cov_x = NULL
 )
-with(
+{
-my_cor_vs_cov
+#print(ls(x_env))               # "cplot_y" etc
-, {
+#print(str(x_env$cplot_y))      # chr "covariance"
-min_x <- min(covariance)
+if (cplot_x) {
-max_x <- max(covariance)
+#print(x_env$cplot_y)         # "covariance"
-lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
+cplot_y_correlation <- (x_env$cplot_y == "correlation")
-covariance <- covariance / lim_x
+#print(cplot_y_correlation)   # FALSE
-lim_x <- 1.2
+}
-main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
+if (is.null(cor_vs_cov_x)) {
-main_cex <- min(1.0, 46.0/nchar(main_label))
+my_cor_vs_cov <- cor_vs_cov(
-# "It is generally accepted that a variable should be selected if vj>1, [27–29],
+matrix_x   = x_dataMatrix
-#   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
+, ropls_x    = my_oplsda
-#   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
+, predictor_projection_x = predictor_projection_x
-vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
+, x_progress
-alpha <- 0.1 + 0.4 * vipco
+)
-red  <- as.numeric(correlation > 0) * vipco
+} else {
-blue <- as.numeric(correlation < 0) * vipco
+my_cor_vs_cov <- cor_vs_cov_x
-plus_cor <- correlation
+}
-plus_cov <- covariance
+# print("str(my_cor_vs_cov)")
-cex <- 0.75
+# str(my_cor_vs_cov)
-plot(
+if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
-y = plus_cor
+if (is.null(cor_vs_cov_x)) {
-, x = plus_cov
+x_progress("No cor_vs_cov data produced")
-, type="p"
+}
-, xlim=c( -lim_x - off(0.2), lim_x + off(0.2) )
+plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
-, ylim=c( -1.0   - off(0.2), 1.0   + off(0.2) )
+text(x=1, y=1, labels="too few covariance data")
-, xlab = sprintf("relative covariance(feature,t1)")
+return(my_cor_vs_cov)
-, ylab = sprintf("correlation(feature,t1)")
+}
-, main = main_label
+with(
-, cex.main = main_cex
+my_cor_vs_cov
-, cex = cex
+, {
-, pch = 16
+min_x <- min(covariance, na.rm = TRUE)
-, col = rgb(blue = blue, red = red, green = 0, alpha = alpha)
+max_x <- max(covariance, na.rm = TRUE)
-)
+lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
-low_x <- -0.7 * lim_x
+covariance <- covariance / lim_x
-high_x <- 0.7 * lim_x
+lim_x <- 1.2
-text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
+# "It is generally accepted that a variable should be selected if vj>1, [27–29],
-text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
+#   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
-if ( x_show_labels != "0" ) {
+#   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
-my_loadp <- loadp
+plus_cor <- correlation
-my_loado <- loado
+plus_cov <- covariance
-names(my_loadp) <- tsv1$featureID
+cex <- 0.65
-names(my_loado) <- tsv1$featureID
+which_projection <- if (projection == 1) "t1" else "o1"
-if ( x_show_labels == "ALL" ) {
+which_loading <- if (projection == 1) "parallel" else "orthogonal"
-n_labels <- length(loadp)
+if (projection == 1) { # predictor-projection
-} else {
+vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
-n_labels <- as.numeric(x_show_labels)
+if (!cplot_x) { # S-plot predictor-projection
+my_xlab <- "relative covariance(feature,t1)"
+my_x <- plus_cov
+my_ylab <- "correlation(feature,t1)"
+my_y <- plus_cor
+} else { # C-plot predictor-projection
+my_xlab <- "variable importance in predictor-projection"
+my_x <- vip4p
+if (cplot_y_correlation) {
+my_ylab <- "correlation(feature,t1)"
+my_y <- plus_cor
+} else {
+my_ylab <- "relative covariance(feature,t1)"
+my_y <- plus_cov
+}
+}
+if (cplot_x) {
+lim_x <- max(my_x, na.rm = TRUE) * 1.1
+my_xlim <- c( 0, lim_x + off(0.2) )
+} else {
+my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
+}
+my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
+my_load_distal <- loadp
+my_load_proximal <- loado
+red  <- as.numeric(correlation > 0) * vipcp
+blue <- as.numeric(correlation < 0) * vipcp
+alpha <- 0.1 + 0.4 * vipcp
+red[is.na(red)] <- 0
+blue[is.na(blue)] <- 0
+alpha[is.na(alpha)] <- 0
+my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
+main_label <- sprintf("%s for level %s versus %s"
+, x_prefix, fctr_lvl_1, fctr_lvl_2)
+} else { # orthogonal projection
+vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
+if (!cplot_x) {
+my_xlab <- "relative covariance(feature,to1)"
+my_x <- -plus_cov
+} else {
+my_xlab <- "variable importance in orthogonal projection"
+my_x <- vip4o
+}
+if (!cplot_x) { # S-plot orthogonal projection
+my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
+my_ylab <- "correlation(feature,to1)"
+my_y <- plus_cor
+} else { # C-plot orthogonal projection
+lim_x <- max(my_x, na.rm = TRUE) * 1.1
+my_xlim <- c( 0, lim_x + off(0.2) )
+if (cplot_y_correlation) {
+my_ylab <- "correlation(feature,to1)"
+my_y <- plus_cor
+} else {
+my_ylab <- "relative covariance(feature,to1)"
+my_y <- plus_cov
+}
+}
+my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
+my_load_distal <- loado
+my_load_proximal <- loadp
+alpha <- 0.1 + 0.4 * vipco
+alpha[is.na(alpha)] <- 0
+my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
+main_label <- sprintf(
+"Features influencing orthogonal projection for %s versus %s"
+, fctr_lvl_1, fctr_lvl_2)
 }
-n_labels <- min( n_labels, (1 + length(loadp)) / 2 )
+main_cex <- min(1.0, 46.0/nchar(main_label))
-labels_to_show <- c(
+my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
-names(head(sort(my_loadp),n = n_labels))
+plot(
-, names(tail(sort(my_loadp),n = n_labels))
+y = my_y
+, x = my_x
+, type = "p"
+, xlim = my_xlim
+, ylim = my_ylim
+, xlab = my_xlab
+, ylab = my_ylab
+, main = main_label
+, cex.main = main_cex
+, cex = cex
+, pch = 16
+, col = my_col
 )
-if ( x_show_loado_labels ) {
+low_x <- -0.7 * lim_x
+high_x <- 0.7 * lim_x
+if (projection == 1 && !cplot_x) {
+text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
+text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
+}
+if ( x_show_labels != "0" ) {
+names(my_load_distal) <- tsv1$featureID
+names(my_load_proximal) <- tsv1$featureID
+if ( x_show_labels == "ALL" ) {
+n_labels <- length(my_load_distal)
+} else {
+n_labels <- as.numeric(x_show_labels)
+}
+n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
 labels_to_show <- c(
-labels_to_show
+names(head(sort(my_load_distal),n = n_labels))
-, names(head(sort(my_loado),n = n_labels))
+, names(tail(sort(my_load_distal),n = n_labels))
-, names(tail(sort(my_loado),n = n_labels))
 )
+labels <- unname(
+sapply(
+X = tsv1$featureID
+, FUN = function(x) if( x %in% labels_to_show ) x else ""
+)
+)
+x_text_offset <- 0.024
+y_text_off <- 0.017
+if (!cplot_x) { # S-plot
+y_text_offset <- if (projection == 1) -y_text_off else y_text_off
+} else { # C-plot
+y_text_offset <-
+sapply(
+X = (my_y > 0)
+, FUN = function(x) { if (x) y_text_off else -y_text_off }
+)
+}
+label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
+# print("str(x_arg)")
+# print(str(x_arg))
+# print("str(y_arg)")
+# print(str(y_arg))
+# print("str(labels_arg)")
+# print(str(labels_arg))
+if (length(labels_arg) > 0) {
+unique_slant <- unique(slant_arg)
+if (length(unique_slant) == 1) {
+text(
+y = y_arg
+, x = x_arg + x_text_offset
+, cex = 0.4
+, labels = labels_arg
+, col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+, srt = slant_arg
+, adj = 0   # left-justified
+)
+} else {
+for (slant in unique_slant) {
+text(
+y = y_arg[slant_arg == slant]
+, x = x_arg[slant_arg == slant] + x_text_offset
+, cex = 0.4
+, labels = labels_arg[slant_arg == slant]
+, col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+, srt = slant
+, adj = 0   # left-justified
+)
+}
+}
+}
+}
+if (!cplot_x) {
+my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
+} else {
+my_slant <- sapply(
+X = (my_y > 0)
+, FUN = function(x) if (x) 2 else -2
+) * my_feature_label_slant
+}
+if (length(my_x) > 1) {
+label_features(
+x_arg      = my_x  [my_x > 0]
+, y_arg      = my_y  [my_x > 0] - y_text_offset
+, labels_arg = labels[my_x > 0]
+, slant_arg = (if (!cplot_x) -my_slant else (my_slant))
+)
+if (!cplot_x) {
+label_features(
+x_arg      = my_x  [my_x < 0]
+, y_arg      = my_y  [my_x < 0] + y_text_offset
+, labels_arg = labels[my_x < 0]
+, slant_arg = my_slant
+)
+}
+} else {
+if (!cplot_x) {
+my_slant <- (if (my_x > 1) -1 else 1) * my_slant
+my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
+} else {
+my_slant <- (if (my_y > 1) -1 else 1) * my_slant
+my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset
+}
+label_features(
+x_arg = my_x
+, y_arg = my_y_arg
+, labels_arg = labels
+, slant_arg = my_slant
+)
+}
 }
-labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" ))
-text(
-y = plus_cor - 0.013
-, x = plus_cov + 0.020
-, cex = 0.4
-, labels = labels
-, col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha)
-, srt = -30 # slant 30 degrees downward
-, adj = 0   # left-justified
-)
 }
-}
+)
+return (my_cor_vs_cov)
+}
+my_cor_vs_cov <- do_s_plot(
+x_env = x_env
+, predictor_projection_x = TRUE
+, cplot_x = FALSE
 )
 typeVc <- c("correlation",      # 1
 "outlier",          # 2
 "overview",         # 3
 "permutation",      # 4
 if ( length(my_oplsda@orthoVipVn) > 0 ) {
 my_typevc <- typeVc[c(9,3,8)]
 } else {
 my_typevc <- c("(dummy)","overview","(dummy)")
 }
+my_ortho_cor_vs_cov_exists <- FALSE
 for (my_type in my_typevc) {
 if (my_type %in% typeVc) {
-# print(sprintf("plotting type %s", my_type))
 tryCatch({
-plot(
+if ( my_type != "x-loading" ) {
-x            = my_oplsda
+plot(
-, typeVc       = my_type
+x            = my_oplsda
-, parCexN      = 0.4
+, typeVc       = my_type
-, parDevNewL   = FALSE
+, parCexN      = 0.4
-, parLayL      = TRUE
+, parDevNewL   = FALSE
-, parEllipsesL = TRUE
+, parLayL      = TRUE
-)
+, parEllipsesL = TRUE
+)
+if (my_type == "overview") {
+sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2)
+title(sub = sub_label)
+}
+} else {
+my_ortho_cor_vs_cov <- do_s_plot(
+x_env = x_env
+, predictor_projection_x = FALSE
+, cplot_x = FALSE
+)
+my_ortho_cor_vs_cov_exists <- TRUE
+}
 }, error = function(e) {
-x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2))
+x_progress(
+sprintf(
+"factor level %s or %s may have only one sample - %s"
+, fctr_lvl_1
+, fctr_lvl_2
+, e$message
+)
+)
 })
 } else {
-# print("plotting dummy graph")
 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
 text(x=1, y=1, labels="no orthogonal projection is possible")
+}
+}
+cplot_p <- x_env$cplot_p
+cplot_o <- x_env$cplot_o
+if (cplot_p || cplot_o) {
+if (cplot_p) {
+do_s_plot(
+x_env = x_env
+, predictor_projection_x = TRUE
+, cplot_x = TRUE
+, cor_vs_cov_x = my_cor_vs_cov
+)
+did_plots <- 1
+} else {
+did_plots <- 0
+}
+if (cplot_o) {
+if (my_ortho_cor_vs_cov_exists) {
+do_s_plot(
+x_env = x_env
+, predictor_projection_x = FALSE
+, cplot_x = TRUE
+, cor_vs_cov_x = my_ortho_cor_vs_cov
+)
+} else {
+plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
+text(x=1, y=1, labels="no orthogonal projection is possible")
+}
+did_plots <- 1 + did_plots
+}
+if (did_plots == 1) {
+plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white")
 }
 }
 return (my_cor_vs_cov)
 } else {
-# x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor))))
 return (NULL)
 }
 }
 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
-corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) {
+corcov_calc <- function(
+calc_env
+, failure_action      = stop
+, progress_action     = function(x) { }
+, corcov_tsv_action   = function(t) { }
+, salience_tsv_action = function(t) { }
+, extra_plots         = c()
+) {
 if ( ! is.environment(calc_env) ) {
 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
 return ( FALSE )
 }
 if ( ! is.function(corcov_tsv_action) ) {
 # extract the levels from the environment
 originalLevCSV <- levCSV <- calc_env$levCSV
 # matchingC is one of { "none", "wildcard", "regex" }
 matchingC <- calc_env$matchingC
 labelFeatures <- calc_env$labelFeatures
-labelOrthoFeatures <- calc_env$labelOrthoFeatures
 # arg/env checking
 if (!(facC %in% names(smpl_metadata))) {
-failure_action(sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata", facC))
+failure_action(
+sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata"
+, facC))
 return ( FALSE )
 }
 mz             <- vrbl_metadata$mz
 names(mz)      <- vrbl_metadata$variableMetadata
 mz_lookup      <- function(feature) unname(mz[feature])
 rt             <- vrbl_metadata$rt
 names(rt)      <- vrbl_metadata$variableMetadata
 rt_lookup      <- function(feature) unname(rt[feature])
 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv)
 salience_df <- calc_env$salience_df <- w4msalience(
 data_matrix    = data_matrix
 , sample_class   = smpl_metadata[,facC]
 , failure_action = failure_action
 )
 )
 }
 # transpose matrix because ropls matrix is the transpose of XCMS matrix
+tdm <- t(data_matrix)
 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot
-# center
+# However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2.
-cdm <- center_colmeans(t(data_matrix))
-# pareto-scale
-my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE))
-scdm <- sweep(cdm, 2, my_scale, "/")
 # pattern to match column names like k10_kruskal_k4.k3_sig
 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC)
 # column name like k10_kruskal_sig
 intersample_sig_col  <- sprintf('%s_%s_sig', facC, tesC)
 did_plot <- FALSE
 if (tesC != "none") {
 # for each column name, extract the parts of the name matched by 'col_pattern', if any
 the_colnames <- colnames(vrbl_metadata)
 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
-failure_action(sprintf("bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'.", tesC))
+failure_action(
+sprintf(
+"bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'."
+, tesC))
 return ( FALSE )
 }
 col_matches <- lapply(
 X = the_colnames,
 FUN = function(x) {
 level_union <- c(level_union, col_match[2], col_match[3])
 }
 }
 }
 level_union <- unique(sort(level_union))
-overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
+overall_significant <- 1 == (
+if (intersample_sig_col %in% colnames(vrbl_metadata)) {
+vrbl_metadata[,intersample_sig_col]
+} else {
+1
+}
+)
 if ( length(level_union) > 2 ) {
 chosen_samples <- smpl_metadata_facC %in% level_union
 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
 col_selector <- vrbl_metadata_names[ overall_significant ]
-my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
+my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
 plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
-progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+progress_action(
-predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+sprintf("calculating/plotting contrast of %s vs. %s"
+, fctr_lvl_1, fctr_lvl_2)
+)
+predictor <- sapply(
+X = chosen_facC
+, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
+)
 my_cor_cov <- do_detail_plot(
 x_dataMatrix  = my_matrix
 , x_predictor   = predictor
-, x_is_match    = is_match
+, x_is_match    = TRUE
 , x_algorithm   = algoC
-, x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
+, x_prefix      = if (pairSigFeatOnly) {
+"Significantly contrasting features"
+} else {
+"Significant features"
+}
 , x_show_labels = labelFeatures
-, x_show_loado_labels = labelOrthoFeatures
 , x_progress    = progress_action
 , x_crossval_i  = min(7, length(chosen_samples))
 , x_env         = calc_env
 )
 if ( is.null(my_cor_cov) ) {
-progress_action("NOTHING TO PLOT.")
+progress_action("NOTHING TO PLOT")
 } else {
 my_tsv <- my_cor_cov$tsv1
 my_tsv$mz <- mz_lookup(my_tsv$featureID)
 my_tsv$rt <- rt_lookup(my_tsv$featureID)
-my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
+my_tsv["level1Level2Sig"] <- vrbl_metadata[
+match(my_tsv$featureID, vrbl_metadata_names)
+, vrbl_metadata_col
+]
 tsv <<- my_tsv
 corcov_tsv_action(tsv)
 did_plot <<- TRUE
 }
 }
 vrbl_metadata_col <- col_match[1]               # ^^^^^^^^^^^^^^^^^^^^^  # Column name
 fctr_lvl_1        <- col_match[2]               #             ^^         # Factor-level 1
 fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
 # only process this column if both factors are members of lvlCSV
 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
-progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+if (is_match) {
-# TODO delete next line displaying currently-processed column
+progress_action(
-# cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match))
+sprintf("calculating/plotting contrast of %s vs. %s."
-# choose only samples with one of the two factors for this column
+, fctr_lvl_1, fctr_lvl_2
-chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+)
-predictor <- smpl_metadata_facC[chosen_samples]
+)
-# extract only the significantly-varying features and the chosen samples
+# choose only samples with one of the two factors for this column
-fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
+chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
-col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
+predictor <- smpl_metadata_facC[chosen_samples]
-my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
+# extract only the significantly-varying features and the chosen samples
-my_cor_cov <- do_detail_plot(
+fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] *
-x_dataMatrix  = my_matrix
+( if (intersample_sig_col %in% colnames(vrbl_metadata)) {
-, x_predictor   = predictor
+vrbl_metadata[,intersample_sig_col]
-, x_is_match    = is_match
+} else {
-, x_algorithm   = algoC
+1
-, x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
+}
-, x_show_labels = labelFeatures
+)
-, x_show_loado_labels = labelOrthoFeatures
+col_selector <- vrbl_metadata_names[
-, x_progress    = progress_action
+if ( pairSigFeatOnly ) fully_significant else overall_significant
-, x_crossval_i  = min(7, length(chosen_samples))
+]
-, x_env         = calc_env
+my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
-)
+my_cor_cov <- do_detail_plot(
-if ( is.null(my_cor_cov) ) {
+x_dataMatrix  = my_matrix
-progress_action("NOTHING TO PLOT.")
+, x_predictor   = predictor
+, x_is_match    = is_match
+, x_algorithm   = algoC
+, x_prefix      = if (pairSigFeatOnly) {
+"Significantly contrasting features"
+} else {
+"Significant features"
+}
+, x_show_labels = labelFeatures
+, x_progress    = progress_action
+, x_crossval_i  = min(7, length(chosen_samples))
+, x_env         = calc_env
+)
+if ( is.null(my_cor_cov) ) {
+progress_action("NOTHING TO PLOT.")
+} else {
+tsv <- my_cor_cov$tsv1
+tsv$mz <- mz_lookup(tsv$featureID)
+tsv$rt <- rt_lookup(tsv$featureID)
+tsv["level1Level2Sig"] <- vrbl_metadata[
+match(tsv$featureID, vrbl_metadata_names)
+, vrbl_metadata_col
+]
+corcov_tsv_action(tsv)
+did_plot <- TRUE
+}
 } else {
-tsv <- my_cor_cov$tsv1
+progress_action(
-tsv$mz <- mz_lookup(tsv$featureID)
+sprintf("skipping contrast of %s vs. %s."
-tsv$rt <- rt_lookup(tsv$featureID)
+, fctr_lvl_1, fctr_lvl_2
-tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
+)
-corcov_tsv_action(tsv)
+)
-did_plot <- TRUE
 }
 }
 }
 }
 } else { # tesC == "none"
+# find all the levels for factor facC in sampleMetadata
 level_union <- unique(sort(smpl_metadata_facC))
+# identify the selected levels for factor facC from sampleMetadata
+level_include <- sapply(X = level_union, FUN = isLevelSelected)
+# discard the non-selected levels for factor facC
+level_union <- level_union[level_include]
 if ( length(level_union) > 1 ) {
 if ( length(level_union) > 2 ) {
 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
 completed <- c()
 lapply(
 X = level_union
 , FUN = function(x) {
 fctr_lvl_1 <- x[1]
 fctr_lvl_2 <- {
 if ( fctr_lvl_1 %in% completed )
 return("DUMMY")
 # strF(completed)
 completed <<- c(completed, fctr_lvl_1)
 setdiff(level_union, fctr_lvl_1)
 }
 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
 fctr_lvl_2 <- "other"
-# print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
-progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
 if (length(unique(chosen_samples)) < 1) {
-progress_action("NOTHING TO PLOT...")
+progress_action(
+sprintf("Skipping contrast of %s vs. %s; there are no chosen samples."
+, fctr_lvl_1, fctr_lvl_2)
+)
 } else {
 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
-predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+predictor <- sapply(
-my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+X = chosen_facC
+, FUN = function(fac) {
+if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
+}
+)
+my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
 # only process this column if both factors are members of lvlCSV
 is_match <- isLevelSelected(fctr_lvl_1)
+if (is_match) {
+progress_action(
+sprintf("Calculating/plotting contrast of %s vs. %s"
+, fctr_lvl_1, fctr_lvl_2)
+)
+my_cor_cov <- do_detail_plot(
+x_dataMatrix  = my_matrix
+, x_predictor   = predictor
+, x_is_match    = is_match
+, x_algorithm   = algoC
+, x_prefix      = "Features"
+, x_show_labels = labelFeatures
+, x_progress    = progress_action
+, x_crossval_i  = min(7, length(chosen_samples))
+, x_env         = calc_env
+)
+if ( is.null(my_cor_cov) ) {
+progress_action("NOTHING TO PLOT...")
+} else {
+tsv <- my_cor_cov$tsv1
+tsv$mz <- mz_lookup(tsv$featureID)
+tsv$rt <- rt_lookup(tsv$featureID)
+corcov_tsv_action(tsv)
+did_plot <<- TRUE
+}
+} else {
+}
+}
+"dummy" # need to return a value; otherwise combn fails with an error
+}
+)
+}
+## pass 2 - contrast each selected level with each of the other levels individually ##
+completed <- c()
+utils::combn(
+x = level_union
+, m = 2
+, FUN = function(x) {
+fctr_lvl_1 <- x[1]
+fctr_lvl_2 <- x[2]
+chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+if (length(unique(chosen_samples)) < 1) {
+progress_action(
+sprintf("Skipping contrast of %s vs. %s. - There are no chosen samples."
+, fctr_lvl_1, fctr_lvl_2
+)
+)
+} else {
+chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
+predictor <- chosen_facC
+my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
+# only process this column if both factors are members of lvlCSV
+is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
+if (is_match) {
+progress_action(
+sprintf("Calculating/plotting contrast of %s vs. %s."
+, fctr_lvl_1, fctr_lvl_2)
+)
 my_cor_cov <- do_detail_plot(
 x_dataMatrix  = my_matrix
 , x_predictor   = predictor
 , x_is_match    = is_match
 , x_algorithm   = algoC
 , x_prefix      = "Features"
 , x_show_labels = labelFeatures
-, x_show_loado_labels = labelOrthoFeatures
 , x_progress    = progress_action
 , x_crossval_i  = min(7, length(chosen_samples))
 , x_env         = calc_env
 )
 if ( is.null(my_cor_cov) ) {
-progress_action("NOTHING TO PLOT")
+progress_action("NOTHING TO PLOT.....")
 } else {
 tsv <- my_cor_cov$tsv1
 tsv$mz <- mz_lookup(tsv$featureID)
 tsv$rt <- rt_lookup(tsv$featureID)
 corcov_tsv_action(tsv)
 did_plot <<- TRUE
 }
-}
+} else {
-#print("baz")
+progress_action(
-"dummy" # need to return a value; otherwise combn fails with an error
+sprintf("Skipping contrast of %s vs. %s."
+, fctr_lvl_1, fctr_lvl_2
+)
+)
+}
 }
-)
-}
-## pass 2 - contrast each selected level with each of the other levels individually ##
-completed <- c()
-utils::combn(
-x = level_union
-, m = 2
-, FUN = function(x) {
-fctr_lvl_1 <- x[1]
-fctr_lvl_2 <- x[2]
-chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
-# print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
-progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
-if (length(unique(chosen_samples)) < 1) {
-progress_action("NOTHING TO PLOT...")
-} else {
-chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
-predictor <- chosen_facC
-my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
-# only process this column if both factors are members of lvlCSV
-is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
-my_cor_cov <- do_detail_plot(
-x_dataMatrix  = my_matrix
-, x_predictor   = predictor
-, x_is_match    = is_match
-, x_algorithm   = algoC
-, x_prefix      = "Features"
-, x_show_labels = labelFeatures
-, x_show_loado_labels = labelOrthoFeatures
-, x_progress    = progress_action
-, x_crossval_i  = min(7, length(chosen_samples))
-, x_env         = calc_env
-)
-if ( is.null(my_cor_cov) ) {
-progress_action("NOTHING TO PLOT")
-} else {
-tsv <- my_cor_cov$tsv1
-tsv$mz <- mz_lookup(tsv$featureID)
-tsv$rt <- rt_lookup(tsv$featureID)
-corcov_tsv_action(tsv)
-did_plot <<- TRUE
-}
-}
-#print("baz")
 "dummy" # need to return a value; otherwise combn fails with an error
 }
 )
 } else {
-progress_action("NOTHING TO PLOT....")
+progress_action("NOTHING TO PLOT......")
 }
 }
 if (!did_plot) {
-failure_action(sprintf("bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV))
+failure_action(
+sprintf(
+"bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'"
+, facC, originalLevCSV))
 return ( FALSE )
 }
 return ( TRUE )
 }
 # Calculate data for correlation-versus-covariance plot
 #   Adapted from:
 #     Wiklund_2008 doi:10.1021/ac0713510
 #     Galindo_Prieto_2014 doi:10.1002/cem.2627
 #     https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
-cor_vs_cov <- function(matrix_x, ropls_x) {
+cor_vs_cov <- function(
+matrix_x
+, ropls_x
+, predictor_projection_x = TRUE
+, x_progress = print
+) {
+tryCatch({
+return(
+cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress)
+)
+}, error = function(e) {
+x_progress(
+sprintf(
+"cor_vs_cov fatal error - %s"
+, as.character(e) # e$message
+)
+)
+return ( NULL )
+})
+}
+cor_vs_cov_try <- function(
+matrix_x
+, ropls_x
+, predictor_projection_x = TRUE
+, x_progress = print
+) {
 x_class <- class(ropls_x)
 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
-stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) )
+stop(
+paste(
+"cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
+, as.character(x_class)
+)
+)
 }
 result <- list()
+result$projection <- projection <- if (predictor_projection_x) 1 else 2
 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
 if ( ropls_x@suppLs$algoC == "nipals") {
 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
-score_matrix <- ropls_x@scoreMN
+if (predictor_projection_x)
+score_matrix <- ropls_x@scoreMN
+else
+score_matrix <- ropls_x@orthoScoreMN
 score_matrix_transposed <- t(score_matrix)
 score_matrix_magnitude <- mag(score_matrix)
-result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
+result$covariance <-
-result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
+score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
+result$correlation <-
+score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
 } else {
 # WARNING - untested code - I don't have test data to exercise this branch
 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
-score_matrix <- ropls_x@scoreMN
+if (predictor_projection_x)
+score_matrix <- ropls_x@scoreMN
+else
+score_matrix <- ropls_x@orthoScoreMN
 score_matrix_transposed <- t(score_matrix)
 cov_divisor <- nrow(matrix_x) - 1
 result$covariance <- sapply(
 X = 1:ncol(matrix_x)
 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor
 , FUN = function(x) {
 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) )
 }
 )
 }
-result$correlation <- result$correlation[1,,drop = TRUE]
+result$correlation <- result$correlation[ 1, , drop = TRUE ]
-result$covariance  <- result$covariance[1,,drop = TRUE]
+result$covariance  <- result$covariance [ 1, , drop = TRUE ]
 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014
 #    Length = number of features; labels = feature identifiers.  (The same is true for $correlation and $covariance.)
 result$vip4p     <- as.numeric(ropls_x@vipVn)
 result$vip4o     <- as.numeric(ropls_x@orthoVipVn)
 fctr_lvl_1       <- level_names[1]
 fctr_lvl_2       <- level_names[2]
 feature_count    <- length(ropls_x@vipVn)
 result$level1    <- rep.int(x = fctr_lvl_1, times = feature_count)
 result$level2    <- rep.int(x = fctr_lvl_2, times = feature_count)
-# print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation)))
 superresult <- list()
 if (length(result$vip4o) == 0) result$vip4o <- NA
-greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 )
+greaterLevel <- sapply(
-superresult$tsv1 <- data.frame(
+X = result$correlation
-featureID           = names(ropls_x@vipVn)
+, FUN = function(my_corr)
+tryCatch({
+if ( is.nan( my_corr ) ) {
+print("my_corr is NaN")
+NA
+} else {
+if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
+}
+}, error = function(e) {
+x_progress(
+sprintf(
+"cor_vs_cov -> sapply:  error - substituting NA - %s"
+, as.character(e)
+)
+)
+NA
+})
+)
+# begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
+featureID          <- names(ropls_x@vipVn)
+greaterLevel       <- greaterLevel[featureID]
+result$correlation <- result$correlation[featureID]
+result$covariance  <- result$covariance[featureID]
+# end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
+tsv1 <- data.frame(
+featureID           = featureID
 , factorLevel1        = result$level1
 , factorLevel2        = result$level2
 , greaterLevel        = greaterLevel
+, projection          = result$projection
 , correlation         = result$correlation
 , covariance          = result$covariance
 , vip4p               = result$vip4p
 , vip4o               = result$vip4o
 , loadp               = result$loadp
 , loado               = result$loado
 , row.names           = NULL
 )
-rownames(superresult$tsv1) <- superresult$tsv1$featureID
+tsv1 <- tsv1[!is.na(tsv1$correlation),]
+tsv1 <- tsv1[!is.na(tsv1$covariance),]
+superresult$tsv1 <- tsv1
+rownames(superresult$tsv1) <- tsv1$featureID
+superresult$projection <- result$projection
 superresult$covariance <- result$covariance
 superresult$correlation <- result$correlation
 superresult$vip4p <- result$vip4p
 superresult$vip4o <- result$vip4o
 superresult$loadp <- result$loadp
 superresult$loado <- result$loado
 superresult$details <- result
-# #print(superresult$tsv1)
 result$superresult <- superresult
 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
 result$oplsda    <- ropls_x
 result$predictor <- ropls_x@suppLs$y   # in case future consumers of this routine want to use it in currently unanticipated ways
 return (superresult)
 }
+# vim: sw=2 ts=2 et :

Mercurial > repos > eschen42 > w4mcorcov

comparison w4mcorcov_calc.R @ 6:0b49916c5c52 draft