w4mcorcov: w4mcorcov_calc.R comparison

comparison w4mcorcov_calc.R @ 7:ca9938f2eb6a draft default tip

"planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 5fd9687d543a48a715b1180caf93abebebd58b0e"

author	eschen42
date	Tue, 17 Nov 2020 23:29:59 +0000
parents	0b49916c5c52
children

comparison

equal deleted inserted replaced

-:0b49916c5c52
+:ca9938f2eb6a
-# center with 'colMeans()' - ref: http://gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/
+# compute and output detail plots
-center_colmeans <- function(x) {
-xcenter = colMeans(x)
-x - rep(xcenter, rep.int(nrow(x), ncol(x)))
-}
-#### OPLS-DA
-algoC <- "nipals"
 do_detail_plot <- function(
 x_dataMatrix
 , x_predictor
 , x_is_match
 , x_algorithm
 , x_crossval_i
 ) {
 off <- function(x) if (x_show_labels == "0") 0 else x
 if ( x_is_match
 && ncol(x_dataMatrix) > 0
-&& length(unique(x_predictor))> 1
+&& length(unique(x_predictor)) > 1
 && x_crossval_i < nrow(x_dataMatrix)
 ) {
 my_oplsda <- opls(
 x      = x_dataMatrix
 , y      = x_predictor
 , algoC  = x_algorithm
 , predI  = 1
 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0
-, printL = FALSE
+, fig.pdfC = 'none'
-, plotL  = FALSE
+, info.txtC  = 'none'
 , crossvalI = x_crossval_i
 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
 )
 # strip out variables having negligible variance
-x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE]
+x_dataMatrix <- x_dataMatrix[ , names(my_oplsda@vipVn), drop = FALSE]
 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
-# x_progress(strF(x_dataMatrix))
-# x_progress(strF(my_oplsda))
-#x_progress(head(my_oplsda_suppLs_y_levels))
-#x_progress(unique(my_oplsda_suppLs_y_levels))
 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
 do_s_plot <- function(
 x_env
 , predictor_projection_x   = TRUE
 , cplot_x      = FALSE
 , cor_vs_cov_x = NULL
-)
+) {
-{
-#print(ls(x_env))               # "cplot_y" etc
-#print(str(x_env$cplot_y))      # chr "covariance"
 if (cplot_x) {
-#print(x_env$cplot_y)         # "covariance"
 cplot_y_correlation <- (x_env$cplot_y == "correlation")
-#print(cplot_y_correlation)   # FALSE
+default_lim_x <- 10
+} else {
+default_lim_x <- 1.2
 }
 if (is.null(cor_vs_cov_x)) {
 my_cor_vs_cov <- cor_vs_cov(
 matrix_x   = x_dataMatrix
 , ropls_x    = my_oplsda
 , predictor_projection_x = predictor_projection_x
 , x_progress
+, x_env
 )
 } else {
 my_cor_vs_cov <- cor_vs_cov_x
 }
-# print("str(my_cor_vs_cov)")
-# str(my_cor_vs_cov)
 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
 if (is.null(cor_vs_cov_x)) {
-x_progress("No cor_vs_cov data produced")
+x_progress(
+sprintf("No cor_vs_cov data produced for level %s versus %s", fctr_lvl_1, fctr_lvl_2)
+)
 }
 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
 text(x=1, y=1, labels="too few covariance data")
 return(my_cor_vs_cov)
 }
 my_cor_vs_cov
 , {
 min_x <- min(covariance, na.rm = TRUE)
 max_x <- max(covariance, na.rm = TRUE)
 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
-covariance <- covariance / lim_x
-lim_x <- 1.2
+# Regarding using VIP as a guide to selecting a biomarker:
-# "It is generally accepted that a variable should be selected if vj>1, [27–29],
+#   "It is generally accepted that a variable should be selected if vj>1, [27–29],
 #   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
 #   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
 plus_cor <- correlation
 plus_cov <- covariance
-cex <- 0.65
+if (length(plus_cor) != 0 && length(plus_cor) != 0) {
-which_projection <- if (projection == 1) "t1" else "o1"
+cex <- 0.65
-which_loading <- if (projection == 1) "parallel" else "orthogonal"
+if (projection == 1) {
-if (projection == 1) { # predictor-projection
+# predictor-projection
-vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
+vipcp <- pmax(0, pmin(1, (vip4p - 0.83) / (1.21 - 0.83)))
-if (!cplot_x) { # S-plot predictor-projection
+if (!cplot_x) {
-my_xlab <- "relative covariance(feature,t1)"
+# S-plot predictor-projection
-my_x <- plus_cov
+my_xlab <- "covariance(feature,t1)"
-my_ylab <- "correlation(feature,t1)"
+my_x <- plus_cov
-my_y <- plus_cor
-} else { # C-plot predictor-projection
-my_xlab <- "variable importance in predictor-projection"
-my_x <- vip4p
-if (cplot_y_correlation) {
 my_ylab <- "correlation(feature,t1)"
 my_y <- plus_cor
+# X,Y limits for S-PLOT
+my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3))
+my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
 } else {
-my_ylab <- "relative covariance(feature,t1)"
+# C-plot predictor-projection
-my_y <- plus_cov
+my_xlab <- "variable importance in predictor-projection"
+my_x <- vip4p
+if (cplot_y_correlation) {
+my_ylab <- "correlation(feature,t1)"
+my_y <- plus_cor
+my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
+} else {
+my_ylab <- "covariance(feature,t1)"
+my_y <- plus_cov
+my_ylim <- max(abs(plus_cov))
+my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) )
+}
+# X,Y limits for C-plot
+lim_x <- max(my_x, na.rm = TRUE) * 1.1
+lim_x <- min(lim_x, default_lim_x)
+my_xlim <- c( 0, lim_x ) # + off(0.2) )
 }
-}
+my_load_distal <- loadp
-if (cplot_x) {
+my_load_proximal <- loado
-lim_x <- max(my_x, na.rm = TRUE) * 1.1
+red  <- as.numeric(correlation > 0) * vipcp
-my_xlim <- c( 0, lim_x + off(0.2) )
+blue <- as.numeric(correlation < 0) * vipcp
+alpha <- 0.1 + 0.4 * vipcp
+red[is.na(red)] <- 0
+blue[is.na(blue)] <- 0
+alpha[is.na(alpha)] <- 0
+my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
+main_label <- sprintf("%s for level %s versus %s"
+, x_prefix, fctr_lvl_1, fctr_lvl_2)
 } else {
-my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
+# orthogonal projection
-}
+vipco <- pmax(0, pmin(1, (vip4o - 0.83) / (1.21 - 0.83)))
-my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
+if (!cplot_x) {
-my_load_distal <- loadp
+# S-PLOT orthogonal-projection
-my_load_proximal <- loado
+my_xlab <- "covariance(feature,to1)"
-red  <- as.numeric(correlation > 0) * vipcp
+my_x <- -plus_cov
-blue <- as.numeric(correlation < 0) * vipcp
+# X,Y limits for S-PLOT
-alpha <- 0.1 + 0.4 * vipcp
+my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3))
-red[is.na(red)] <- 0
-blue[is.na(blue)] <- 0
-alpha[is.na(alpha)] <- 0
-my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
-main_label <- sprintf("%s for level %s versus %s"
-, x_prefix, fctr_lvl_1, fctr_lvl_2)
-} else { # orthogonal projection
-vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
-if (!cplot_x) {
-my_xlab <- "relative covariance(feature,to1)"
-my_x <- -plus_cov
-} else {
-my_xlab <- "variable importance in orthogonal projection"
-my_x <- vip4o
-}
-if (!cplot_x) { # S-plot orthogonal projection
-my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
-my_ylab <- "correlation(feature,to1)"
-my_y <- plus_cor
-} else { # C-plot orthogonal projection
-lim_x <- max(my_x, na.rm = TRUE) * 1.1
-my_xlim <- c( 0, lim_x + off(0.2) )
-if (cplot_y_correlation) {
 my_ylab <- "correlation(feature,to1)"
 my_y <- plus_cor
+my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
 } else {
-my_ylab <- "relative covariance(feature,to1)"
+# C-plot orthogonal-projection
-my_y <- plus_cov
+my_xlab <- "variable importance in orthogonal projection"
+my_x <- vip4o
+# C-plot orthogonal projection
+# X,Y limits for C-plot
+lim_x <- max(my_x, na.rm = TRUE) * 1.1
+my_xlim <- c( 0, lim_x ) # + off(0.2) )
+if (cplot_y_correlation) {
+my_ylab <- "correlation(feature,to1)"
+my_y <- plus_cor
+my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) )
+} else {
+my_ylab <- "covariance(feature,to1)"
+my_y <- plus_cov
+my_ylim <- max(abs(plus_cov))
+my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) )
+}
 }
+my_load_distal <- loado
+my_load_proximal <- loadp
+alpha <- 0.1 + 0.4 * vipco
+alpha[is.na(alpha)] <- 0
+my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
+main_label <- sprintf(
+"Features influencing orthogonal projection for %s versus %s"
+, fctr_lvl_1, fctr_lvl_2)
 }
-my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
+main_cex <- min(1.0, 46.0/nchar(main_label))
-my_load_distal <- loado
+my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
-my_load_proximal <- loadp
+my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18)
-alpha <- 0.1 + 0.4 * vipco
+if ( sum(is.infinite(my_xlim)) == 0 ) {
-alpha[is.na(alpha)] <- 0
+plot(
-my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
+y = my_y
-main_label <- sprintf(
+, x = my_x
-"Features influencing orthogonal projection for %s versus %s"
+, type = "p"
-, fctr_lvl_1, fctr_lvl_2)
+, xlim = my_xlim
-}
+, ylim = my_ylim
-main_cex <- min(1.0, 46.0/nchar(main_label))
+, xlab = my_xlab
-my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
+, ylab = my_ylab
-plot(
+, main = main_label
-y = my_y
+, cex.main = main_cex
-, x = my_x
+, cex = cex
-, type = "p"
+, pch = my_pch
-, xlim = my_xlim
+, col = my_col
-, ylim = my_ylim
-, xlab = my_xlab
-, ylab = my_ylab
-, main = main_label
-, cex.main = main_cex
-, cex = cex
-, pch = 16
-, col = my_col
-)
-low_x <- -0.7 * lim_x
-high_x <- 0.7 * lim_x
-if (projection == 1 && !cplot_x) {
-text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
-text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
-}
-if ( x_show_labels != "0" ) {
-names(my_load_distal) <- tsv1$featureID
-names(my_load_proximal) <- tsv1$featureID
-if ( x_show_labels == "ALL" ) {
-n_labels <- length(my_load_distal)
-} else {
-n_labels <- as.numeric(x_show_labels)
-}
-n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
-labels_to_show <- c(
-names(head(sort(my_load_distal),n = n_labels))
-, names(tail(sort(my_load_distal),n = n_labels))
-)
-labels <- unname(
-sapply(
-X = tsv1$featureID
-, FUN = function(x) if( x %in% labels_to_show ) x else ""
 )
-)
+low_x <- -0.7 * lim_x
-x_text_offset <- 0.024
+high_x <- 0.7 * lim_x
-y_text_off <- 0.017
+if (projection == 1 && !cplot_x) {
-if (!cplot_x) { # S-plot
+text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
-y_text_offset <- if (projection == 1) -y_text_off else y_text_off
+text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
-} else { # C-plot
+}
-y_text_offset <-
+if ( x_show_labels != "0" ) {
-sapply(
+names(my_load_distal) <- tsv1$featureID
-X = (my_y > 0)
+names(my_load_proximal) <- tsv1$featureID
-, FUN = function(x) { if (x) y_text_off else -y_text_off }
+if ( x_show_labels == "ALL" ) {
+n_labels <- length(my_load_distal)
+} else {
+n_labels <- as.numeric(x_show_labels)
+}
+n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
+labels_to_show <- c(
+names(head(sort(my_load_distal), n = n_labels))
+, names(tail(sort(my_load_distal), n = n_labels))
 )
-}
+labels <- unname(
-label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
+sapply(
-# print("str(x_arg)")
+X = tsv1$featureID
-# print(str(x_arg))
+, FUN = function(x) if ( x %in% labels_to_show ) x else ""
-# print("str(y_arg)")
-# print(str(y_arg))
-# print("str(labels_arg)")
-# print(str(labels_arg))
-if (length(labels_arg) > 0) {
-unique_slant <- unique(slant_arg)
-if (length(unique_slant) == 1) {
-text(
-y = y_arg
-, x = x_arg + x_text_offset
-, cex = 0.4
-, labels = labels_arg
-, col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
-, srt = slant_arg
-, adj = 0   # left-justified
 )
+)
+x_text_offset <- 0.024
+y_text_off <- 0.017
+if (!cplot_x) {
+# S-plot
+y_text_offset <- if (projection == 1) -y_text_off else y_text_off
 } else {
-for (slant in unique_slant) {
+# C-plot
-text(
+y_text_offset <-
-y = y_arg[slant_arg == slant]
+sapply(
-, x = x_arg[slant_arg == slant] + x_text_offset
+X = (my_y > 0)
-, cex = 0.4
+, FUN = function(x) {
-, labels = labels_arg[slant_arg == slant]
+if (x) y_text_off else -y_text_off
-, col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+}
-, srt = slant
+)
-, adj = 0   # left-justified
+}
+label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
+if (length(labels_arg) > 0) {
+unique_slant <- unique(slant_arg)
+if (length(unique_slant) == 1) {
+text(
+y = y_arg
+, x = x_arg + x_text_offset
+, cex = 0.4
+, labels = labels_arg
+, col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+, srt = slant_arg
+, adj = 0   # left-justified
+)
+} else {
+for (slant in unique_slant) {
+text(
+y = y_arg[slant_arg == slant]
+, x = x_arg[slant_arg == slant] + x_text_offset
+, cex = 0.4
+, labels = labels_arg[slant_arg == slant]
+, col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+, srt = slant
+, adj = 0   # left-justified
+)
+}
+}
+}
+}
+if (!cplot_x) {
+my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
+} else {
+my_slant <- sapply(
+X = (my_y > 0)
+, FUN = function(x) if (x) 2 else -2
+) * my_feature_label_slant
+}
+if (length(my_x) > 1) {
+label_features(
+x_arg      = my_x  [my_x > 0]
+, y_arg      = my_y  [my_x > 0] - y_text_offset
+, labels_arg = labels[my_x > 0]
+, slant_arg = (if (!cplot_x) -my_slant else (my_slant))
+)
+if (!cplot_x) {
+label_features(
+x_arg      = my_x  [my_x < 0]
+, y_arg      = my_y  [my_x < 0] + y_text_offset
+, labels_arg = labels[my_x < 0]
+, slant_arg = my_slant
 )
 }
-}
+} else {
-}
+if (!cplot_x) {
-}
+my_slant <- (if (my_x > 1) -1 else 1) * my_slant
-if (!cplot_x) {
+my_y_arg <- my_y + (if (my_x > 1) -1 else 1) * y_text_offset
-my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
+} else {
+my_slant <- (if (my_y > 1) -1 else 1) * my_slant
+my_y_arg <- my_y + (if (my_y > 1) -1 else 1) * y_text_offset
+}
+label_features(
+x_arg = my_x
+, y_arg = my_y_arg
+, labels_arg = labels
+, slant_arg = my_slant
+)
+} # end if (length(my_x) > 1)
+} # end if ( x_show_labels != "0" )
 } else {
-my_slant <- sapply(
+plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
-X = (my_y > 0)
+text(x=1, y=1, labels="no S-plot is possible")
-, FUN = function(x) if (x) 2 else -2
+} # end if ( sum(is.infinite(my_xlim)) == 0 )
-) * my_feature_label_slant
+} else {
-}
+plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
-if (length(my_x) > 1) {
+text(x=1, y=1, labels="no S-plot is possible")
-label_features(
+} # end if (length(plus_cor) != 0 && length(plus_cor) != 0)
-x_arg      = my_x  [my_x > 0]
+} # end action
-, y_arg      = my_y  [my_x > 0] - y_text_offset
+) # end with( my_cor_vs_cov, action )
-, labels_arg = labels[my_x > 0]
-, slant_arg = (if (!cplot_x) -my_slant else (my_slant))
-)
-if (!cplot_x) {
-label_features(
-x_arg      = my_x  [my_x < 0]
-, y_arg      = my_y  [my_x < 0] + y_text_offset
-, labels_arg = labels[my_x < 0]
-, slant_arg = my_slant
-)
-}
-} else {
-if (!cplot_x) {
-my_slant <- (if (my_x > 1) -1 else 1) * my_slant
-my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
-} else {
-my_slant <- (if (my_y > 1) -1 else 1) * my_slant
-my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset
-}
-label_features(
-x_arg = my_x
-, y_arg = my_y_arg
-, labels_arg = labels
-, slant_arg = my_slant
-)
-}
-}
-}
-)
 return (my_cor_vs_cov)
-}
+} # end function do_s_plot
 my_cor_vs_cov <- do_s_plot(
 x_env = x_env
 , predictor_projection_x = TRUE
 , cplot_x = FALSE
 )
-typeVc <- c("correlation",      # 1
+typevc <- c("correlation",      # 1
 "outlier",          # 2
 "overview",         # 3
 "permutation",      # 4
 "predict-train",    # 5
 "predict-test",     # 6
 "x-variance",       # 10
 "xy-score",         # 11
 "xy-weight"         # 12
 )                    # [c(3,8,9)] # [c(4,3,8,9)]
 if ( length(my_oplsda@orthoVipVn) > 0 ) {
-my_typevc <- typeVc[c(9,3,8)]
+my_typevc <- typevc[c(9,3,8)]
 } else {
 my_typevc <- c("(dummy)","overview","(dummy)")
 }
 my_ortho_cor_vs_cov_exists <- FALSE
 for (my_type in my_typevc) {
-if (my_type %in% typeVc) {
+if (my_type %in% typevc) {
 tryCatch({
 if ( my_type != "x-loading" ) {
 plot(
 x            = my_oplsda
 , typeVc       = my_type
 , parCexN      = 0.4
 , parDevNewL   = FALSE
 , parLayL      = TRUE
 , parEllipsesL = TRUE
 )
 if (my_type == "overview") {
 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2)
 title(sub = sub_label)
 }
 } else {
 my_ortho_cor_vs_cov <- do_s_plot(
 x_env = x_env
 , predictor_projection_x = FALSE
 , cplot_x = FALSE
 )
 my_ortho_cor_vs_cov_exists <- TRUE
+}
 }
-}, error = function(e) {
+, error = function(e) {
 x_progress(
 sprintf(
 "factor level %s or %s may have only one sample - %s"
 , fctr_lvl_1
 , fctr_lvl_2
 }
 cplot_p <- x_env$cplot_p
 cplot_o <- x_env$cplot_o
 if (cplot_p || cplot_o) {
 if (cplot_p) {
-do_s_plot(
+if (!is.null(my_cor_vs_cov)) {
-x_env = x_env
+do_s_plot(
-, predictor_projection_x = TRUE
+x_env = x_env
-, cplot_x = TRUE
+, predictor_projection_x = TRUE
-, cor_vs_cov_x = my_cor_vs_cov
+, cplot_x = TRUE
-)
+, cor_vs_cov_x = my_cor_vs_cov
+)
+} else {
+plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
+text(x=1, y=1, labels="no predictor projection is possible")
+}
 did_plots <- 1
 } else {
 did_plots <- 0
 }
 if (cplot_o) {
 return ( FALSE )
 }
 # extract parameters from the environment
 vrbl_metadata <- calc_env$vrbl_metadata
-vrbl_metadata_names <- vrbl_metadata[,1]
+vrbl_metadata_names <- vrbl_metadata[, 1]
 smpl_metadata <- calc_env$smpl_metadata
 data_matrix <- calc_env$data_matrix
-pairSigFeatOnly <- calc_env$pairSigFeatOnly
+pair_significant_features_only <- calc_env$pairSigFeatOnly
 facC <- calc_env$facC
 tesC <- calc_env$tesC
 # extract the levels from the environment
 originalLevCSV <- levCSV <- calc_env$levCSV
 # matchingC is one of { "none", "wildcard", "regex" }
 matchingC <- calc_env$matchingC
 labelFeatures <- calc_env$labelFeatures
+minCrossvalI <- as.integer(calc_env$min_crossval_i)
 # arg/env checking
 if (!(facC %in% names(smpl_metadata))) {
 failure_action(
 sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata"
 rt             <- vrbl_metadata$rt
 names(rt)      <- vrbl_metadata$variableMetadata
 rt_lookup      <- function(feature) unname(rt[feature])
-# calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv)
+# calculate salience_df as data.frame(feature, max_level, max_median, salience_rcv, mean_median, salience, salience_rcv)
 salience_df <- calc_env$salience_df <- w4msalience(
 data_matrix    = data_matrix
 , sample_class   = smpl_metadata[,facC]
 , failure_action = failure_action
 )
 salience_tsv_action({
-my_df <- data.frame(
+with (
-featureID    = salience_df$feature
+salience_df
-, salientLevel = salience_df$max_level
+, {
-, salientRCV   = salience_df$salient_rcv
+my_df <<- data.frame(
-, salience     = salience_df$salience
+featureID       = feature
-, mz           = mz_lookup(salience_df$feature)
+, salientLevel    = max_level
-, rt           = rt_lookup(salience_df$feature)
+, salienceRCV     = salience_rcv
-)
+, relativeSalientDistance = relative_salient_distance
-my_df[order(-my_df$salience),]
+, salience        = salience
+, mz              = mz_lookup(feature)
+, rt              = rt_lookup(feature)
+)
+})
+my_df[order(-my_df$relativeSalientDistance),]
 })
 # transform wildcards to regexen
 if (matchingC == "wildcard") {
 # strsplit(x = "hello,wild,world", split = ",")
 )
 my_cor_cov <- do_detail_plot(
 x_dataMatrix  = my_matrix
 , x_predictor   = predictor
 , x_is_match    = TRUE
-, x_algorithm   = algoC
+, x_algorithm   = "nipals"
-, x_prefix      = if (pairSigFeatOnly) {
+, x_prefix      = if (pair_significant_features_only) {
 "Significantly contrasting features"
 } else {
 "Significant features"
 }
 , x_show_labels = labelFeatures
 , x_progress    = progress_action
-, x_crossval_i  = min(7, length(chosen_samples))
+, x_crossval_i  = min(minCrossvalI, length(chosen_samples))
 , x_env         = calc_env
 )
 if ( is.null(my_cor_cov) ) {
 progress_action("NOTHING TO PLOT")
 } else {
 } else {
 1
 }
 )
 col_selector <- vrbl_metadata_names[
-if ( pairSigFeatOnly ) fully_significant else overall_significant
+if (pair_significant_features_only) fully_significant else overall_significant
 ]
 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
 my_cor_cov <- do_detail_plot(
 x_dataMatrix  = my_matrix
 , x_predictor   = predictor
 , x_is_match    = is_match
-, x_algorithm   = algoC
+, x_algorithm   = "nipals"
-, x_prefix      = if (pairSigFeatOnly) {
+, x_prefix      = if (pair_significant_features_only) {
 "Significantly contrasting features"
 } else {
 "Significant features"
 }
 , x_show_labels = labelFeatures
 , x_progress    = progress_action
-, x_crossval_i  = min(7, length(chosen_samples))
+, x_crossval_i  = min(minCrossvalI, length(chosen_samples))
 , x_env         = calc_env
 )
 if ( is.null(my_cor_cov) ) {
 progress_action("NOTHING TO PLOT.")
 } else {
 )
 }
 }
 }
 }
-} else { # tesC == "none"
+} else {
+# tesC == "none"
 # find all the levels for factor facC in sampleMetadata
 level_union <- unique(sort(smpl_metadata_facC))
 # identify the selected levels for factor facC from sampleMetadata
 level_include <- sapply(X = level_union, FUN = isLevelSelected)
 # discard the non-selected levels for factor facC
 , FUN = function(x) {
 fctr_lvl_1 <- x[1]
 fctr_lvl_2 <- {
 if ( fctr_lvl_1 %in% completed )
 return("DUMMY")
-# strF(completed)
 completed <<- c(completed, fctr_lvl_1)
 setdiff(level_union, fctr_lvl_1)
 }
 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
 fctr_lvl_2 <- "other"
 )
 my_cor_cov <- do_detail_plot(
 x_dataMatrix  = my_matrix
 , x_predictor   = predictor
 , x_is_match    = is_match
-, x_algorithm   = algoC
+, x_algorithm   = "nipals"
 , x_prefix      = "Features"
 , x_show_labels = labelFeatures
 , x_progress    = progress_action
-, x_crossval_i  = min(7, length(chosen_samples))
+, x_crossval_i  = min(minCrossvalI, length(chosen_samples))
 , x_env         = calc_env
 )
 if ( is.null(my_cor_cov) ) {
 progress_action("NOTHING TO PLOT...")
 } else {
 )
 my_cor_cov <- do_detail_plot(
 x_dataMatrix  = my_matrix
 , x_predictor   = predictor
 , x_is_match    = is_match
-, x_algorithm   = algoC
+, x_algorithm   = "nipals"
 , x_prefix      = "Features"
 , x_show_labels = labelFeatures
 , x_progress    = progress_action
-, x_crossval_i  = min(7, length(chosen_samples))
+, x_crossval_i  = min(minCrossvalI, length(chosen_samples))
 , x_env         = calc_env
 )
 if ( is.null(my_cor_cov) ) {
 progress_action("NOTHING TO PLOT.....")
 } else {
 }
 }
 if (!did_plot) {
 failure_action(
 sprintf(
-"bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'"
+"Did not plot. Does sampleMetadata have at least two levels of factor '%s' matching '%s' ?"
 , facC, originalLevCSV))
 return ( FALSE )
 }
 return ( TRUE )
 }
 cor_vs_cov <- function(
 matrix_x
 , ropls_x
 , predictor_projection_x = TRUE
 , x_progress = print
+, x_env
 ) {
 tryCatch({
 return(
-cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress)
+cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress, x_env)
 )
-}, error = function(e) {
+}
+, error = function(e) {
 x_progress(
 sprintf(
 "cor_vs_cov fatal error - %s"
 , as.character(e) # e$message
 )
 return ( NULL )
 })
 }
 cor_vs_cov_try <- function(
-matrix_x
+matrix_x                      # rows are samples; columns, features
-, ropls_x
+, ropls_x                       # an instance of ropls::opls
-, predictor_projection_x = TRUE
+, predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection
-, x_progress = print
+, x_progress = print            # function to produce progress and error messages
+, x_env
 ) {
+my_matrix_x <- matrix_x
+my_matrix_x[my_matrix_x==0] <- NA
+fdr_features <- x_env$fdr_features
 x_class <- class(ropls_x)
-if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
+if ( !( as.character(x_class) == "opls" ) ) {
 stop(
 paste(
 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
 , as.character(x_class)
 )
 )
 }
+if ( !ropls_x@suppLs$algoC == "nipals" ) {
+# suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
+stop(
+paste(
+"cor_vs_cov: Expected ropls::opls instance to have been computed by the NIPALS algorithm rather than "
+, ropls_x@suppLs$algoC
+)
+)
+}
 result <- list()
 result$projection <- projection <- if (predictor_projection_x) 1 else 2
-# suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
-if ( ropls_x@suppLs$algoC == "nipals") {
+# I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
-# Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
+#   (and not from the supplement despite the statement that, for the NIPALS algorithm,
-mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
+#   the equations from the supplement should be used) because the
-mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
+#   Pearson/Galton coefficient of correlation is defined as
-if (predictor_projection_x)
+#   $$
-score_matrix <- ropls_x@scoreMN
+#      \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y}
-else
+#   $$
-score_matrix <- ropls_x@orthoScoreMN
+#   as described (among other places) on Wikipedia at
-score_matrix_transposed <- t(score_matrix)
+#     https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population
-score_matrix_magnitude <- mag(score_matrix)
+# The equations in the supplement said to use, for the predictive component t1,
-result$covariance <-
+#      \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))}
-score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
+# but the results that I got were dramatically different from published results for S-PLOTs;
-result$correlation <-
+# perhaps my data are not centered exactly the same way that theirs were.
-score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
+# The correlations calculated here are in agreement with those calculated with the code from
+#   page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf
+# count the features/variables (one column for each feature)
+n_features <- ncol(my_matrix_x)
+all_n_features <- x_env$fdr_features
+if (length(grep("^[0-9][0-9]*$", all_n_features)) > 0) {
+all_n_features <- as.integer(all_n_features)
 } else {
-# WARNING - untested code - I don't have test data to exercise this branch
+all_n_features <- n_features
-# Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
+}
-# scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
+fdr_n_features <- max(n_features, all_n_features)
-if (predictor_projection_x)
+# print("n_features")
-score_matrix <- ropls_x@scoreMN
+# print(n_features)
-else
-score_matrix <- ropls_x@orthoScoreMN
+# count the samples/observations (one row for each sample)
-score_matrix_transposed <- t(score_matrix)
+n_observations <- nrow(my_matrix_x)
-cov_divisor <- nrow(matrix_x) - 1
-result$covariance <- sapply(
+# choose whether to plot the predictive score vector or orthogonal score vector
-X = 1:ncol(matrix_x)
+if (predictor_projection_x)
-, FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor
+score_vector <- ropls_x@scoreMN
+else
+score_vector <- ropls_x@orthoScoreMN
+# compute the covariance of each feature with the score vector
+result$covariance <-
+sapply(
+X = 1:n_features
+, FUN = function(i) {
+cov(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs")
+}
 )
-score_sd <- sapply(
+# access covariance by feature name
-X = 1:ncol(score_matrix)
+names(result$covariance) <- colnames(my_matrix_x)
-, FUN = function(x) sd(score_matrix[,x])
+# compute the correlation of each feature with the score vector
+result$correlation <-
+sapply(
+X = 1:n_features
+, FUN = function(i) {
+cor(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs")
+}
 )
-# xSdVn - Numerical vector: variable standard deviations of the 'x' matrix
+# access correlation by feature name
-xSdVn <- ropls_x@xSdVn
+names(result$correlation) <- colnames(my_matrix_x)
-result$correlation <- sapply(
-X = 1:ncol(matrix_x)
+# eliminate NAs in either correlation or covariance
-, FUN = function(x) {
+nas <- is.na(result$correlation) | is.na(result$covariance)
-( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) )
+featureID          <- names(ropls_x@vipVn)
-}
+featureID          <- featureID[!nas]
-)
+result$correlation <- result$correlation[!nas]
-}
+result$covariance  <- result$covariance[!nas]
-result$correlation <- result$correlation[ 1, , drop = TRUE ]
+n_features <- length(featureID)
-result$covariance  <- result$covariance [ 1, , drop = TRUE ]
+correl_pci <- lapply(
-# Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014
+X = 1:n_features
+, FUN = function(i) {
+correl.ci(
+r = result$correlation[i]
+, n_obs = n_observations
+, n_vars = fdr_n_features
+)
+}
+)
+result$p_value_raw <- sapply(
+X = 1:n_features
+, FUN = function(i) correl_pci[[i]]$p.value.raw
+)
+result$p_value_raw[is.na(result$p_value_raw)] <- 1.0
+result$p_value <- sapply(
+X = 1:n_features
+, FUN = function(i) correl_pci[[i]]$p.value
+)
+result$p_value[is.na(result$p_value)] <- 1.0
+result$ci_lower <- sapply(
+X = 1:n_features
+, FUN = function(i) correl_pci[[i]]$CI["lower"]
+)
+result$ci_upper <- sapply(
+X = 1:n_features
+, FUN = function(i) correl_pci[[i]]$CI["upper"]
+)
+# extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627)
 #    Length = number of features; labels = feature identifiers.  (The same is true for $correlation and $covariance.)
-result$vip4p     <- as.numeric(ropls_x@vipVn)
+result$vip4p     <- as.numeric(ropls_x@vipVn)[!nas]
-result$vip4o     <- as.numeric(ropls_x@orthoVipVn)
+result$vip4o     <- as.numeric(ropls_x@orthoVipVn)[!nas]
-result$loadp     <- as.numeric(ropls_x@loadingMN)
+if (length(result$vip4o) == 0) result$vip4o <- NA
-result$loado     <- as.numeric(ropls_x@orthoLoadingMN)
+# extract the loadings
+result$loadp     <- as.numeric(ropls_x@loadingMN)[!nas]
+result$loado     <- as.numeric(ropls_x@orthoLoadingMN)[!nas]
 # get the level names
 level_names      <- sort(levels(as.factor(ropls_x@suppLs$y)))
 fctr_lvl_1       <- level_names[1]
 fctr_lvl_2       <- level_names[2]
-feature_count    <- length(ropls_x@vipVn)
+result$level1    <- rep.int(x = fctr_lvl_1, times = n_features)
-result$level1    <- rep.int(x = fctr_lvl_1, times = feature_count)
+result$level2    <- rep.int(x = fctr_lvl_2, times = n_features)
-result$level2    <- rep.int(x = fctr_lvl_2, times = feature_count)
-superresult <- list()
-if (length(result$vip4o) == 0) result$vip4o <- NA
 greaterLevel <- sapply(
 X = result$correlation
-, FUN = function(my_corr)
+, FUN = function(my_corr) {
 tryCatch({
-if ( is.nan( my_corr ) ) {
+if ( is.na(my_corr) || ! is.numeric( my_corr ) ) {
-print("my_corr is NaN")
+NA
-NA
 } else {
 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
 }
-}, error = function(e) {
+}
+, error = function(e) {
+print(my_corr)
 x_progress(
 sprintf(
 "cor_vs_cov -> sapply:  error - substituting NA - %s"
 , as.character(e)
 )
 )
 NA
-})
+}
+)
+}
 )
-# begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
+# build a data frame to hold the content for the tab-separated values file
-featureID          <- names(ropls_x@vipVn)
-greaterLevel       <- greaterLevel[featureID]
-result$correlation <- result$correlation[featureID]
-result$covariance  <- result$covariance[featureID]
-# end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
 tsv1 <- data.frame(
-featureID           = featureID
+featureID     = featureID
-, factorLevel1        = result$level1
+, factorLevel1  = result$level1
-, factorLevel2        = result$level2
+, factorLevel2  = result$level2
-, greaterLevel        = greaterLevel
+, greaterLevel  = greaterLevel
-, projection          = result$projection
+, projection    = result$projection
-, correlation         = result$correlation
+, correlation   = result$correlation
-, covariance          = result$covariance
+, covariance    = result$covariance
-, vip4p               = result$vip4p
+, vip4p         = result$vip4p
-, vip4o               = result$vip4o
+, vip4o         = result$vip4o
-, loadp               = result$loadp
+, loadp         = result$loadp
-, loado               = result$loado
+, loado         = result$loado
-, row.names           = NULL
+, cor_p_val_raw = result$p_value_raw
+, cor_p_value   = result$p_value
+, cor_ci_lower  = result$ci_lower
+, cor_ci_upper  = result$ci_upper
 )
-tsv1 <- tsv1[!is.na(tsv1$correlation),]
+rownames(tsv1) <- tsv1$featureID
-tsv1 <- tsv1[!is.na(tsv1$covariance),]
-superresult$tsv1 <- tsv1
+# build the superresult, i.e., the result returned by this function
-rownames(superresult$tsv1) <- tsv1$featureID
+superresult <- list()
 superresult$projection <- result$projection
 superresult$covariance <- result$covariance
 superresult$correlation <- result$correlation
 superresult$vip4p <- result$vip4p
 superresult$vip4o <- result$vip4o
 superresult$loadp <- result$loadp
 superresult$loado <- result$loado
+superresult$cor_p_value <- tsv1$cor_p_value
 superresult$details <- result
-result$superresult <- superresult
-# Include thise in case future consumers of this routine want to use it in currently unanticipated ways
+# remove any rows having NA for covariance or correlation
-result$oplsda    <- ropls_x
+tsv1 <- tsv1[!is.na(tsv1$correlation),]
-result$predictor <- ropls_x@suppLs$y   # in case future consumers of this routine want to use it in currently unanticipated ways
+tsv1 <- tsv1[!is.na(tsv1$covariance),]
+superresult$tsv1 <- tsv1
+# # I did not include these but left them commented out in case future
+# #   consumers of this routine want to use it in currently unanticipated ways
+# result$superresult <- superresult
+# result$oplsda    <- ropls_x
+# result$predictor <- ropls_x@suppLs$y
 return (superresult)
 }
-# vim: sw=2 ts=2 et :
+# Code for correl.ci was adapted from correl function from:
+#   @book{
+#     Tsagris_2018,
+#     author = {Tsagris, Michail},
+#     year = {2018},
+#     link = {https://www.researchgate.net/publication/324363311_Multivariate_data_analysis_in_R},
+#     title = {Multivariate data analysis in R}
+#   }
+# which follows
+#   https://en.wikipedia.org/wiki/Fisher_transformation#Definition
# correl.ci: significance test and confidence interval for a Pearson
#   correlation coefficient, computed on Fisher's z scale.
#
# Arguments:
#   r      - observed correlation coefficient(s)
#   n_obs  - number of paired observations from which r was computed
#   n_vars - number of hypotheses being tested; passed to p.adjust() as 'n'
#            for the Benjamini-Yekutieli ("BY") adjustment, so it must be at
#            least the number of non-missing p-values
#   a      - significance level; the interval is a (1 - a) confidence interval
#   rho    - correlation hypothesised under H0
#
# Value: a list with members
#   correlation - r, returned unchanged
#   p.value.raw - two-sided p-value for H0: correlation == rho
#   p.value     - p.value.raw after the "BY" adjustment for n_vars tests
#   CI          - numeric vector named "lower" and "upper"
correl.ci <- function(r, n_obs, n_vars, a = 0.05, rho = 0) {
  # Fisher's z-transformation: atanh(x) == 0.5 * log((1 + x) / (1 - x))
  z_h0 <- atanh(rho)
  z_h1 <- atanh(r)

  # On the z scale the standard error is 1 / sqrt(n - 3) and does not depend
  #   on r.  (A factor of (1 - r^2) belongs to the standard error of r itself;
  #   applying it here would understate the error for strong correlations.)
  # pmax() makes n_obs <= 3 yield an infinite standard error, i.e. a p-value
  #   of 1 and an interval of [-1, 1], rather than NaN with a warning.
  se <- 1 / sqrt(pmax(n_obs - 3, 0))

  # test statistic, approximately standard normal under H0
  test <- (z_h1 - z_h0) / se

  # take the upper tail directly; 1 - pnorm(x) underflows to zero for large x
  p_raw <- 2 * pnorm(abs(test), lower.tail = FALSE)
  p_adj <- p.adjust(p = p_raw, method = "BY", n = n_vars)

  # confidence limits on the z scale, mapped back to the correlation scale
  half_width <- qnorm(1 - a / 2) * se
  ci <- c(tanh(z_h1 - half_width), tanh(z_h1 + half_width))
  # assign names afterwards so that any names carried by r are overwritten
  names(ci) <- c("lower", "upper")

  list(
    correlation = r
    , p.value.raw = p_raw
    , p.value = p_adj
    , CI = ci
  )
}
+# vim: sw=2 ts=2 et ai :

Mercurial > repos > eschen42 > w4mcorcov

comparison w4mcorcov_calc.R @ 7:ca9938f2eb6a draft default tip