Mercurial > repos > eschen42 > w4mcorcov

diff w4mcorcov_calc.R @ 6:0b49916c5c52 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 4428e3252d54c8a8e0e5d85e8eaaeb13e9b21de7
author: eschen42
date: Wed, 05 Sep 2018 19:24:47 -0400
parents: 1d046f648b47
children: ca9938f2eb6a
--- a/w4mcorcov_calc.R	Fri Mar 02 08:26:36 2018 -0500
+++ b/w4mcorcov_calc.R	Wed Sep 05 19:24:47 2018 -0400
@@ -7,9 +7,23 @@
 #### OPLS-DA
 algoC <- "nipals"
 
-do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) {
+do_detail_plot <- function(
+  x_dataMatrix
+, x_predictor
+, x_is_match
+, x_algorithm
+, x_prefix
+, x_show_labels
+, x_progress = print
+, x_env
+, x_crossval_i
+) {
   off <- function(x) if (x_show_labels == "0") 0 else x
-  if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) {
+  if ( x_is_match
+      && ncol(x_dataMatrix) > 0
+      && length(unique(x_predictor))> 1
+      && x_crossval_i < nrow(x_dataMatrix)
+  ) {
     my_oplsda <- opls(
         x      = x_dataMatrix
       , y      = x_predictor
@@ -19,86 +33,270 @@
       , printL = FALSE
       , plotL  = FALSE
       , crossvalI = x_crossval_i
+      , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
       )
+    # strip out variables having negligible variance
+    x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE]
     my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
+    # x_progress(strF(x_dataMatrix))
+    # x_progress(strF(my_oplsda))
+    #x_progress(head(my_oplsda_suppLs_y_levels))
+    #x_progress(unique(my_oplsda_suppLs_y_levels))
     fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
     fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
-    my_cor_vs_cov <- cor_vs_cov(
-        matrix_x = x_dataMatrix
-      , ropls_x  = my_oplsda
+    do_s_plot <- function(
+        x_env
+      , predictor_projection_x   = TRUE
+      , cplot_x      = FALSE
+      , cor_vs_cov_x = NULL
       )
-    with(
-      my_cor_vs_cov
-    , {
-        min_x <- min(covariance)
-        max_x <- max(covariance)
-        lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
-        covariance <- covariance / lim_x
-        lim_x <- 1.2
-        main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
-        main_cex <- min(1.0, 46.0/nchar(main_label))
-        # "It is generally accepted that a variable should be selected if vj>1, [27–29],
-        #   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
-        #   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
-        vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
-        alpha <- 0.1 + 0.4 * vipco
-        red  <- as.numeric(correlation > 0) * vipco
-        blue <- as.numeric(correlation < 0) * vipco
-        plus_cor <- correlation
-        plus_cov <- covariance
-        cex <- 0.75
-        plot(
-          y = plus_cor
-        , x = plus_cov
-        , type="p"
-        , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) )
-        , ylim=c( -1.0   - off(0.2), 1.0   + off(0.2) )
-        , xlab = sprintf("relative covariance(feature,t1)")
-        , ylab = sprintf("correlation(feature,t1)")
-        , main = main_label
-        , cex.main = main_cex
-        , cex = cex
-        , pch = 16
-        , col = rgb(blue = blue, red = red, green = 0, alpha = alpha)
-        )
-        low_x <- -0.7 * lim_x
-        high_x <- 0.7 * lim_x
-        text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
-        text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
-        if ( x_show_labels != "0" ) {
-          my_loadp <- loadp
-          my_loado <- loado
-          names(my_loadp) <- tsv1$featureID
-          names(my_loado) <- tsv1$featureID
-          if ( x_show_labels == "ALL" ) {
-            n_labels <- length(loadp)
-          } else {
-            n_labels <- as.numeric(x_show_labels)
+    {
+      #print(ls(x_env))               # "cplot_y" etc
+      #print(str(x_env$cplot_y))      # chr "covariance"
+      if (cplot_x) {
+        #print(x_env$cplot_y)         # "covariance"
+        cplot_y_correlation <- (x_env$cplot_y == "correlation")
+        #print(cplot_y_correlation)   # FALSE
+      }
+      if (is.null(cor_vs_cov_x)) {
+        my_cor_vs_cov <- cor_vs_cov(
+            matrix_x   = x_dataMatrix
+          , ropls_x    = my_oplsda
+          , predictor_projection_x = predictor_projection_x
+          , x_progress
+          )
+      } else {
+        my_cor_vs_cov <- cor_vs_cov_x
+      }
+      # print("str(my_cor_vs_cov)")
+      # str(my_cor_vs_cov)
+      if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
+        if (is.null(cor_vs_cov_x)) {
+          x_progress("No cor_vs_cov data produced")
+        }
+        plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
+        text(x=1, y=1, labels="too few covariance data")
+        return(my_cor_vs_cov)
+      }
+      with(
+        my_cor_vs_cov
+      , {
+          min_x <- min(covariance, na.rm = TRUE)
+          max_x <- max(covariance, na.rm = TRUE)
+          lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
+          covariance <- covariance / lim_x
+          lim_x <- 1.2
+          # "It is generally accepted that a variable should be selected if vj>1, [27–29],
+          #   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
+          #   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
+          plus_cor <- correlation
+          plus_cov <- covariance
+          cex <- 0.65
+          which_projection <- if (projection == 1) "t1" else "o1"
+          which_loading <- if (projection == 1) "parallel" else "orthogonal"
+          if (projection == 1) { # predictor-projection
+            vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
+            if (!cplot_x) { # S-plot predictor-projection
+              my_xlab <- "relative covariance(feature,t1)"
+              my_x <- plus_cov
+              my_ylab <- "correlation(feature,t1)"
+              my_y <- plus_cor
+            } else { # C-plot predictor-projection
+              my_xlab <- "variable importance in predictor-projection"
+              my_x <- vip4p
+              if (cplot_y_correlation) {
+                my_ylab <- "correlation(feature,t1)"
+                my_y <- plus_cor
+              } else {
+                my_ylab <- "relative covariance(feature,t1)"
+                my_y <- plus_cov
+              }
+            }
+            if (cplot_x) {
+              lim_x <- max(my_x, na.rm = TRUE) * 1.1
+              my_xlim <- c( 0, lim_x + off(0.2) )
+            } else {
+              my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
+            }
+            my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
+            my_load_distal <- loadp
+            my_load_proximal <- loado
+            red  <- as.numeric(correlation > 0) * vipcp
+            blue <- as.numeric(correlation < 0) * vipcp
+            alpha <- 0.1 + 0.4 * vipcp
+            red[is.na(red)] <- 0
+            blue[is.na(blue)] <- 0
+            alpha[is.na(alpha)] <- 0
+            my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
+            main_label <- sprintf("%s for level %s versus %s"
+                                 , x_prefix, fctr_lvl_1, fctr_lvl_2)
+          } else { # orthogonal projection
+            vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
+            if (!cplot_x) {
+              my_xlab <- "relative covariance(feature,to1)"
+              my_x <- -plus_cov
+            } else {
+              my_xlab <- "variable importance in orthogonal projection"
+              my_x <- vip4o
+            }
+            if (!cplot_x) { # S-plot orthogonal projection
+              my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
+              my_ylab <- "correlation(feature,to1)"
+              my_y <- plus_cor
+            } else { # C-plot orthogonal projection
+              lim_x <- max(my_x, na.rm = TRUE) * 1.1
+              my_xlim <- c( 0, lim_x + off(0.2) )
+              if (cplot_y_correlation) {
+                my_ylab <- "correlation(feature,to1)"
+                my_y <- plus_cor
+              } else {
+                my_ylab <- "relative covariance(feature,to1)"
+                my_y <- plus_cov
+              }
+            }
+            my_ylim <- c( -1.0   - off(0.2), 1.0   + off(0.2) )
+            my_load_distal <- loado
+            my_load_proximal <- loadp
+            alpha <- 0.1 + 0.4 * vipco
+            alpha[is.na(alpha)] <- 0
+            my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
+            main_label <- sprintf(
+              "Features influencing orthogonal projection for %s versus %s"
+            , fctr_lvl_1, fctr_lvl_2)
           }
-          n_labels <- min( n_labels, (1 + length(loadp)) / 2 )
-          labels_to_show <- c(
-            names(head(sort(my_loadp),n = n_labels))
-          , names(tail(sort(my_loadp),n = n_labels))
+          main_cex <- min(1.0, 46.0/nchar(main_label))
+          my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
+          plot(
+            y = my_y
+          , x = my_x
+          , type = "p"
+          , xlim = my_xlim
+          , ylim = my_ylim
+          , xlab = my_xlab
+          , ylab = my_ylab
+          , main = main_label
+          , cex.main = main_cex
+          , cex = cex
+          , pch = 16
+          , col = my_col
           )
-          if ( x_show_loado_labels ) {
-            labels_to_show <- c(
-              labels_to_show
-            , names(head(sort(my_loado),n = n_labels))
-            , names(tail(sort(my_loado),n = n_labels))
-            )
+          low_x <- -0.7 * lim_x
+          high_x <- 0.7 * lim_x
+          if (projection == 1 && !cplot_x) {
+            text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
+            text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
           }
-          labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" ))
-          text(
-            y = plus_cor - 0.013
-          , x = plus_cov + 0.020
-          , cex = 0.4
-          , labels = labels
-          , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha)
-          , srt = -30 # slant 30 degrees downward
-          , adj = 0   # left-justified
-          )
+          if ( x_show_labels != "0" ) {
+            names(my_load_distal) <- tsv1$featureID
+            names(my_load_proximal) <- tsv1$featureID
+            if ( x_show_labels == "ALL" ) {
+              n_labels <- length(my_load_distal)
+            } else {
+              n_labels <- as.numeric(x_show_labels)
+            }
+            n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
+            labels_to_show <- c(
+              names(head(sort(my_load_distal),n = n_labels))
+            , names(tail(sort(my_load_distal),n = n_labels))
+            )
+            labels <- unname(
+              sapply(
+                X = tsv1$featureID
+              , FUN = function(x) if( x %in% labels_to_show ) x else ""
+              )
+            )
+            x_text_offset <- 0.024
+            y_text_off <- 0.017
+            if (!cplot_x) { # S-plot
+              y_text_offset <- if (projection == 1) -y_text_off else y_text_off
+            } else { # C-plot
+              y_text_offset <-
+                sapply(
+                  X = (my_y > 0)
+                , FUN = function(x) { if (x) y_text_off else -y_text_off }
+                )
+            }
+            label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
+              # print("str(x_arg)")
+              # print(str(x_arg))
+              # print("str(y_arg)")
+              # print(str(y_arg))
+              # print("str(labels_arg)")
+              # print(str(labels_arg))
+              if (length(labels_arg) > 0) {
+                unique_slant <- unique(slant_arg)
+                if (length(unique_slant) == 1) {
+                  text(
+                    y = y_arg
+                  , x = x_arg + x_text_offset
+                  , cex = 0.4
+                  , labels = labels_arg
+                  , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+                  , srt = slant_arg
+                  , adj = 0   # left-justified
+                  )
+                } else {
+                  for (slant in unique_slant) {
+                    text(
+                      y = y_arg[slant_arg == slant]
+                    , x = x_arg[slant_arg == slant] + x_text_offset
+                    , cex = 0.4
+                    , labels = labels_arg[slant_arg == slant]
+                    , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
+                    , srt = slant
+                    , adj = 0   # left-justified
+                    )
+                  }
+                }
+              }
+            }
+            if (!cplot_x) {
+              my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
+            } else {
+              my_slant <- sapply(
+                            X = (my_y > 0)
+                          , FUN = function(x) if (x) 2 else -2
+                          ) * my_feature_label_slant
+            }
+            if (length(my_x) > 1) {
+              label_features(
+                x_arg      = my_x  [my_x > 0]
+              , y_arg      = my_y  [my_x > 0] - y_text_offset
+              , labels_arg = labels[my_x > 0]
+              , slant_arg = (if (!cplot_x) -my_slant else (my_slant))
+              )
+              if (!cplot_x) {
+                label_features(
+                  x_arg      = my_x  [my_x < 0]
+                , y_arg      = my_y  [my_x < 0] + y_text_offset
+                , labels_arg = labels[my_x < 0]
+                , slant_arg = my_slant
+                )
+              }
+            } else {
+              if (!cplot_x) {
+                my_slant <- (if (my_x > 1) -1 else 1) * my_slant
+                my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
+              } else {
+                my_slant <- (if (my_y > 1) -1 else 1) * my_slant
+                my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset
+              }
+              label_features(
+                x_arg = my_x
+              , y_arg = my_y_arg
+              , labels_arg = labels
+              , slant_arg = my_slant
+              )
+            }
+          }
         }
-      }
+      )
+      return (my_cor_vs_cov)
+    }
+    my_cor_vs_cov <- do_s_plot(
+      x_env = x_env
+    , predictor_projection_x = TRUE
+    , cplot_x = FALSE
     )
     typeVc <- c("correlation",      # 1
                 "outlier",          # 2
@@ -118,36 +316,93 @@
     } else {
       my_typevc <- c("(dummy)","overview","(dummy)")
     }
+    my_ortho_cor_vs_cov_exists <- FALSE
     for (my_type in my_typevc) {
       if (my_type %in% typeVc) {
-        # print(sprintf("plotting type %s", my_type))
         tryCatch({
-          plot(
-            x            = my_oplsda
-          , typeVc       = my_type
-          , parCexN      = 0.4
-          , parDevNewL   = FALSE
-          , parLayL      = TRUE
-          , parEllipsesL = TRUE
-        )
+          if ( my_type != "x-loading" ) {
+             plot(
+               x            = my_oplsda
+             , typeVc       = my_type
+             , parCexN      = 0.4
+             , parDevNewL   = FALSE
+             , parLayL      = TRUE
+             , parEllipsesL = TRUE
+             )
+             if (my_type == "overview") {
+               sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2)
+               title(sub = sub_label)
+             }
+          } else {
+            my_ortho_cor_vs_cov <- do_s_plot(
+              x_env = x_env
+            , predictor_projection_x = FALSE
+            , cplot_x = FALSE
+            )
+            my_ortho_cor_vs_cov_exists <- TRUE
+          }
         }, error = function(e) {
-          x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2))
+          x_progress(
+            sprintf(
+              "factor level %s or %s may have only one sample - %s"
+            , fctr_lvl_1
+            , fctr_lvl_2
+            , e$message
+            )
+          )
         })
       } else {
-        # print("plotting dummy graph")
         plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
         text(x=1, y=1, labels="no orthogonal projection is possible")
       }
     }
+    cplot_p <- x_env$cplot_p
+    cplot_o <- x_env$cplot_o
+    if (cplot_p || cplot_o) {
+      if (cplot_p) {
+        do_s_plot(
+          x_env = x_env
+        , predictor_projection_x = TRUE
+        , cplot_x = TRUE
+        , cor_vs_cov_x = my_cor_vs_cov
+        )
+        did_plots <- 1
+      } else {
+        did_plots <- 0
+      }
+      if (cplot_o) {
+        if (my_ortho_cor_vs_cov_exists) {
+          do_s_plot(
+            x_env = x_env
+          , predictor_projection_x = FALSE
+          , cplot_x = TRUE
+          , cor_vs_cov_x = my_ortho_cor_vs_cov
+          )
+        } else {
+          plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
+          text(x=1, y=1, labels="no orthogonal projection is possible")
+        }
+        did_plots <- 1 + did_plots
+      }
+      if (did_plots == 1) {
+        plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white")
+      }
+    }
     return (my_cor_vs_cov)
   } else {
-    # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor))))
     return (NULL)
   }
 }
 
 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
-corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) {
+corcov_calc <- function(
+  calc_env
+, failure_action      = stop
+, progress_action     = function(x) { }
+, corcov_tsv_action   = function(t) { }
+, salience_tsv_action = function(t) { }
+, extra_plots         = c()
+) {
   if ( ! is.environment(calc_env) ) {
     failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
     return ( FALSE )
@@ -174,22 +429,23 @@
   # matchingC is one of { "none", "wildcard", "regex" }
   matchingC <- calc_env$matchingC
   labelFeatures <- calc_env$labelFeatures
-  labelOrthoFeatures <- calc_env$labelOrthoFeatures
 
   # arg/env checking
   if (!(facC %in% names(smpl_metadata))) {
-    failure_action(sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata", facC))
+    failure_action(
+      sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata"
+             , facC))
     return ( FALSE )
   }
 
   mz             <- vrbl_metadata$mz
   names(mz)      <- vrbl_metadata$variableMetadata
   mz_lookup      <- function(feature) unname(mz[feature])
-  
+
   rt             <- vrbl_metadata$rt
   names(rt)      <- vrbl_metadata$variableMetadata
   rt_lookup      <- function(feature) unname(rt[feature])
-  
+
   # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv)
   salience_df <- calc_env$salience_df <- w4msalience(
     data_matrix    = data_matrix
@@ -236,12 +492,9 @@
   }
 
   # transpose matrix because ropls matrix is the transpose of XCMS matrix
+  tdm <- t(data_matrix)
   # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot
-  # center
-  cdm <- center_colmeans(t(data_matrix))
-  # pareto-scale
-  my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE))
-  scdm <- sweep(cdm, 2, my_scale, "/")
+  # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2.
 
   # pattern to match column names like k10_kruskal_k4.k3_sig
   col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC)
@@ -265,7 +518,10 @@
     # for each column name, extract the parts of the name matched by 'col_pattern', if any
     the_colnames <- colnames(vrbl_metadata)
     if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
-      failure_action(sprintf("bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'.", tesC))
+      failure_action(
+        sprintf(
+          "bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'."
+        , tesC))
       return ( FALSE )
     }
     col_matches <- lapply(
@@ -292,34 +548,52 @@
       }
     }
     level_union <- unique(sort(level_union))
-    overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
+    overall_significant <- 1 == (
+      if (intersample_sig_col %in% colnames(vrbl_metadata)) {
+        vrbl_metadata[,intersample_sig_col]
+      } else {
+        1
+      }
+    )
     if ( length(level_union) > 2 ) {
       chosen_samples <- smpl_metadata_facC %in% level_union
       chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
       col_selector <- vrbl_metadata_names[ overall_significant ]
-      my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
+      my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
       plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
-        progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
-        predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+        progress_action(
+          sprintf("calculating/plotting contrast of %s vs. %s"
+                 , fctr_lvl_1, fctr_lvl_2)
+        )
+        predictor <- sapply(
+          X = chosen_facC
+        , FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
+        )
         my_cor_cov <- do_detail_plot(
           x_dataMatrix  = my_matrix
         , x_predictor   = predictor
-        , x_is_match    = is_match
+        , x_is_match    = TRUE
         , x_algorithm   = algoC
-        , x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
+        , x_prefix      = if (pairSigFeatOnly) {
+                            "Significantly contrasting features"
+                          } else {
+                            "Significant features"
+                          }
         , x_show_labels = labelFeatures
-        , x_show_loado_labels = labelOrthoFeatures
         , x_progress    = progress_action
         , x_crossval_i  = min(7, length(chosen_samples))
         , x_env         = calc_env
         )
         if ( is.null(my_cor_cov) ) {
-          progress_action("NOTHING TO PLOT.")
+          progress_action("NOTHING TO PLOT")
         } else {
           my_tsv <- my_cor_cov$tsv1
           my_tsv$mz <- mz_lookup(my_tsv$featureID)
           my_tsv$rt <- rt_lookup(my_tsv$featureID)
-          my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 
+          my_tsv["level1Level2Sig"] <- vrbl_metadata[
+            match(my_tsv$featureID, vrbl_metadata_names)
+          , vrbl_metadata_col
+          ]
           tsv <<- my_tsv
           corcov_tsv_action(tsv)
           did_plot <<- TRUE
@@ -348,50 +622,79 @@
           fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
           # only process this column if both factors are members of lvlCSV
           is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
-          progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
-          # TODO delete next line displaying currently-processed column
-          # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match))
-          # choose only samples with one of the two factors for this column
-          chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
-          predictor <- smpl_metadata_facC[chosen_samples]
-          # extract only the significantly-varying features and the chosen samples
-          fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
-          col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
-          my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
-          my_cor_cov <- do_detail_plot(
-            x_dataMatrix  = my_matrix
-          , x_predictor   = predictor
-          , x_is_match    = is_match
-          , x_algorithm   = algoC
-          , x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
-          , x_show_labels = labelFeatures
-          , x_show_loado_labels = labelOrthoFeatures
-          , x_progress    = progress_action
-          , x_crossval_i  = min(7, length(chosen_samples))
-          , x_env         = calc_env
-          )
-          if ( is.null(my_cor_cov) ) {
-            progress_action("NOTHING TO PLOT.")
+          if (is_match) {
+            progress_action(
+              sprintf("calculating/plotting contrast of %s vs. %s."
+                     , fctr_lvl_1, fctr_lvl_2
+              )
+            )
+            # choose only samples with one of the two factors for this column
+            chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+            predictor <- smpl_metadata_facC[chosen_samples]
+            # extract only the significantly-varying features and the chosen samples
+            fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] *
+              ( if (intersample_sig_col %in% colnames(vrbl_metadata)) {
+                  vrbl_metadata[,intersample_sig_col]
+                } else {
+                  1
+                }
+              )
+            col_selector <- vrbl_metadata_names[
+              if ( pairSigFeatOnly ) fully_significant else overall_significant
+            ]
+            my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
+            my_cor_cov <- do_detail_plot(
+              x_dataMatrix  = my_matrix
+            , x_predictor   = predictor
+            , x_is_match    = is_match
+            , x_algorithm   = algoC
+            , x_prefix      = if (pairSigFeatOnly) {
+                                "Significantly contrasting features"
+                              } else {
+                                "Significant features"
+                              }
+            , x_show_labels = labelFeatures
+            , x_progress    = progress_action
+            , x_crossval_i  = min(7, length(chosen_samples))
+            , x_env         = calc_env
+            )
+            if ( is.null(my_cor_cov) ) {
+              progress_action("NOTHING TO PLOT.")
+            } else {
+              tsv <- my_cor_cov$tsv1
+              tsv$mz <- mz_lookup(tsv$featureID)
+              tsv$rt <- rt_lookup(tsv$featureID)
+              tsv["level1Level2Sig"] <- vrbl_metadata[
+                match(tsv$featureID, vrbl_metadata_names)
+              , vrbl_metadata_col
+              ]
+              corcov_tsv_action(tsv)
+              did_plot <- TRUE
+            }
           } else {
-            tsv <- my_cor_cov$tsv1
-            tsv$mz <- mz_lookup(tsv$featureID)
-            tsv$rt <- rt_lookup(tsv$featureID)
-            tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 
-            corcov_tsv_action(tsv)
-            did_plot <- TRUE
+            progress_action(
+              sprintf("skipping contrast of %s vs. %s."
+                     , fctr_lvl_1, fctr_lvl_2
+              )
+            )
           }
         }
       }
     }
   } else { # tesC == "none"
+    # find all the levels for factor facC in sampleMetadata
     level_union <- unique(sort(smpl_metadata_facC))
+    # identify the selected levels for factor facC from sampleMetadata
+    level_include <- sapply(X = level_union, FUN = isLevelSelected)
+    # discard the non-selected levels for factor facC
+    level_union <- level_union[level_include]
     if ( length(level_union) > 1 ) {
       if ( length(level_union) > 2 ) {
         ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
         completed <- c()
         lapply(
           X = level_union
-        , FUN = function(x) { 
+        , FUN = function(x) {
             fctr_lvl_1 <- x[1]
             fctr_lvl_2 <- {
               if ( fctr_lvl_1 %in% completed )
@@ -402,39 +705,50 @@
             }
             chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
             fctr_lvl_2 <- "other"
-            # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
-            progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
             if (length(unique(chosen_samples)) < 1) {
-              progress_action("NOTHING TO PLOT...")
+              progress_action(
+                sprintf("Skipping contrast of %s vs. %s; there are no chosen samples."
+                , fctr_lvl_1, fctr_lvl_2)
+              )
             } else {
               chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
-              predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
-              my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+              predictor <- sapply(
+                X = chosen_facC
+              , FUN = function(fac) {
+                  if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
+                }
+              )
+              my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
               # only process this column if both factors are members of lvlCSV
               is_match <- isLevelSelected(fctr_lvl_1)
-              my_cor_cov <- do_detail_plot(
-                x_dataMatrix  = my_matrix
-              , x_predictor   = predictor
-              , x_is_match    = is_match
-              , x_algorithm   = algoC
-              , x_prefix      = "Features"
-              , x_show_labels = labelFeatures
-              , x_show_loado_labels = labelOrthoFeatures
-              , x_progress    = progress_action
-              , x_crossval_i  = min(7, length(chosen_samples))
-              , x_env         = calc_env
-              )
-              if ( is.null(my_cor_cov) ) {
-                progress_action("NOTHING TO PLOT")
+              if (is_match) {
+                progress_action(
+                  sprintf("Calculating/plotting contrast of %s vs. %s"
+                  , fctr_lvl_1, fctr_lvl_2)
+                )
+                my_cor_cov <- do_detail_plot(
+                  x_dataMatrix  = my_matrix
+                , x_predictor   = predictor
+                , x_is_match    = is_match
+                , x_algorithm   = algoC
+                , x_prefix      = "Features"
+                , x_show_labels = labelFeatures
+                , x_progress    = progress_action
+                , x_crossval_i  = min(7, length(chosen_samples))
+                , x_env         = calc_env
+                )
+                if ( is.null(my_cor_cov) ) {
+                  progress_action("NOTHING TO PLOT...")
+                } else {
+                  tsv <- my_cor_cov$tsv1
+                  tsv$mz <- mz_lookup(tsv$featureID)
+                  tsv$rt <- rt_lookup(tsv$featureID)
+                  corcov_tsv_action(tsv)
+                  did_plot <<- TRUE
+                }
               } else {
-                tsv <- my_cor_cov$tsv1
-                tsv$mz <- mz_lookup(tsv$featureID)
-                tsv$rt <- rt_lookup(tsv$featureID)
-                corcov_tsv_action(tsv)
-                did_plot <<- TRUE
               }
             }
-            #print("baz")
             "dummy" # need to return a value; otherwise combn fails with an error
           }
         )
@@ -444,52 +758,67 @@
       utils::combn(
         x = level_union
       , m = 2
-      , FUN = function(x) { 
+      , FUN = function(x) {
           fctr_lvl_1 <- x[1]
           fctr_lvl_2 <- x[2]
           chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
-          # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
-          progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
           if (length(unique(chosen_samples)) < 1) {
-            progress_action("NOTHING TO PLOT...")
+            progress_action(
+              sprintf("Skipping contrast of %s vs. %s. - There are no chosen samples."
+                     , fctr_lvl_1, fctr_lvl_2
+              )
+            )
           } else {
             chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
             predictor <- chosen_facC
-            my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+            my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
             # only process this column if both factors are members of lvlCSV
             is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
-            my_cor_cov <- do_detail_plot(
-              x_dataMatrix  = my_matrix
-            , x_predictor   = predictor
-            , x_is_match    = is_match
-            , x_algorithm   = algoC
-            , x_prefix      = "Features"
-            , x_show_labels = labelFeatures
-            , x_show_loado_labels = labelOrthoFeatures
-            , x_progress    = progress_action
-            , x_crossval_i  = min(7, length(chosen_samples))
-            , x_env         = calc_env
-            )
-            if ( is.null(my_cor_cov) ) {
-              progress_action("NOTHING TO PLOT")
+            if (is_match) {
+              progress_action(
+                sprintf("Calculating/plotting contrast of %s vs. %s."
+                       , fctr_lvl_1, fctr_lvl_2)
+                )
+              my_cor_cov <- do_detail_plot(
+                x_dataMatrix  = my_matrix
+              , x_predictor   = predictor
+              , x_is_match    = is_match
+              , x_algorithm   = algoC
+              , x_prefix      = "Features"
+              , x_show_labels = labelFeatures
+              , x_progress    = progress_action
+              , x_crossval_i  = min(7, length(chosen_samples))
+              , x_env         = calc_env
+              )
+              if ( is.null(my_cor_cov) ) {
+                progress_action("NOTHING TO PLOT.....")
+              } else {
+                tsv <- my_cor_cov$tsv1
+                tsv$mz <- mz_lookup(tsv$featureID)
+                tsv$rt <- rt_lookup(tsv$featureID)
+                corcov_tsv_action(tsv)
+                did_plot <<- TRUE
+              }
             } else {
-              tsv <- my_cor_cov$tsv1
-              tsv$mz <- mz_lookup(tsv$featureID)
-              tsv$rt <- rt_lookup(tsv$featureID)
-              corcov_tsv_action(tsv)
-              did_plot <<- TRUE
+              progress_action(
+                sprintf("Skipping contrast of %s vs. %s."
+                       , fctr_lvl_1, fctr_lvl_2
+                )
+              )
             }
           }
-          #print("baz")
           "dummy" # need to return a value; otherwise combn fails with an error
         }
       )
     } else {
-      progress_action("NOTHING TO PLOT....")
+      progress_action("NOTHING TO PLOT......")
     }
   }
   if (!did_plot) {
-    failure_action(sprintf("bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV))
+    failure_action(
+      sprintf(
+        "bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'"
+      , facC, originalLevCSV))
     return ( FALSE )
   }
   return ( TRUE )
@@ -500,27 +829,67 @@
 #     Wiklund_2008 doi:10.1021/ac0713510
 #     Galindo_Prieto_2014 doi:10.1002/cem.2627
 #     https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
-cor_vs_cov <- function(matrix_x, ropls_x) {
+cor_vs_cov <- function(
+  matrix_x
+, ropls_x
+, predictor_projection_x = TRUE
+, x_progress = print
+) {
+  tryCatch({
+    return(
+      cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress)
+    )
+  }, error = function(e) {
+    x_progress(
+      sprintf(
+        "cor_vs_cov fatal error - %s"
+      , as.character(e) # e$message
+      )
+    )
+    return ( NULL )
+  })
+}
+
+cor_vs_cov_try <- function(
+  matrix_x
+, ropls_x
+, predictor_projection_x = TRUE
+, x_progress = print
+) {
   x_class <- class(ropls_x)
-  if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) 
-    stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) )
+  if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
+    stop(
+      paste(
+        "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
+      , as.character(x_class)
+      )
+    )
   }
   result <- list()
+  result$projection <- projection <- if (predictor_projection_x) 1 else 2
   # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
   if ( ropls_x@suppLs$algoC == "nipals") {
     # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
     mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
     mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
-    score_matrix <- ropls_x@scoreMN
+    if (predictor_projection_x)
+       score_matrix <- ropls_x@scoreMN
+    else
+       score_matrix <- ropls_x@orthoScoreMN
     score_matrix_transposed <- t(score_matrix)
     score_matrix_magnitude <- mag(score_matrix)
-    result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
-    result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
+    result$covariance <-
+      score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
+    result$correlation <-
+      score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
   } else {
     # WARNING - untested code - I don't have test data to exercise this branch
     # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
     # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
-    score_matrix <- ropls_x@scoreMN
+    if (predictor_projection_x)
+       score_matrix <- ropls_x@scoreMN
+    else
+       score_matrix <- ropls_x@orthoScoreMN
     score_matrix_transposed <- t(score_matrix)
     cov_divisor <- nrow(matrix_x) - 1
     result$covariance <- sapply(
@@ -540,8 +909,8 @@
       }
     )
   }
-  result$correlation <- result$correlation[1,,drop = TRUE]
-  result$covariance  <- result$covariance[1,,drop = TRUE]
+  result$correlation <- result$correlation[ 1, , drop = TRUE ]
+  result$covariance  <- result$covariance [ 1, , drop = TRUE ]
 
   # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014
   #    Length = number of features; labels = feature identifiers.  (The same is true for $correlation and $covariance.)
@@ -556,15 +925,42 @@
   feature_count    <- length(ropls_x@vipVn)
   result$level1    <- rep.int(x = fctr_lvl_1, times = feature_count)
   result$level2    <- rep.int(x = fctr_lvl_2, times = feature_count)
-  # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation)))
   superresult <- list()
   if (length(result$vip4o) == 0) result$vip4o <- NA
-  greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 )
-  superresult$tsv1 <- data.frame(
-    featureID           = names(ropls_x@vipVn)
+  greaterLevel <- sapply(
+    X = result$correlation
+  , FUN = function(my_corr)
+      tryCatch({
+          if ( is.nan( my_corr ) ) {
+            print("my_corr is NaN")
+            NA 
+          } else {
+            if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
+          }
+        }, error = function(e) {
+          x_progress(
+            sprintf(
+              "cor_vs_cov -> sapply:  error - substituting NA - %s"
+            , as.character(e)
+            )
+          )
+          NA
+        })
+  )
+
+  # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
+  featureID          <- names(ropls_x@vipVn)
+  greaterLevel       <- greaterLevel[featureID]
+  result$correlation <- result$correlation[featureID]
+  result$covariance  <- result$covariance[featureID]
+  # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
+
+  tsv1 <- data.frame(
+    featureID           = featureID
   , factorLevel1        = result$level1
   , factorLevel2        = result$level2
   , greaterLevel        = greaterLevel
+  , projection          = result$projection
   , correlation         = result$correlation
   , covariance          = result$covariance
   , vip4p               = result$vip4p
@@ -573,7 +969,11 @@
   , loado               = result$loado
   , row.names           = NULL
   )
-  rownames(superresult$tsv1) <- superresult$tsv1$featureID
+  tsv1 <- tsv1[!is.na(tsv1$correlation),]
+  tsv1 <- tsv1[!is.na(tsv1$covariance),]
+  superresult$tsv1 <- tsv1
+  rownames(superresult$tsv1) <- tsv1$featureID
+  superresult$projection <- result$projection
   superresult$covariance <- result$covariance
   superresult$correlation <- result$correlation
   superresult$vip4p <- result$vip4p
@@ -581,12 +981,11 @@
   superresult$loadp <- result$loadp
   superresult$loado <- result$loado
   superresult$details <- result
-  # #print(superresult$tsv1)
   result$superresult <- superresult
   # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
-  result$oplsda    <- ropls_x          
+  result$oplsda    <- ropls_x
   result$predictor <- ropls_x@suppLs$y   # in case future consumers of this routine want to use it in currently unanticipated ways
   return (superresult)
 }
 
-
+# vim: sw=2 ts=2 et :
author	eschen42
date	Wed, 05 Sep 2018 19:24:47 -0400
parents	1d046f648b47
children	ca9938f2eb6a