diff w4mcorcov_calc.R @ 1:e25fd8a13665 draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit bd26542b811de06c1a877337a2840a9f899c2b94
author eschen42
date Mon, 16 Oct 2017 09:18:29 -0400
parents 50a07adddfbd
children a06344808ffc
line wrap: on
line diff
--- a/w4mcorcov_calc.R	Mon Oct 09 15:46:13 2017 -0400
+++ b/w4mcorcov_calc.R	Mon Oct 16 09:18:29 2017 -0400
@@ -42,19 +42,18 @@
         # print("main_label")
         # print(main_label)
         main_cex <- min(1.0, 46.0/nchar(main_label))
-        # " It is generally accepted that a variable should be selected if vj>1, [27–29], but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." (Mehmood 2012 doi:10.1016/j.chemolab.2004.12.011)
+        # "It is generally accepted that a variable should be selected if vj>1, [27–29],
+        #   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
+        #   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
         vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
         alpha <- 0.1 + 0.4 * vipco
         red  <- as.numeric(correlation < 0) * vipco
         blue <- as.numeric(correlation > 0) * vipco
         minus_cor <- -correlation
         minus_cov <- -covariance
-        # x_progress("head(names(minus_cor)): ", head(names(minus_cor)))
-        cex <- salience_lookup(feature = names(minus_cor))
-        # x_progress("head(cex.1): ", head(cex))
-        # cex <- 0.25 + (0.75 * cex / max(cex))
-        cex <- 0.25 + (1.25 * cex / max(cex))
-        # x_progress("head(cex.2): ", head(cex))
+        # cex <- salience_lookup(feature = names(minus_cor))
+        # cex <- 0.25 + (1.25 * cex / max(cex))
+        cex <- 0.75
         plot(
           y = minus_cor
         , x = minus_cov
@@ -129,15 +128,19 @@
 }
 
 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
-corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = NULL) {
+corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) {
   if ( ! is.environment(calc_env) ) {
     failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
     return ( FALSE )
   }
-  if ( is.null(corcov_tsv_action) || ! is.function(corcov_tsv_action) ) {
+  if ( ! is.function(corcov_tsv_action) ) {
     failure_action("corcov_calc: fatal error - 'corcov_tsv_action' is not a function")
     return ( FALSE )
   }
+  if ( ! is.function(salience_tsv_action) ) {
+    failure_action("salience_calc: fatal error - 'salience_tsv_action' is not a function")
+    return ( FALSE )
+  }
 
   # extract parameters from the environment
   vrbl_metadata <- calc_env$vrbl_metadata
@@ -165,6 +168,15 @@
   , sample_class   = smpl_metadata[,facC]
   , failure_action = failure_action
   )
+  salience_tsv_action({
+    my_df <- data.frame(
+      featureID    = salience_df$feature
+    , salientLevel = salience_df$max_level
+    , salientRCV   = salience_df$salient_rcv
+    , salience     = salience_df$salience
+    )
+    my_df[order(-my_df$salience),]
+  })
   salience             <- salience_df$salience
   names(salience)      <- salience_df$feature
   salience_lookup      <- calc_env$salience_lookup <- function(feature) unname(salience[feature])
@@ -231,7 +243,7 @@
   if (tesC != "none") {
     # for each column name, extract the parts of the name matched by 'col_pattern', if any
     the_colnames <- colnames(vrbl_metadata)
-    if (!Reduce(f = "||", x = grepl(tesC, the_colnames))) {
+    if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
       failure_action(sprintf("bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'.", tesC))
       return ( FALSE )
     }
@@ -241,6 +253,59 @@
         regmatches( x, regexec(col_pattern, x) )[[1]]
       }
     )
+    ## first contrast each selected level with all other levels combined into one "super-level" ##
+    # process columns matching the pattern
+    level_union <- c()
+    for (i in 1:length(col_matches)) {
+      col_match <- col_matches[[i]]
+      if (length(col_match) > 0) {
+        # it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig
+        vrbl_metadata_col <- col_match[1]               # ^^^^^^^^^^^^^^^^^^^^^  # Column name
+        fctr_lvl_1        <- col_match[2]               #             ^^         # Factor-level 1
+        fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
+        # only process this column if both factors are members of lvlCSV
+        is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
+        if (is_match) {
+          level_union <- c(level_union, col_match[2], col_match[3])
+        }
+      }
+    }
+    level_union <- unique(sort(level_union))
+    overall_significant <- 1 == vrbl_metadata[,intersample_sig_col]
+    if ( length(level_union) > 1 ) {
+      chosen_samples <- smpl_metadata_facC %in% level_union
+      chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
+      col_selector <- vrbl_metadata_names[ overall_significant ]
+      my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
+      fctr_lvl_2 <- "other"
+      for ( fctr_lvl_1 in level_union[1:length(level_union)] ) {
+        progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+        predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+        my_cor_cov <- do_detail_plot(
+          x_dataMatrix  = my_matrix
+        , x_predictor   = predictor
+        , x_is_match    = is_match
+        , x_algorithm   = algoC
+        , x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
+        , x_show_labels = labelFeatures
+        , x_progress    = progress_action
+        , x_env         = calc_env
+        )
+        if ( is.null(my_cor_cov) ) {
+          progress_action("NOTHING TO PLOT.")
+        } else {
+          tsv <- my_cor_cov$tsv1
+          # tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+          # tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+          # tsv$salience     <- salience_lookup(tsv$featureID)
+          tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 
+          corcov_tsv_action(tsv)
+          did_plot <- TRUE
+        }
+      }
+    }
+
+    ## next, contrast each selected level with each of the other levels individually ##
     # process columns matching the pattern
     for (i in 1:length(col_matches)) {
       # for each potential match of the pattern
@@ -260,7 +325,6 @@
         predictor <- smpl_metadata_facC[chosen_samples]
         # extract only the significantly-varying features and the chosen samples
         fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] * vrbl_metadata[,intersample_sig_col]
-        overall_significant <- 1 == vrbl_metadata[,intersample_sig_col]
         col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
         my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
         my_cor_cov <- do_detail_plot(
@@ -277,9 +341,9 @@
           progress_action("NOTHING TO PLOT.")
         } else {
           tsv <- my_cor_cov$tsv1
-          tsv$salientLevel <- salient_level_lookup(tsv$featureID)
-          tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
-          tsv$salience     <- salience_lookup(tsv$featureID)
+          # tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+          # tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+          # tsv$salience     <- salience_lookup(tsv$featureID)
           tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 
           corcov_tsv_action(tsv)
           did_plot <- TRUE
@@ -287,46 +351,105 @@
       }
     }
   } else { # tesC == "none"
-    utils::combn(
-      x = unique(sort(smpl_metadata_facC))
-    , m = 2
-    , FUN = function(x) { 
-        fctr_lvl_1 <- x[1]
-        fctr_lvl_2 <- x[2]
-        progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
-        chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
-        if (length(unique(chosen_samples)) < 1) {
-          progress_action("NOTHING TO PLOT...")
-        } else {
-          predictor <- smpl_metadata_facC[chosen_samples]
-          my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
-          # only process this column if both factors are members of lvlCSV
-          is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
-          my_cor_cov <- do_detail_plot(
-            x_dataMatrix  = my_matrix
-          , x_predictor   = predictor
-          , x_is_match    = is_match
-          , x_algorithm   = algoC
-          , x_prefix      = "Features"
-          , x_show_labels = labelFeatures
-          , x_progress    = progress_action
-          , x_env         = calc_env
-          )
-          if ( is.null(my_cor_cov) ) {
-            progress_action("NOTHING TO PLOT")
+    level_union <- unique(sort(smpl_metadata_facC))
+    if ( length(level_union) > 1 ) {
+      ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
+      completed <- c()
+      lapply(
+        X = level_union
+      , FUN = function(x) { 
+          fctr_lvl_1 <- x[1]
+          fctr_lvl_2 <- {
+            if ( fctr_lvl_1 %in% completed )
+              return("DUMMY")
+            # strF(completed)
+            completed <<- c(completed, fctr_lvl_1)
+            setdiff(level_union, fctr_lvl_1)
+          }
+          chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+          fctr_lvl_2 <- "other"
+          # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
+          progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+          if (length(unique(chosen_samples)) < 1) {
+            progress_action("NOTHING TO PLOT...")
           } else {
-            tsv <- my_cor_cov$tsv1
-            tsv$salientLevel <- salient_level_lookup(tsv$featureID)
-            tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
-            tsv$salience     <- salience_lookup(tsv$featureID)
-            corcov_tsv_action(tsv)
-            did_plot <<- TRUE
+            chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
+            predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+            my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+            # only process this column if both factors are members of lvlCSV
+            is_match <- isLevelSelected(fctr_lvl_1)
+            my_cor_cov <- do_detail_plot(
+              x_dataMatrix  = my_matrix
+            , x_predictor   = predictor
+            , x_is_match    = is_match
+            , x_algorithm   = algoC
+            , x_prefix      = "Features"
+            , x_show_labels = labelFeatures
+            , x_progress    = progress_action
+            , x_env         = calc_env
+            )
+            if ( is.null(my_cor_cov) ) {
+              progress_action("NOTHING TO PLOT")
+            } else {
+              tsv <- my_cor_cov$tsv1
+              # tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+              # tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+              # tsv$salience     <- salience_lookup(tsv$featureID)
+              corcov_tsv_action(tsv)
+              did_plot <<- TRUE
+            }
           }
+          #print("baz")
+          "dummy" # need to return a value; otherwise combn fails with an error
         }
-        #print("baz")
-        "dummy" # need to return a value; otherwise combn fails with an error
-      }
-    )
+      )
+      ## pass 2 - contrast each selected level with each of the other levels individually ##
+      completed <- c()
+      utils::combn(
+        x = level_union
+      , m = 2
+      , FUN = function(x) { 
+          fctr_lvl_1 <- x[1]
+          fctr_lvl_2 <- x[2]
+          chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+          # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
+          progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+          if (length(unique(chosen_samples)) < 1) {
+            progress_action("NOTHING TO PLOT...")
+          } else {
+            chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
+            predictor <- chosen_facC
+            my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+            # only process this column if both factors are members of lvlCSV
+            is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
+            my_cor_cov <- do_detail_plot(
+              x_dataMatrix  = my_matrix
+            , x_predictor   = predictor
+            , x_is_match    = is_match
+            , x_algorithm   = algoC
+            , x_prefix      = "Features"
+            , x_show_labels = labelFeatures
+            , x_progress    = progress_action
+            , x_env         = calc_env
+            )
+            if ( is.null(my_cor_cov) ) {
+              progress_action("NOTHING TO PLOT")
+            } else {
+              tsv <- my_cor_cov$tsv1
+              # tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+              # tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+              # tsv$salience     <- salience_lookup(tsv$featureID)
+              corcov_tsv_action(tsv)
+              did_plot <<- TRUE
+            }
+          }
+          #print("baz")
+          "dummy" # need to return a value; otherwise combn fails with an error
+        }
+      )
+    } else {
+      progress_action("NOTHING TO PLOT....")
+    }
   }
   if (!did_plot) {
     failure_action(sprintf("bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV))