w4mcorcov: w4mcorcov_calc.R comparison

comparison w4mcorcov_calc.R @ 1:e25fd8a13665 draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit bd26542b811de06c1a877337a2840a9f899c2b94

author	eschen42
date	Mon, 16 Oct 2017 09:18:29 -0400
parents	50a07adddfbd
children	a06344808ffc

comparison

equal deleted inserted replaced

-:50a07adddfbd
+:e25fd8a13665
 lim_x <- 1.2
 main_label <- sprintf("%s for levels %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
 # print("main_label")
 # print(main_label)
 main_cex <- min(1.0, 46.0/nchar(main_label))
-# " It is generally accepted that a variable should be selected if vj>1, [27–29], but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." (Mehmood 2012 doi:10.1016/j.chemolab.2004.12.011)
+# "It is generally accepted that a variable should be selected if vj>1, [27–29],
+#   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
+#   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
 alpha <- 0.1 + 0.4 * vipco
 red  <- as.numeric(correlation < 0) * vipco
 blue <- as.numeric(correlation > 0) * vipco
 minus_cor <- -correlation
 minus_cov <- -covariance
-# x_progress("head(names(minus_cor)): ", head(names(minus_cor)))
+# cex <- salience_lookup(feature = names(minus_cor))
-cex <- salience_lookup(feature = names(minus_cor))
+# cex <- 0.25 + (1.25 * cex / max(cex))
-# x_progress("head(cex.1): ", head(cex))
+cex <- 0.75
-# cex <- 0.25 + (0.75 * cex / max(cex))
-cex <- 0.25 + (1.25 * cex / max(cex))
-# x_progress("head(cex.2): ", head(cex))
 plot(
 y = minus_cor
 , x = minus_cov
 , type="p"
 , xlim=c(-lim_x, lim_x + off(0.1))
 return (NULL)
 }
 }
 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
-corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = NULL) {
+corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) {
 if ( ! is.environment(calc_env) ) {
 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
 return ( FALSE )
 }
-if ( is.null(corcov_tsv_action) || ! is.function(corcov_tsv_action) ) {
+if ( ! is.function(corcov_tsv_action) ) {
 failure_action("corcov_calc: fatal error - 'corcov_tsv_action' is not a function")
+return ( FALSE )
+}
+if ( ! is.function(salience_tsv_action) ) {
+failure_action("salience_calc: fatal error - 'salience_tsv_action' is not a function")
 return ( FALSE )
 }
 # extract parameters from the environment
 vrbl_metadata <- calc_env$vrbl_metadata
 salience_df <- calc_env$salience_df <- w4msalience(
 data_matrix    = data_matrix
 , sample_class   = smpl_metadata[,facC]
 , failure_action = failure_action
 )
+salience_tsv_action({
+my_df <- data.frame(
+featureID    = salience_df$feature
+, salientLevel = salience_df$max_level
+, salientRCV   = salience_df$salient_rcv
+, salience     = salience_df$salience
+)
+my_df[order(-my_df$salience),]
+})
 salience             <- salience_df$salience
 names(salience)      <- salience_df$feature
 salience_lookup      <- calc_env$salience_lookup <- function(feature) unname(salience[feature])
 salient_rcv          <- salience_df$salient_rcv
 names(salient_rcv)   <- salience_df$feature
 did_plot <- FALSE
 if (tesC != "none") {
 # for each column name, extract the parts of the name matched by 'col_pattern', if any
 the_colnames <- colnames(vrbl_metadata)
-if (!Reduce(f = "||", x = grepl(tesC, the_colnames))) {
+if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
 failure_action(sprintf("bad parameter!  variableMetadata must contain results of W4M Univariate test '%s'.", tesC))
 return ( FALSE )
 }
 col_matches <- lapply(
 X = the_colnames,
 FUN = function(x) {
 regmatches( x, regexec(col_pattern, x) )[[1]]
 }
 )
+## first contrast each selected level with all other levels combined into one "super-level" ##
+# process columns matching the pattern
+level_union <- c()
+for (i in 1:length(col_matches)) {
+col_match <- col_matches[[i]]
+if (length(col_match) > 0) {
+# it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig
+vrbl_metadata_col <- col_match[1]               # ^^^^^^^^^^^^^^^^^^^^^  # Column name
+fctr_lvl_1        <- col_match[2]               #             ^^         # Factor-level 1
+fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
+# only process this column if both factors are members of lvlCSV
+is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
+if (is_match) {
+level_union <- c(level_union, col_match[2], col_match[3])
+}
+}
+}
+level_union <- unique(sort(level_union))
+overall_significant <- 1 == vrbl_metadata[,intersample_sig_col]
+if ( length(level_union) > 1 ) {
+chosen_samples <- smpl_metadata_facC %in% level_union
+chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
+col_selector <- vrbl_metadata_names[ overall_significant ]
+my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
+fctr_lvl_2 <- "other"
+for ( fctr_lvl_1 in level_union[1:length(level_union)] ) {
+progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+my_cor_cov <- do_detail_plot(
+x_dataMatrix  = my_matrix
+, x_predictor   = predictor
+, x_is_match    = is_match
+, x_algorithm   = algoC
+, x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
+, x_show_labels = labelFeatures
+, x_progress    = progress_action
+, x_env         = calc_env
+)
+if ( is.null(my_cor_cov) ) {
+progress_action("NOTHING TO PLOT.")
+} else {
+tsv <- my_cor_cov$tsv1
+# tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+# tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+# tsv$salience     <- salience_lookup(tsv$featureID)
+tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
+corcov_tsv_action(tsv)
+did_plot <- TRUE
+}
+}
+}
+## next, contrast each selected level with each of the other levels individually ##
 # process columns matching the pattern
 for (i in 1:length(col_matches)) {
 # for each potential match of the pattern
 col_match <- col_matches[[i]]
 if (length(col_match) > 0) {
 # choose only samples with one of the two factors for this column
 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
 predictor <- smpl_metadata_facC[chosen_samples]
 # extract only the significantly-varying features and the chosen samples
 fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] * vrbl_metadata[,intersample_sig_col]
-overall_significant <- 1 == vrbl_metadata[,intersample_sig_col]
 col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
 my_cor_cov <- do_detail_plot(
 x_dataMatrix  = my_matrix
 , x_predictor   = predictor
 )
 if ( is.null(my_cor_cov) ) {
 progress_action("NOTHING TO PLOT.")
 } else {
 tsv <- my_cor_cov$tsv1
-tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+# tsv$salientLevel <- salient_level_lookup(tsv$featureID)
-tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+# tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
-tsv$salience     <- salience_lookup(tsv$featureID)
+# tsv$salience     <- salience_lookup(tsv$featureID)
 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
 corcov_tsv_action(tsv)
 did_plot <- TRUE
 }
 }
 }
 } else { # tesC == "none"
-utils::combn(
+level_union <- unique(sort(smpl_metadata_facC))
-x = unique(sort(smpl_metadata_facC))
+if ( length(level_union) > 1 ) {
-, m = 2
+## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
-, FUN = function(x) {
+completed <- c()
-fctr_lvl_1 <- x[1]
+lapply(
-fctr_lvl_2 <- x[2]
+X = level_union
-progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+, FUN = function(x) {
-chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+fctr_lvl_1 <- x[1]
-if (length(unique(chosen_samples)) < 1) {
+fctr_lvl_2 <- {
-progress_action("NOTHING TO PLOT...")
+if ( fctr_lvl_1 %in% completed )
-} else {
+return("DUMMY")
-predictor <- smpl_metadata_facC[chosen_samples]
+# strF(completed)
-my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+completed <<- c(completed, fctr_lvl_1)
-# only process this column if both factors are members of lvlCSV
+setdiff(level_union, fctr_lvl_1)
-is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
+}
-my_cor_cov <- do_detail_plot(
+chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
-x_dataMatrix  = my_matrix
+fctr_lvl_2 <- "other"
-, x_predictor   = predictor
+# print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
-, x_is_match    = is_match
+progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
-, x_algorithm   = algoC
+if (length(unique(chosen_samples)) < 1) {
-, x_prefix      = "Features"
+progress_action("NOTHING TO PLOT...")
-, x_show_labels = labelFeatures
-, x_progress    = progress_action
-, x_env         = calc_env
-)
-if ( is.null(my_cor_cov) ) {
-progress_action("NOTHING TO PLOT")
 } else {
-tsv <- my_cor_cov$tsv1
+chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
-tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
-tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
-tsv$salience     <- salience_lookup(tsv$featureID)
+# only process this column if both factors are members of lvlCSV
-corcov_tsv_action(tsv)
+is_match <- isLevelSelected(fctr_lvl_1)
-did_plot <<- TRUE
+my_cor_cov <- do_detail_plot(
+x_dataMatrix  = my_matrix
+, x_predictor   = predictor
+, x_is_match    = is_match
+, x_algorithm   = algoC
+, x_prefix      = "Features"
+, x_show_labels = labelFeatures
+, x_progress    = progress_action
+, x_env         = calc_env
+)
+if ( is.null(my_cor_cov) ) {
+progress_action("NOTHING TO PLOT")
+} else {
+tsv <- my_cor_cov$tsv1
+# tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+# tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+# tsv$salience     <- salience_lookup(tsv$featureID)
+corcov_tsv_action(tsv)
+did_plot <<- TRUE
+}
 }
+#print("baz")
+"dummy" # need to return a value; otherwise combn fails with an error
 }
-#print("baz")
+)
-"dummy" # need to return a value; otherwise combn fails with an error
+## pass 2 - contrast each selected level with each of the other levels individually ##
-}
+completed <- c()
-)
+utils::combn(
+x = level_union
+, m = 2
+, FUN = function(x) {
+fctr_lvl_1 <- x[1]
+fctr_lvl_2 <- x[2]
+chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+# print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
+progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+if (length(unique(chosen_samples)) < 1) {
+progress_action("NOTHING TO PLOT...")
+} else {
+chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
+predictor <- chosen_facC
+my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+# only process this column if both factors are members of lvlCSV
+is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
+my_cor_cov <- do_detail_plot(
+x_dataMatrix  = my_matrix
+, x_predictor   = predictor
+, x_is_match    = is_match
+, x_algorithm   = algoC
+, x_prefix      = "Features"
+, x_show_labels = labelFeatures
+, x_progress    = progress_action
+, x_env         = calc_env
+)
+if ( is.null(my_cor_cov) ) {
+progress_action("NOTHING TO PLOT")
+} else {
+tsv <- my_cor_cov$tsv1
+# tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+# tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+# tsv$salience     <- salience_lookup(tsv$featureID)
+corcov_tsv_action(tsv)
+did_plot <<- TRUE
+}
+}
+#print("baz")
+"dummy" # need to return a value; otherwise combn fails with an error
+}
+)
+} else {
+progress_action("NOTHING TO PLOT....")
+}
 }
 if (!did_plot) {
 failure_action(sprintf("bad parameter!  sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV))
 return ( FALSE )
 }

Mercurial > repos > eschen42 > w4mcorcov

comparison w4mcorcov_calc.R @ 1:e25fd8a13665 draft