Mercurial > repos > eschen42 > w4mcorcov
diff w4mcorcov_calc.R @ 1:e25fd8a13665 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit bd26542b811de06c1a877337a2840a9f899c2b94
author | eschen42 |
---|---|
date | Mon, 16 Oct 2017 09:18:29 -0400 |
parents | 50a07adddfbd |
children | a06344808ffc |
line wrap: on
line diff
--- a/w4mcorcov_calc.R Mon Oct 09 15:46:13 2017 -0400 +++ b/w4mcorcov_calc.R Mon Oct 16 09:18:29 2017 -0400 @@ -42,19 +42,18 @@ # print("main_label") # print(main_label) main_cex <- min(1.0, 46.0/nchar(main_label)) - # " It is generally accepted that a variable should be selected if vj>1, [27–29], but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." (Mehmood 2012 doi:10.1016/j.chemolab.2004.12.011) + # "It is generally accepted that a variable should be selected if vj>1, [27–29], + # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." + # (Mehmood 2012 doi:10.1186/1748-7188-6-27) vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) alpha <- 0.1 + 0.4 * vipco red <- as.numeric(correlation < 0) * vipco blue <- as.numeric(correlation > 0) * vipco minus_cor <- -correlation minus_cov <- -covariance - # x_progress("head(names(minus_cor)): ", head(names(minus_cor))) - cex <- salience_lookup(feature = names(minus_cor)) - # x_progress("head(cex.1): ", head(cex)) - # cex <- 0.25 + (0.75 * cex / max(cex)) - cex <- 0.25 + (1.25 * cex / max(cex)) - # x_progress("head(cex.2): ", head(cex)) + # cex <- salience_lookup(feature = names(minus_cor)) + # cex <- 0.25 + (1.25 * cex / max(cex)) + cex <- 0.75 plot( y = minus_cor , x = minus_cov @@ -129,15 +128,19 @@ } # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 -corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = NULL) { +corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) { if ( ! is.environment(calc_env) ) { failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") return ( FALSE ) } - if ( is.null(corcov_tsv_action) || ! is.function(corcov_tsv_action) ) { + if ( ! is.function(corcov_tsv_action) ) { failure_action("corcov_calc: fatal error - 'corcov_tsv_action' is not a function") return ( FALSE ) } + if ( ! is.function(salience_tsv_action) ) { + failure_action("salience_calc: fatal error - 'salience_tsv_action' is not a function") + return ( FALSE ) + } # extract parameters from the environment vrbl_metadata <- calc_env$vrbl_metadata @@ -165,6 +168,15 @@ , sample_class = smpl_metadata[,facC] , failure_action = failure_action ) + salience_tsv_action({ + my_df <- data.frame( + featureID = salience_df$feature + , salientLevel = salience_df$max_level + , salientRCV = salience_df$salient_rcv + , salience = salience_df$salience + ) + my_df[order(-my_df$salience),] + }) salience <- salience_df$salience names(salience) <- salience_df$feature salience_lookup <- calc_env$salience_lookup <- function(feature) unname(salience[feature]) @@ -231,7 +243,7 @@ if (tesC != "none") { # for each column name, extract the parts of the name matched by 'col_pattern', if any the_colnames <- colnames(vrbl_metadata) - if (!Reduce(f = "||", x = grepl(tesC, the_colnames))) { + if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) return ( FALSE ) } @@ -241,6 +253,59 @@ regmatches( x, regexec(col_pattern, x) )[[1]] } ) + ## first contrast each selected level with all other levels combined into one "super-level" ## + # process columns matching the pattern + level_union <- c() + for (i in 1:length(col_matches)) { + col_match <- col_matches[[i]] + if (length(col_match) > 0) { + # it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig + vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name + fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 + fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 + # only process this column if both factors are members of lvlCSV + is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) + if (is_match) { + level_union <- c(level_union, col_match[2], col_match[3]) + } + } + } + level_union <- unique(sort(level_union)) + overall_significant <- 1 == vrbl_metadata[,intersample_sig_col] + if ( length(level_union) > 1 ) { + chosen_samples <- smpl_metadata_facC %in% level_union + chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) + col_selector <- vrbl_metadata_names[ overall_significant ] + my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] + fctr_lvl_2 <- "other" + for ( fctr_lvl_1 in level_union[1:length(level_union)] ) { + progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) + predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT.") + } else { + tsv <- my_cor_cov$tsv1 + # tsv$salientLevel <- salient_level_lookup(tsv$featureID) + # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) + # tsv$salience <- salience_lookup(tsv$featureID) + tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] + corcov_tsv_action(tsv) + did_plot <- TRUE + } + } + } + + ## next, contrast each selected level with each of the other levels individually ## # process columns matching the pattern for (i in 1:length(col_matches)) { # for each potential match of the pattern @@ -260,7 +325,6 @@ predictor <- smpl_metadata_facC[chosen_samples] # extract only the significantly-varying features and the chosen samples fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * vrbl_metadata[,intersample_sig_col] - overall_significant <- 1 == vrbl_metadata[,intersample_sig_col] col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] my_cor_cov <- do_detail_plot( @@ -277,9 +341,9 @@ progress_action("NOTHING TO PLOT.") } else { tsv <- my_cor_cov$tsv1 - tsv$salientLevel <- salient_level_lookup(tsv$featureID) - tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) - tsv$salience <- salience_lookup(tsv$featureID) + # tsv$salientLevel <- salient_level_lookup(tsv$featureID) + # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) + # tsv$salience <- salience_lookup(tsv$featureID) tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] corcov_tsv_action(tsv) did_plot <- TRUE @@ -287,46 +351,105 @@ } } } else { # tesC == "none" - utils::combn( - x = unique(sort(smpl_metadata_facC)) - , m = 2 - , FUN = function(x) { - fctr_lvl_1 <- x[1] - fctr_lvl_2 <- x[2] - progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) - chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) - if (length(unique(chosen_samples)) < 1) { - progress_action("NOTHING TO PLOT...") - } else { - predictor <- smpl_metadata_facC[chosen_samples] - my_matrix <- scdm[ chosen_samples, , drop = FALSE ] - # only process this column if both factors are members of lvlCSV - is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) - my_cor_cov <- do_detail_plot( - x_dataMatrix = my_matrix - , x_predictor = predictor - , x_is_match = is_match - , x_algorithm = algoC - , x_prefix = "Features" - , x_show_labels = labelFeatures - , x_progress = progress_action - , x_env = calc_env - ) - if ( is.null(my_cor_cov) ) { - progress_action("NOTHING TO PLOT") + level_union <- unique(sort(smpl_metadata_facC)) + if ( length(level_union) > 1 ) { + ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ## + completed <- c() + lapply( + X = level_union + , FUN = function(x) { + fctr_lvl_1 <- x[1] + fctr_lvl_2 <- { + if ( fctr_lvl_1 %in% completed ) + return("DUMMY") + # strF(completed) + completed <<- c(completed, fctr_lvl_1) + setdiff(level_union, fctr_lvl_1) + } + chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) + fctr_lvl_2 <- "other" + # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) + progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) + if (length(unique(chosen_samples)) < 1) { + progress_action("NOTHING TO PLOT...") } else { - tsv <- my_cor_cov$tsv1 - tsv$salientLevel <- salient_level_lookup(tsv$featureID) - tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) - tsv$salience <- salience_lookup(tsv$featureID) - corcov_tsv_action(tsv) - did_plot <<- TRUE + chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) + predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) + my_matrix <- scdm[ chosen_samples, , drop = FALSE ] + # only process this column if both factors are members of lvlCSV + is_match <- isLevelSelected(fctr_lvl_1) + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = "Features" + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT") + } else { + tsv <- my_cor_cov$tsv1 + # tsv$salientLevel <- salient_level_lookup(tsv$featureID) + # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) + # tsv$salience <- salience_lookup(tsv$featureID) + corcov_tsv_action(tsv) + did_plot <<- TRUE + } } + #print("baz") + "dummy" # need to return a value; otherwise combn fails with an error } - #print("baz") - "dummy" # need to return a value; otherwise combn fails with an error - } - ) + ) + ## pass 2 - contrast each selected level with each of the other levels individually ## + completed <- c() + utils::combn( + x = level_union + , m = 2 + , FUN = function(x) { + fctr_lvl_1 <- x[1] + fctr_lvl_2 <- x[2] + chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) + # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) + progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) + if (length(unique(chosen_samples)) < 1) { + progress_action("NOTHING TO PLOT...") + } else { + chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) + predictor <- chosen_facC + my_matrix <- scdm[ chosen_samples, , drop = FALSE ] + # only process this column if both factors are members of lvlCSV + is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) + my_cor_cov <- do_detail_plot( + x_dataMatrix = my_matrix + , x_predictor = predictor + , x_is_match = is_match + , x_algorithm = algoC + , x_prefix = "Features" + , x_show_labels = labelFeatures + , x_progress = progress_action + , x_env = calc_env + ) + if ( is.null(my_cor_cov) ) { + progress_action("NOTHING TO PLOT") + } else { + tsv <- my_cor_cov$tsv1 + # tsv$salientLevel <- salient_level_lookup(tsv$featureID) + # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) + # tsv$salience <- salience_lookup(tsv$featureID) + corcov_tsv_action(tsv) + did_plot <<- TRUE + } + } + #print("baz") + "dummy" # need to return a value; otherwise combn fails with an error + } + ) + } else { + progress_action("NOTHING TO PLOT....") + } } if (!did_plot) { failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV))