w4mcorcov: w4mcorcov_calc.R comparison

comparison w4mcorcov_calc.R @ 2:a06344808ffc draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit ce5178ce51b80f242d24db555044e6afc530ac99

author	eschen42
date	Sat, 11 Nov 2017 00:08:20 -0500
parents	e25fd8a13665
children	61935618f92c

comparison

equal deleted inserted replaced

-:e25fd8a13665
+:a06344808ffc
 }
 #### OPLS-DA
 algoC <- "nipals"
-do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_progress = print, x_env) {
+do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env) {
-off <- function(x) if (x_show_labels) x else 0
+off <- function(x) if (x_show_labels == "0") 0 else x
-salience_lookup <- x_env$salience_lookup
-salient_rcv_lookup <- x_env$salient_rcv_lookup
-# x_progress("head(salience_df): ", head(salience_df))
-# x_progress("head(salience): ", head(salience))
 if (x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1) {
 my_oplsda <- opls(
 x      = x_dataMatrix
 , y      = x_predictor
 , algoC  = x_algorithm
 max_x <- max(covariance)
 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
 covariance <- covariance / lim_x
 lim_x <- 1.2
 main_label <- sprintf("%s for levels %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
-# print("main_label")
-# print(main_label)
 main_cex <- min(1.0, 46.0/nchar(main_label))
 # "It is generally accepted that a variable should be selected if vj>1, [27–29],
 #   but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
 #   (Mehmood 2012 doi:10.1186/1748-7188-6-27)
 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
 alpha <- 0.1 + 0.4 * vipco
-red  <- as.numeric(correlation < 0) * vipco
+red  <- as.numeric(correlation > 0) * vipco
-blue <- as.numeric(correlation > 0) * vipco
+blue <- as.numeric(correlation < 0) * vipco
-minus_cor <- -correlation
+plus_cor <- correlation
-minus_cov <- -covariance
+plus_cov <- covariance
-# cex <- salience_lookup(feature = names(minus_cor))
-# cex <- 0.25 + (1.25 * cex / max(cex))
 cex <- 0.75
 plot(
-y = minus_cor
+y = plus_cor
-, x = minus_cov
+, x = plus_cov
 , type="p"
-, xlim=c(-lim_x, lim_x + off(0.1))
+, xlim=c(-lim_x, lim_x + off(0.2))
-, ylim=c(-1.0 - off(0.1), 1.0)
+, ylim=c(-1.0 - off(0.2), 1.0)
 , xlab = sprintf("relative covariance(feature,t1)")
 , ylab = sprintf("correlation(feature,t1)")
 , main = main_label
 , cex.main = main_cex
 , cex = cex
 , pch = 16
 , col = rgb(blue = blue, red = red, green = 0, alpha = alpha)
 )
 low_x <- -0.7 * lim_x
 high_x <- 0.7 * lim_x
-text(x = low_x, y = -0.15, labels =  fctr_lvl_1)
+text(x = low_x, y = -0.05, labels =  fctr_lvl_1, col = "blue")
-text(x = high_x, y = 0.15, labels =  fctr_lvl_2)
+text(x = high_x, y = 0.05, labels =  fctr_lvl_2, col = "red")
-if (x_show_labels) {
+if ( x_show_labels != "0" ) {
+my_loadp <- loadp
+my_loado <- loado
+names(my_loadp) <- tsv1$featureID
+names(my_loado) <- tsv1$featureID
+if ( x_show_labels == "ALL" ) {
+n_labels <- length(loadp)
+} else {
+n_labels <- as.numeric(x_show_labels)
+}
+n_labels <- min( n_labels, (1 + length(loadp)) / 2 )
+labels_to_show <- c(
+names(head(sort(my_loadp),n = n_labels))
+, names(tail(sort(my_loadp),n = n_labels))
+)
+if ( x_show_loado_labels ) {
+labels_to_show <- c(
+labels_to_show
+, names(head(sort(my_loado),n = n_labels))
+, names(tail(sort(my_loado),n = n_labels))
+)
+}
+labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" ))
 text(
-y = minus_cor - 0.013
+y = plus_cor - 0.013
-, x = minus_cov + 0.020
+, x = plus_cov + 0.020
 , cex = 0.3
-, labels = tsv1$featureID
+, labels = labels
 , col = rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha)
 , srt = -30 # slant 30 degrees downward
 , adj = 0   # left-justified
 )
 }
 # extract the levels from the environment
 originalLevCSV <- levCSV <- calc_env$levCSV
 # matchingC is one of { "none", "wildcard", "regex" }
 matchingC <- calc_env$matchingC
 labelFeatures <- calc_env$labelFeatures
+labelOrthoFeatures <- calc_env$labelOrthoFeatures
 # arg/env checking
 if (!(facC %in% names(smpl_metadata))) {
 failure_action(sprintf("bad parameter!  Factor name '%s' not found in sampleMetadata", facC))
 return ( FALSE )
 }
+mz             <- vrbl_metadata$mz
+names(mz)      <- vrbl_metadata$variableMetadata
+mz_lookup      <- function(feature) unname(mz[feature])
+rt             <- vrbl_metadata$rt
+names(rt)      <- vrbl_metadata$variableMetadata
+rt_lookup      <- function(feature) unname(rt[feature])
 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv)
 salience_df <- calc_env$salience_df <- w4msalience(
 data_matrix    = data_matrix
 , sample_class   = smpl_metadata[,facC]
 , failure_action = failure_action
 my_df <- data.frame(
 featureID    = salience_df$feature
 , salientLevel = salience_df$max_level
 , salientRCV   = salience_df$salient_rcv
 , salience     = salience_df$salience
+, mz           = mz_lookup(salience_df$feature)
+, rt           = rt_lookup(salience_df$feature)
 )
 my_df[order(-my_df$salience),]
 })
-salience             <- salience_df$salience
-names(salience)      <- salience_df$feature
-salience_lookup      <- calc_env$salience_lookup <- function(feature) unname(salience[feature])
-salient_rcv          <- salience_df$salient_rcv
-names(salient_rcv)   <- salience_df$feature
-salient_rcv_lookup   <- calc_env$salient_rcv_lookup <- function(feature) unname(salient_rcv[feature])
-salient_level        <- salience_df$max_level
-names(salient_level) <- salience_df$feature
-salient_level_lookup <- calc_env$salient_level_lookup <- function(feature) unname(salient_level[feature])
 # transform wildcards to regexen
 if (matchingC == "wildcard") {
 # strsplit(x = "hello,wild,world", split = ",")
 levCSV <- gsub("[.]", "[.]", levCSV)
 levCSV <- strsplit(x = levCSV, split = ",")
 level_union <- c(level_union, col_match[2], col_match[3])
 }
 }
 }
 level_union <- unique(sort(level_union))
-overall_significant <- 1 == vrbl_metadata[,intersample_sig_col]
+overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
-if ( length(level_union) > 1 ) {
+if ( length(level_union) > 2 ) {
 chosen_samples <- smpl_metadata_facC %in% level_union
 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
 col_selector <- vrbl_metadata_names[ overall_significant ]
 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
-fctr_lvl_2 <- "other"
+plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
-for ( fctr_lvl_1 in level_union[1:length(level_union)] ) {
 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
 my_cor_cov <- do_detail_plot(
 x_dataMatrix  = my_matrix
 , x_predictor   = predictor
 , x_is_match    = is_match
 , x_algorithm   = algoC
 , x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
 , x_show_labels = labelFeatures
+, x_show_loado_labels = labelOrthoFeatures
 , x_progress    = progress_action
 , x_env         = calc_env
 )
 if ( is.null(my_cor_cov) ) {
 progress_action("NOTHING TO PLOT.")
 } else {
-tsv <- my_cor_cov$tsv1
+my_tsv <- my_cor_cov$tsv1
-# tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+my_tsv$mz <- mz_lookup(my_tsv$featureID)
-# tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+my_tsv$rt <- rt_lookup(my_tsv$featureID)
-# tsv$salience     <- salience_lookup(tsv$featureID)
+my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
-tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
+tsv <<- my_tsv
 corcov_tsv_action(tsv)
-did_plot <- TRUE
+did_plot <<- TRUE
 }
 }
-}
+if ( length(level_union) != 2 ) {
+fctr_lvl_2 <- "other"
-## next, contrast each selected level with each of the other levels individually ##
+for ( fctr_lvl_1 in level_union[1:length(level_union)] ) {
-# process columns matching the pattern
+plot_action(fctr_lvl_1, fctr_lvl_2)
-for (i in 1:length(col_matches)) {
+}
-# for each potential match of the pattern
+} else {
-col_match <- col_matches[[i]]
+plot_action(fctr_lvl_1 = level_union[1], fctr_lvl_2 = level_union[2])
-if (length(col_match) > 0) {
+}
-# it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig
+}
-vrbl_metadata_col <- col_match[1]               # ^^^^^^^^^^^^^^^^^^^^^  # Column name
-fctr_lvl_1        <- col_match[2]               #             ^^         # Factor-level 1
+if ( length(level_union) > 1 ) {
-fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
+## next, contrast each selected level with each of the other levels individually ##
-# only process this column if both factors are members of lvlCSV
+# process columns matching the pattern
-is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
+for (i in 1:length(col_matches)) {
-progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+# for each potential match of the pattern
-# TODO delete next line displaying currently-processed column
+col_match <- col_matches[[i]]
-# cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match))
+if (length(col_match) > 0) {
-# choose only samples with one of the two factors for this column
+# it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig
-chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+vrbl_metadata_col <- col_match[1]               # ^^^^^^^^^^^^^^^^^^^^^  # Column name
-predictor <- smpl_metadata_facC[chosen_samples]
+fctr_lvl_1        <- col_match[2]               #             ^^         # Factor-level 1
-# extract only the significantly-varying features and the chosen samples
+fctr_lvl_2        <- col_match[3]               #                ^^      # Factor-level 2
-fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] * vrbl_metadata[,intersample_sig_col]
+# only process this column if both factors are members of lvlCSV
-col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
+is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
-my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
+progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
-my_cor_cov <- do_detail_plot(
+# TODO delete next line displaying currently-processed column
-x_dataMatrix  = my_matrix
+# cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match))
-, x_predictor   = predictor
+# choose only samples with one of the two factors for this column
-, x_is_match    = is_match
+chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
-, x_algorithm   = algoC
+predictor <- smpl_metadata_facC[chosen_samples]
-, x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
+# extract only the significantly-varying features and the chosen samples
-, x_show_labels = labelFeatures
+fully_significant   <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE )
-, x_progress    = progress_action
+col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
-, x_env         = calc_env
+my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
-)
+my_cor_cov <- do_detail_plot(
-if ( is.null(my_cor_cov) ) {
+x_dataMatrix  = my_matrix
-progress_action("NOTHING TO PLOT.")
+, x_predictor   = predictor
-} else {
+, x_is_match    = is_match
-tsv <- my_cor_cov$tsv1
+, x_algorithm   = algoC
-# tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+, x_prefix      = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
-# tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+, x_show_labels = labelFeatures
-# tsv$salience     <- salience_lookup(tsv$featureID)
+, x_show_loado_labels = labelOrthoFeatures
-tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
+, x_progress    = progress_action
-corcov_tsv_action(tsv)
+, x_env         = calc_env
-did_plot <- TRUE
+)
+if ( is.null(my_cor_cov) ) {
+progress_action("NOTHING TO PLOT.")
+} else {
+tsv <- my_cor_cov$tsv1
+tsv$mz <- mz_lookup(tsv$featureID)
+tsv$rt <- rt_lookup(tsv$featureID)
+tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
+corcov_tsv_action(tsv)
+did_plot <- TRUE
+}
 }
 }
 }
 } else { # tesC == "none"
 level_union <- unique(sort(smpl_metadata_facC))
 if ( length(level_union) > 1 ) {
-## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
+if ( length(level_union) > 2 ) {
-completed <- c()
+## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
-lapply(
+completed <- c()
-X = level_union
+lapply(
-, FUN = function(x) {
+X = level_union
-fctr_lvl_1 <- x[1]
+, FUN = function(x) {
-fctr_lvl_2 <- {
+fctr_lvl_1 <- x[1]
-if ( fctr_lvl_1 %in% completed )
+fctr_lvl_2 <- {
-return("DUMMY")
+if ( fctr_lvl_1 %in% completed )
-# strF(completed)
+return("DUMMY")
-completed <<- c(completed, fctr_lvl_1)
+# strF(completed)
-setdiff(level_union, fctr_lvl_1)
+completed <<- c(completed, fctr_lvl_1)
+setdiff(level_union, fctr_lvl_1)
+}
+chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+fctr_lvl_2 <- "other"
+# print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
+progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
+if (length(unique(chosen_samples)) < 1) {
+progress_action("NOTHING TO PLOT...")
+} else {
+chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
+predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
+my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
+# only process this column if both factors are members of lvlCSV
+is_match <- isLevelSelected(fctr_lvl_1)
+my_cor_cov <- do_detail_plot(
+x_dataMatrix  = my_matrix
+, x_predictor   = predictor
+, x_is_match    = is_match
+, x_algorithm   = algoC
+, x_prefix      = "Features"
+, x_show_labels = labelFeatures
+, x_show_loado_labels = labelOrthoFeatures
+, x_progress    = progress_action
+, x_env         = calc_env
+)
+if ( is.null(my_cor_cov) ) {
+progress_action("NOTHING TO PLOT")
+} else {
+tsv <- my_cor_cov$tsv1
+tsv$mz <- mz_lookup(tsv$featureID)
+tsv$rt <- rt_lookup(tsv$featureID)
+corcov_tsv_action(tsv)
+did_plot <<- TRUE
+}
+}
+#print("baz")
+"dummy" # need to return a value; otherwise combn fails with an error
 }
-chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
+)
-fctr_lvl_2 <- "other"
+}
-# print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
-progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
-if (length(unique(chosen_samples)) < 1) {
-progress_action("NOTHING TO PLOT...")
-} else {
-chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
-predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
-my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
-# only process this column if both factors are members of lvlCSV
-is_match <- isLevelSelected(fctr_lvl_1)
-my_cor_cov <- do_detail_plot(
-x_dataMatrix  = my_matrix
-, x_predictor   = predictor
-, x_is_match    = is_match
-, x_algorithm   = algoC
-, x_prefix      = "Features"
-, x_show_labels = labelFeatures
-, x_progress    = progress_action
-, x_env         = calc_env
-)
-if ( is.null(my_cor_cov) ) {
-progress_action("NOTHING TO PLOT")
-} else {
-tsv <- my_cor_cov$tsv1
-# tsv$salientLevel <- salient_level_lookup(tsv$featureID)
-# tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
-# tsv$salience     <- salience_lookup(tsv$featureID)
-corcov_tsv_action(tsv)
-did_plot <<- TRUE
-}
-}
-#print("baz")
-"dummy" # need to return a value; otherwise combn fails with an error
-}
-)
 ## pass 2 - contrast each selected level with each of the other levels individually ##
 completed <- c()
 utils::combn(
 x = level_union
 , m = 2
 , x_predictor   = predictor
 , x_is_match    = is_match
 , x_algorithm   = algoC
 , x_prefix      = "Features"
 , x_show_labels = labelFeatures
+, x_show_loado_labels = labelOrthoFeatures
 , x_progress    = progress_action
 , x_env         = calc_env
 )
 if ( is.null(my_cor_cov) ) {
 progress_action("NOTHING TO PLOT")
 } else {
 tsv <- my_cor_cov$tsv1
-# tsv$salientLevel <- salient_level_lookup(tsv$featureID)
+tsv$mz <- mz_lookup(tsv$featureID)
-# tsv$salientRCV   <- salient_rcv_lookup(tsv$featureID)
+tsv$rt <- rt_lookup(tsv$featureID)
-# tsv$salience     <- salience_lookup(tsv$featureID)
 corcov_tsv_action(tsv)
 did_plot <<- TRUE
 }
 }
 #print("baz")
 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014
 #    Length = number of features; labels = feature identifiers.  (The same is true for $correlation and $covariance.)
 result$vip4p     <- as.numeric(ropls_x@vipVn)
 result$vip4o     <- as.numeric(ropls_x@orthoVipVn)
+result$loadp     <- as.numeric(ropls_x@loadingMN)
+result$loado     <- as.numeric(ropls_x@orthoLoadingMN)
 # get the level names
 level_names      <- sort(levels(as.factor(ropls_x@suppLs$y)))
+fctr_lvl_1       <- level_names[1]
+fctr_lvl_2       <- level_names[2]
 feature_count    <- length(ropls_x@vipVn)
-result$level1    <- rep.int(x = level_names[1], times = feature_count)
+result$level1    <- rep.int(x = fctr_lvl_1, times = feature_count)
-result$level2    <- rep.int(x = level_names[2], times = feature_count)
+result$level2    <- rep.int(x = fctr_lvl_2, times = feature_count)
 # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation)))
 superresult <- list()
 if (length(result$vip4o) == 0) result$vip4o <- NA
+greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 )
 superresult$tsv1 <- data.frame(
 featureID           = names(ropls_x@vipVn)
 , factorLevel1        = result$level1
 , factorLevel2        = result$level2
+, greaterLevel        = greaterLevel
 , correlation         = result$correlation
 , covariance          = result$covariance
 , vip4p               = result$vip4p
 , vip4o               = result$vip4o
+, loadp               = result$loadp
+, loado               = result$loado
 , row.names           = NULL
 )
 rownames(superresult$tsv1) <- superresult$tsv1$featureID
 superresult$covariance <- result$covariance
 superresult$correlation <- result$correlation
 superresult$vip4p <- result$vip4p
 superresult$vip4o <- result$vip4o
+superresult$loadp <- result$loadp
+superresult$loado <- result$loado
 superresult$details <- result
 # #print(superresult$tsv1)
 result$superresult <- superresult
 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
 result$oplsda    <- ropls_x

Mercurial > repos > eschen42 > w4mcorcov

comparison w4mcorcov_calc.R @ 2:a06344808ffc draft