Mercurial > repos > eschen42 > w4mcorcov
comparison w4mcorcov_calc.R @ 7:ca9938f2eb6a draft default tip
"planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 5fd9687d543a48a715b1180caf93abebebd58b0e"
| author | eschen42 | 
|---|---|
| date | Tue, 17 Nov 2020 23:29:59 +0000 | 
| parents | 0b49916c5c52 | 
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| 6:0b49916c5c52 | 7:ca9938f2eb6a | 
|---|---|
| 1 # center with 'colMeans()' - ref: http://gastonsanchez.com/visually-enforced/how-to/2014/01/15/Center-data-in-R/ | 1 # compute and output detail plots | 
| 2 center_colmeans <- function(x) { | |
| 3 xcenter = colMeans(x) | |
| 4 x - rep(xcenter, rep.int(nrow(x), ncol(x))) | |
| 5 } | |
| 6 | |
| 7 #### OPLS-DA | |
| 8 algoC <- "nipals" | |
| 9 | |
| 10 do_detail_plot <- function( | 2 do_detail_plot <- function( | 
| 11 x_dataMatrix | 3 x_dataMatrix | 
| 12 , x_predictor | 4 , x_predictor | 
| 13 , x_is_match | 5 , x_is_match | 
| 14 , x_algorithm | 6 , x_algorithm | 
| 19 , x_crossval_i | 11 , x_crossval_i | 
| 20 ) { | 12 ) { | 
| 21 off <- function(x) if (x_show_labels == "0") 0 else x | 13 off <- function(x) if (x_show_labels == "0") 0 else x | 
| 22 if ( x_is_match | 14 if ( x_is_match | 
| 23 && ncol(x_dataMatrix) > 0 | 15 && ncol(x_dataMatrix) > 0 | 
| 24 && length(unique(x_predictor))> 1 | 16 && length(unique(x_predictor)) > 1 | 
| 25 && x_crossval_i < nrow(x_dataMatrix) | 17 && x_crossval_i < nrow(x_dataMatrix) | 
| 26 ) { | 18 ) { | 
| 27 my_oplsda <- opls( | 19 my_oplsda <- opls( | 
| 28 x = x_dataMatrix | 20 x = x_dataMatrix | 
| 29 , y = x_predictor | 21 , y = x_predictor | 
| 30 , algoC = x_algorithm | 22 , algoC = x_algorithm | 
| 31 , predI = 1 | 23 , predI = 1 | 
| 32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 | 24 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 | 
| 33 , printL = FALSE | 25 , fig.pdfC = 'none' | 
| 34 , plotL = FALSE | 26 , info.txtC = 'none' | 
| 35 , crossvalI = x_crossval_i | 27 , crossvalI = x_crossval_i | 
| 36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. | 28 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. | 
| 37 ) | 29 ) | 
| 38 # strip out variables having negligible variance | 30 # strip out variables having negligible variance | 
| 39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] | 31 x_dataMatrix <- x_dataMatrix[ , names(my_oplsda@vipVn), drop = FALSE] | 
| 40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) | 32 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) | 
| 41 # x_progress(strF(x_dataMatrix)) | 33 | 
| 42 # x_progress(strF(my_oplsda)) | |
| 43 #x_progress(head(my_oplsda_suppLs_y_levels)) | |
| 44 #x_progress(unique(my_oplsda_suppLs_y_levels)) | |
| 45 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] | 34 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] | 
| 46 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] | 35 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] | 
| 47 do_s_plot <- function( | 36 do_s_plot <- function( | 
| 48 x_env | 37 x_env | 
| 49 , predictor_projection_x = TRUE | 38 , predictor_projection_x = TRUE | 
| 50 , cplot_x = FALSE | 39 , cplot_x = FALSE | 
| 51 , cor_vs_cov_x = NULL | 40 , cor_vs_cov_x = NULL | 
| 52 ) | 41 ) { | 
| 53 { | |
| 54 #print(ls(x_env)) # "cplot_y" etc | |
| 55 #print(str(x_env$cplot_y)) # chr "covariance" | |
| 56 if (cplot_x) { | 42 if (cplot_x) { | 
| 57 #print(x_env$cplot_y) # "covariance" | |
| 58 cplot_y_correlation <- (x_env$cplot_y == "correlation") | 43 cplot_y_correlation <- (x_env$cplot_y == "correlation") | 
| 59 #print(cplot_y_correlation) # FALSE | 44 default_lim_x <- 10 | 
| 45 } else { | |
| 46 default_lim_x <- 1.2 | |
| 60 } | 47 } | 
| 61 if (is.null(cor_vs_cov_x)) { | 48 if (is.null(cor_vs_cov_x)) { | 
| 62 my_cor_vs_cov <- cor_vs_cov( | 49 my_cor_vs_cov <- cor_vs_cov( | 
| 63 matrix_x = x_dataMatrix | 50 matrix_x = x_dataMatrix | 
| 64 , ropls_x = my_oplsda | 51 , ropls_x = my_oplsda | 
| 65 , predictor_projection_x = predictor_projection_x | 52 , predictor_projection_x = predictor_projection_x | 
| 66 , x_progress | 53 , x_progress | 
| 54 , x_env | |
| 67 ) | 55 ) | 
| 68 } else { | 56 } else { | 
| 69 my_cor_vs_cov <- cor_vs_cov_x | 57 my_cor_vs_cov <- cor_vs_cov_x | 
| 70 } | 58 } | 
| 71 # print("str(my_cor_vs_cov)") | 59 | 
| 72 # str(my_cor_vs_cov) | |
| 73 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { | 60 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { | 
| 74 if (is.null(cor_vs_cov_x)) { | 61 if (is.null(cor_vs_cov_x)) { | 
| 75 x_progress("No cor_vs_cov data produced") | 62 x_progress( | 
| 63 sprintf("No cor_vs_cov data produced for level %s versus %s", fctr_lvl_1, fctr_lvl_2) | |
| 64 ) | |
| 76 } | 65 } | 
| 77 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 66 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 
| 78 text(x=1, y=1, labels="too few covariance data") | 67 text(x=1, y=1, labels="too few covariance data") | 
| 79 return(my_cor_vs_cov) | 68 return(my_cor_vs_cov) | 
| 80 } | 69 } | 
| 82 my_cor_vs_cov | 71 my_cor_vs_cov | 
| 83 , { | 72 , { | 
| 84 min_x <- min(covariance, na.rm = TRUE) | 73 min_x <- min(covariance, na.rm = TRUE) | 
| 85 max_x <- max(covariance, na.rm = TRUE) | 74 max_x <- max(covariance, na.rm = TRUE) | 
| 86 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) | 75 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) | 
| 87 covariance <- covariance / lim_x | 76 | 
| 88 lim_x <- 1.2 | 77 # Regarding using VIP as a guide to selecting a biomarker: | 
| 89 # "It is generally accepted that a variable should be selected if vj>1, [27–29], | 78 # "It is generally accepted that a variable should be selected if vj>1, [27–29], | 
| 90 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." | 79 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." | 
| 91 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) | 80 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) | 
| 92 plus_cor <- correlation | 81 plus_cor <- correlation | 
| 93 plus_cov <- covariance | 82 plus_cov <- covariance | 
| 94 cex <- 0.65 | 83 if (length(plus_cor) != 0 && length(plus_cor) != 0) { | 
| 95 which_projection <- if (projection == 1) "t1" else "o1" | 84 cex <- 0.65 | 
| 96 which_loading <- if (projection == 1) "parallel" else "orthogonal" | 85 if (projection == 1) { | 
| 97 if (projection == 1) { # predictor-projection | 86 # predictor-projection | 
| 98 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) | 87 vipcp <- pmax(0, pmin(1, (vip4p - 0.83) / (1.21 - 0.83))) | 
| 99 if (!cplot_x) { # S-plot predictor-projection | 88 if (!cplot_x) { | 
| 100 my_xlab <- "relative covariance(feature,t1)" | 89 # S-plot predictor-projection | 
| 101 my_x <- plus_cov | 90 my_xlab <- "covariance(feature,t1)" | 
| 102 my_ylab <- "correlation(feature,t1)" | 91 my_x <- plus_cov | 
| 103 my_y <- plus_cor | |
| 104 } else { # C-plot predictor-projection | |
| 105 my_xlab <- "variable importance in predictor-projection" | |
| 106 my_x <- vip4p | |
| 107 if (cplot_y_correlation) { | |
| 108 my_ylab <- "correlation(feature,t1)" | 92 my_ylab <- "correlation(feature,t1)" | 
| 109 my_y <- plus_cor | 93 my_y <- plus_cor | 
| 94 # X,Y limits for S-PLOT | |
| 95 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3)) | |
| 96 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
| 110 } else { | 97 } else { | 
| 111 my_ylab <- "relative covariance(feature,t1)" | 98 # C-plot predictor-projection | 
| 112 my_y <- plus_cov | 99 my_xlab <- "variable importance in predictor-projection" | 
| 100 my_x <- vip4p | |
| 101 if (cplot_y_correlation) { | |
| 102 my_ylab <- "correlation(feature,t1)" | |
| 103 my_y <- plus_cor | |
| 104 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
| 105 } else { | |
| 106 my_ylab <- "covariance(feature,t1)" | |
| 107 my_y <- plus_cov | |
| 108 my_ylim <- max(abs(plus_cov)) | |
| 109 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) ) | |
| 110 } | |
| 111 # X,Y limits for C-plot | |
| 112 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
| 113 lim_x <- min(lim_x, default_lim_x) | |
| 114 my_xlim <- c( 0, lim_x ) # + off(0.2) ) | |
| 113 } | 115 } | 
| 114 } | 116 my_load_distal <- loadp | 
| 115 if (cplot_x) { | 117 my_load_proximal <- loado | 
| 116 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | 118 red <- as.numeric(correlation > 0) * vipcp | 
| 117 my_xlim <- c( 0, lim_x + off(0.2) ) | 119 blue <- as.numeric(correlation < 0) * vipcp | 
| 120 alpha <- 0.1 + 0.4 * vipcp | |
| 121 red[is.na(red)] <- 0 | |
| 122 blue[is.na(blue)] <- 0 | |
| 123 alpha[is.na(alpha)] <- 0 | |
| 124 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
| 125 main_label <- sprintf("%s for level %s versus %s" | |
| 126 , x_prefix, fctr_lvl_1, fctr_lvl_2) | |
| 118 } else { | 127 } else { | 
| 119 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | 128 # orthogonal projection | 
| 120 } | 129 vipco <- pmax(0, pmin(1, (vip4o - 0.83) / (1.21 - 0.83))) | 
| 121 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | 130 if (!cplot_x) { | 
| 122 my_load_distal <- loadp | 131 # S-PLOT orthogonal-projection | 
| 123 my_load_proximal <- loado | 132 my_xlab <- "covariance(feature,to1)" | 
| 124 red <- as.numeric(correlation > 0) * vipcp | 133 my_x <- -plus_cov | 
| 125 blue <- as.numeric(correlation < 0) * vipcp | 134 # X,Y limits for S-PLOT | 
| 126 alpha <- 0.1 + 0.4 * vipcp | 135 my_xlim <- c( -lim_x, lim_x ) * (1.0 + off(0.3)) | 
| 127 red[is.na(red)] <- 0 | |
| 128 blue[is.na(blue)] <- 0 | |
| 129 alpha[is.na(alpha)] <- 0 | |
| 130 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
| 131 main_label <- sprintf("%s for level %s versus %s" | |
| 132 , x_prefix, fctr_lvl_1, fctr_lvl_2) | |
| 133 } else { # orthogonal projection | |
| 134 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) | |
| 135 if (!cplot_x) { | |
| 136 my_xlab <- "relative covariance(feature,to1)" | |
| 137 my_x <- -plus_cov | |
| 138 } else { | |
| 139 my_xlab <- "variable importance in orthogonal projection" | |
| 140 my_x <- vip4o | |
| 141 } | |
| 142 if (!cplot_x) { # S-plot orthogonal projection | |
| 143 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | |
| 144 my_ylab <- "correlation(feature,to1)" | |
| 145 my_y <- plus_cor | |
| 146 } else { # C-plot orthogonal projection | |
| 147 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
| 148 my_xlim <- c( 0, lim_x + off(0.2) ) | |
| 149 if (cplot_y_correlation) { | |
| 150 my_ylab <- "correlation(feature,to1)" | 136 my_ylab <- "correlation(feature,to1)" | 
| 151 my_y <- plus_cor | 137 my_y <- plus_cor | 
| 138 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
| 152 } else { | 139 } else { | 
| 153 my_ylab <- "relative covariance(feature,to1)" | 140 # C-plot orthogonal-projection | 
| 154 my_y <- plus_cov | 141 my_xlab <- "variable importance in orthogonal projection" | 
| 142 my_x <- vip4o | |
| 143 # C-plot orthogonal projection | |
| 144 # X,Y limits for C-plot | |
| 145 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
| 146 my_xlim <- c( 0, lim_x ) # + off(0.2) ) | |
| 147 if (cplot_y_correlation) { | |
| 148 my_ylab <- "correlation(feature,to1)" | |
| 149 my_y <- plus_cor | |
| 150 my_ylim <- c( -1.0, 1.0 ) * (1.0 + off(0.2) ) | |
| 151 } else { | |
| 152 my_ylab <- "covariance(feature,to1)" | |
| 153 my_y <- plus_cov | |
| 154 my_ylim <- max(abs(plus_cov)) | |
| 155 my_ylim <- c( -my_ylim, my_ylim ) * (1.0 + off(0.2) ) | |
| 156 } | |
| 155 } | 157 } | 
| 158 my_load_distal <- loado | |
| 159 my_load_proximal <- loadp | |
| 160 alpha <- 0.1 + 0.4 * vipco | |
| 161 alpha[is.na(alpha)] <- 0 | |
| 162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) | |
| 163 main_label <- sprintf( | |
| 164 "Features influencing orthogonal projection for %s versus %s" | |
| 165 , fctr_lvl_1, fctr_lvl_2) | |
| 156 } | 166 } | 
| 157 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | 167 main_cex <- min(1.0, 46.0/nchar(main_label)) | 
| 158 my_load_distal <- loado | 168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward | 
| 159 my_load_proximal <- loadp | 169 my_pch <- sapply(X = cor_p_value, function(x) if (x < 0.01) 16 else if (x < 0.05) 17 else 18) | 
| 160 alpha <- 0.1 + 0.4 * vipco | 170 if ( sum(is.infinite(my_xlim)) == 0 ) { | 
| 161 alpha[is.na(alpha)] <- 0 | 171 plot( | 
| 162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) | 172 y = my_y | 
| 163 main_label <- sprintf( | 173 , x = my_x | 
| 164 "Features influencing orthogonal projection for %s versus %s" | 174 , type = "p" | 
| 165 , fctr_lvl_1, fctr_lvl_2) | 175 , xlim = my_xlim | 
| 166 } | 176 , ylim = my_ylim | 
| 167 main_cex <- min(1.0, 46.0/nchar(main_label)) | 177 , xlab = my_xlab | 
| 168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward | 178 , ylab = my_ylab | 
| 169 plot( | 179 , main = main_label | 
| 170 y = my_y | 180 , cex.main = main_cex | 
| 171 , x = my_x | 181 , cex = cex | 
| 172 , type = "p" | 182 , pch = my_pch | 
| 173 , xlim = my_xlim | 183 , col = my_col | 
| 174 , ylim = my_ylim | |
| 175 , xlab = my_xlab | |
| 176 , ylab = my_ylab | |
| 177 , main = main_label | |
| 178 , cex.main = main_cex | |
| 179 , cex = cex | |
| 180 , pch = 16 | |
| 181 , col = my_col | |
| 182 ) | |
| 183 low_x <- -0.7 * lim_x | |
| 184 high_x <- 0.7 * lim_x | |
| 185 if (projection == 1 && !cplot_x) { | |
| 186 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") | |
| 187 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") | |
| 188 } | |
| 189 if ( x_show_labels != "0" ) { | |
| 190 names(my_load_distal) <- tsv1$featureID | |
| 191 names(my_load_proximal) <- tsv1$featureID | |
| 192 if ( x_show_labels == "ALL" ) { | |
| 193 n_labels <- length(my_load_distal) | |
| 194 } else { | |
| 195 n_labels <- as.numeric(x_show_labels) | |
| 196 } | |
| 197 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) | |
| 198 labels_to_show <- c( | |
| 199 names(head(sort(my_load_distal),n = n_labels)) | |
| 200 , names(tail(sort(my_load_distal),n = n_labels)) | |
| 201 ) | |
| 202 labels <- unname( | |
| 203 sapply( | |
| 204 X = tsv1$featureID | |
| 205 , FUN = function(x) if( x %in% labels_to_show ) x else "" | |
| 206 ) | 184 ) | 
| 207 ) | 185 low_x <- -0.7 * lim_x | 
| 208 x_text_offset <- 0.024 | 186 high_x <- 0.7 * lim_x | 
| 209 y_text_off <- 0.017 | 187 if (projection == 1 && !cplot_x) { | 
| 210 if (!cplot_x) { # S-plot | 188 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") | 
| 211 y_text_offset <- if (projection == 1) -y_text_off else y_text_off | 189 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") | 
| 212 } else { # C-plot | 190 } | 
| 213 y_text_offset <- | 191 if ( x_show_labels != "0" ) { | 
| 214 sapply( | 192 names(my_load_distal) <- tsv1$featureID | 
| 215 X = (my_y > 0) | 193 names(my_load_proximal) <- tsv1$featureID | 
| 216 , FUN = function(x) { if (x) y_text_off else -y_text_off } | 194 if ( x_show_labels == "ALL" ) { | 
| 195 n_labels <- length(my_load_distal) | |
| 196 } else { | |
| 197 n_labels <- as.numeric(x_show_labels) | |
| 198 } | |
| 199 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) | |
| 200 labels_to_show <- c( | |
| 201 names(head(sort(my_load_distal), n = n_labels)) | |
| 202 , names(tail(sort(my_load_distal), n = n_labels)) | |
| 217 ) | 203 ) | 
| 218 } | 204 labels <- unname( | 
| 219 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { | 205 sapply( | 
| 220 # print("str(x_arg)") | 206 X = tsv1$featureID | 
| 221 # print(str(x_arg)) | 207 , FUN = function(x) if ( x %in% labels_to_show ) x else "" | 
| 222 # print("str(y_arg)") | |
| 223 # print(str(y_arg)) | |
| 224 # print("str(labels_arg)") | |
| 225 # print(str(labels_arg)) | |
| 226 if (length(labels_arg) > 0) { | |
| 227 unique_slant <- unique(slant_arg) | |
| 228 if (length(unique_slant) == 1) { | |
| 229 text( | |
| 230 y = y_arg | |
| 231 , x = x_arg + x_text_offset | |
| 232 , cex = 0.4 | |
| 233 , labels = labels_arg | |
| 234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
| 235 , srt = slant_arg | |
| 236 , adj = 0 # left-justified | |
| 237 ) | 208 ) | 
| 209 ) | |
| 210 x_text_offset <- 0.024 | |
| 211 y_text_off <- 0.017 | |
| 212 if (!cplot_x) { | |
| 213 # S-plot | |
| 214 y_text_offset <- if (projection == 1) -y_text_off else y_text_off | |
| 238 } else { | 215 } else { | 
| 239 for (slant in unique_slant) { | 216 # C-plot | 
| 240 text( | 217 y_text_offset <- | 
| 241 y = y_arg[slant_arg == slant] | 218 sapply( | 
| 242 , x = x_arg[slant_arg == slant] + x_text_offset | 219 X = (my_y > 0) | 
| 243 , cex = 0.4 | 220 , FUN = function(x) { | 
| 244 , labels = labels_arg[slant_arg == slant] | 221 if (x) y_text_off else -y_text_off | 
| 245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | 222 } | 
| 246 , srt = slant | 223 ) | 
| 247 , adj = 0 # left-justified | 224 } | 
| 225 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { | |
| 226 if (length(labels_arg) > 0) { | |
| 227 unique_slant <- unique(slant_arg) | |
| 228 if (length(unique_slant) == 1) { | |
| 229 text( | |
| 230 y = y_arg | |
| 231 , x = x_arg + x_text_offset | |
| 232 , cex = 0.4 | |
| 233 , labels = labels_arg | |
| 234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
| 235 , srt = slant_arg | |
| 236 , adj = 0 # left-justified | |
| 237 ) | |
| 238 } else { | |
| 239 for (slant in unique_slant) { | |
| 240 text( | |
| 241 y = y_arg[slant_arg == slant] | |
| 242 , x = x_arg[slant_arg == slant] + x_text_offset | |
| 243 , cex = 0.4 | |
| 244 , labels = labels_arg[slant_arg == slant] | |
| 245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
| 246 , srt = slant | |
| 247 , adj = 0 # left-justified | |
| 248 ) | |
| 249 } | |
| 250 } | |
| 251 } | |
| 252 } | |
| 253 if (!cplot_x) { | |
| 254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant | |
| 255 } else { | |
| 256 my_slant <- sapply( | |
| 257 X = (my_y > 0) | |
| 258 , FUN = function(x) if (x) 2 else -2 | |
| 259 ) * my_feature_label_slant | |
| 260 } | |
| 261 if (length(my_x) > 1) { | |
| 262 label_features( | |
| 263 x_arg = my_x [my_x > 0] | |
| 264 , y_arg = my_y [my_x > 0] - y_text_offset | |
| 265 , labels_arg = labels[my_x > 0] | |
| 266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) | |
| 267 ) | |
| 268 if (!cplot_x) { | |
| 269 label_features( | |
| 270 x_arg = my_x [my_x < 0] | |
| 271 , y_arg = my_y [my_x < 0] + y_text_offset | |
| 272 , labels_arg = labels[my_x < 0] | |
| 273 , slant_arg = my_slant | |
| 248 ) | 274 ) | 
| 249 } | 275 } | 
| 250 } | 276 } else { | 
| 251 } | 277 if (!cplot_x) { | 
| 252 } | 278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant | 
| 253 if (!cplot_x) { | 279 my_y_arg <- my_y + (if (my_x > 1) -1 else 1) * y_text_offset | 
| 254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant | 280 } else { | 
| 281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant | |
| 282 my_y_arg <- my_y + (if (my_y > 1) -1 else 1) * y_text_offset | |
| 283 } | |
| 284 label_features( | |
| 285 x_arg = my_x | |
| 286 , y_arg = my_y_arg | |
| 287 , labels_arg = labels | |
| 288 , slant_arg = my_slant | |
| 289 ) | |
| 290 } # end if (length(my_x) > 1) | |
| 291 } # end if ( x_show_labels != "0" ) | |
| 255 } else { | 292 } else { | 
| 256 my_slant <- sapply( | 293 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 
| 257 X = (my_y > 0) | 294 text(x=1, y=1, labels="no S-plot is possible") | 
| 258 , FUN = function(x) if (x) 2 else -2 | 295 } # end if (sum(is.infinte(my_xlim))==0) | 
| 259 ) * my_feature_label_slant | 296 } else { | 
| 260 } | 297 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 
| 261 if (length(my_x) > 1) { | 298 text(x=1, y=1, labels="no S-plot is possible") | 
| 262 label_features( | 299 } # end if (length(plus_cor) != 0 && length(plus_cor) != 0) | 
| 263 x_arg = my_x [my_x > 0] | 300 } # end action | 
| 264 , y_arg = my_y [my_x > 0] - y_text_offset | 301 ) # end with( my_cor_vs_cov, action ) | 
| 265 , labels_arg = labels[my_x > 0] | |
| 266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) | |
| 267 ) | |
| 268 if (!cplot_x) { | |
| 269 label_features( | |
| 270 x_arg = my_x [my_x < 0] | |
| 271 , y_arg = my_y [my_x < 0] + y_text_offset | |
| 272 , labels_arg = labels[my_x < 0] | |
| 273 , slant_arg = my_slant | |
| 274 ) | |
| 275 } | |
| 276 } else { | |
| 277 if (!cplot_x) { | |
| 278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant | |
| 279 my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset | |
| 280 } else { | |
| 281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant | |
| 282 my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset | |
| 283 } | |
| 284 label_features( | |
| 285 x_arg = my_x | |
| 286 , y_arg = my_y_arg | |
| 287 , labels_arg = labels | |
| 288 , slant_arg = my_slant | |
| 289 ) | |
| 290 } | |
| 291 } | |
| 292 } | |
| 293 ) | |
| 294 return (my_cor_vs_cov) | 302 return (my_cor_vs_cov) | 
| 295 } | 303 } # end function do_s_plot | 
| 296 my_cor_vs_cov <- do_s_plot( | 304 my_cor_vs_cov <- do_s_plot( | 
| 297 x_env = x_env | 305 x_env = x_env | 
| 298 , predictor_projection_x = TRUE | 306 , predictor_projection_x = TRUE | 
| 299 , cplot_x = FALSE | 307 , cplot_x = FALSE | 
| 300 ) | 308 ) | 
| 301 typeVc <- c("correlation", # 1 | 309 typevc <- c("correlation", # 1 | 
| 302 "outlier", # 2 | 310 "outlier", # 2 | 
| 303 "overview", # 3 | 311 "overview", # 3 | 
| 304 "permutation", # 4 | 312 "permutation", # 4 | 
| 305 "predict-train", # 5 | 313 "predict-train", # 5 | 
| 306 "predict-test", # 6 | 314 "predict-test", # 6 | 
| 310 "x-variance", # 10 | 318 "x-variance", # 10 | 
| 311 "xy-score", # 11 | 319 "xy-score", # 11 | 
| 312 "xy-weight" # 12 | 320 "xy-weight" # 12 | 
| 313 ) # [c(3,8,9)] # [c(4,3,8,9)] | 321 ) # [c(3,8,9)] # [c(4,3,8,9)] | 
| 314 if ( length(my_oplsda@orthoVipVn) > 0 ) { | 322 if ( length(my_oplsda@orthoVipVn) > 0 ) { | 
| 315 my_typevc <- typeVc[c(9,3,8)] | 323 my_typevc <- typevc[c(9,3,8)] | 
| 316 } else { | 324 } else { | 
| 317 my_typevc <- c("(dummy)","overview","(dummy)") | 325 my_typevc <- c("(dummy)","overview","(dummy)") | 
| 318 } | 326 } | 
| 319 my_ortho_cor_vs_cov_exists <- FALSE | 327 my_ortho_cor_vs_cov_exists <- FALSE | 
| 320 for (my_type in my_typevc) { | 328 for (my_type in my_typevc) { | 
| 321 if (my_type %in% typeVc) { | 329 if (my_type %in% typevc) { | 
| 322 tryCatch({ | 330 tryCatch({ | 
| 323 if ( my_type != "x-loading" ) { | 331 if ( my_type != "x-loading" ) { | 
| 324 plot( | 332 plot( | 
| 325 x = my_oplsda | 333 x = my_oplsda | 
| 326 , typeVc = my_type | 334 , typeVc = my_type | 
| 327 , parCexN = 0.4 | 335 , parCexN = 0.4 | 
| 328 , parDevNewL = FALSE | 336 , parDevNewL = FALSE | 
| 329 , parLayL = TRUE | 337 , parLayL = TRUE | 
| 330 , parEllipsesL = TRUE | 338 , parEllipsesL = TRUE | 
| 331 ) | 339 ) | 
| 332 if (my_type == "overview") { | 340 if (my_type == "overview") { | 
| 333 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) | 341 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) | 
| 334 title(sub = sub_label) | 342 title(sub = sub_label) | 
| 335 } | 343 } | 
| 336 } else { | 344 } else { | 
| 337 my_ortho_cor_vs_cov <- do_s_plot( | 345 my_ortho_cor_vs_cov <- do_s_plot( | 
| 338 x_env = x_env | 346 x_env = x_env | 
| 339 , predictor_projection_x = FALSE | 347 , predictor_projection_x = FALSE | 
| 340 , cplot_x = FALSE | 348 , cplot_x = FALSE | 
| 341 ) | 349 ) | 
| 342 my_ortho_cor_vs_cov_exists <- TRUE | 350 my_ortho_cor_vs_cov_exists <- TRUE | 
| 351 } | |
| 343 } | 352 } | 
| 344 }, error = function(e) { | 353 , error = function(e) { | 
| 345 x_progress( | 354 x_progress( | 
| 346 sprintf( | 355 sprintf( | 
| 347 "factor level %s or %s may have only one sample - %s" | 356 "factor level %s or %s may have only one sample - %s" | 
| 348 , fctr_lvl_1 | 357 , fctr_lvl_1 | 
| 349 , fctr_lvl_2 | 358 , fctr_lvl_2 | 
| 358 } | 367 } | 
| 359 cplot_p <- x_env$cplot_p | 368 cplot_p <- x_env$cplot_p | 
| 360 cplot_o <- x_env$cplot_o | 369 cplot_o <- x_env$cplot_o | 
| 361 if (cplot_p || cplot_o) { | 370 if (cplot_p || cplot_o) { | 
| 362 if (cplot_p) { | 371 if (cplot_p) { | 
| 363 do_s_plot( | 372 if (!is.null(my_cor_vs_cov)) { | 
| 364 x_env = x_env | 373 do_s_plot( | 
| 365 , predictor_projection_x = TRUE | 374 x_env = x_env | 
| 366 , cplot_x = TRUE | 375 , predictor_projection_x = TRUE | 
| 367 , cor_vs_cov_x = my_cor_vs_cov | 376 , cplot_x = TRUE | 
| 368 ) | 377 , cor_vs_cov_x = my_cor_vs_cov | 
| 378 ) | |
| 379 } else { | |
| 380 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | |
| 381 text(x=1, y=1, labels="no predictor projection is possible") | |
| 382 } | |
| 369 did_plots <- 1 | 383 did_plots <- 1 | 
| 370 } else { | 384 } else { | 
| 371 did_plots <- 0 | 385 did_plots <- 0 | 
| 372 } | 386 } | 
| 373 if (cplot_o) { | 387 if (cplot_o) { | 
| 416 return ( FALSE ) | 430 return ( FALSE ) | 
| 417 } | 431 } | 
| 418 | 432 | 
| 419 # extract parameters from the environment | 433 # extract parameters from the environment | 
| 420 vrbl_metadata <- calc_env$vrbl_metadata | 434 vrbl_metadata <- calc_env$vrbl_metadata | 
| 421 vrbl_metadata_names <- vrbl_metadata[,1] | 435 vrbl_metadata_names <- vrbl_metadata[, 1] | 
| 422 smpl_metadata <- calc_env$smpl_metadata | 436 smpl_metadata <- calc_env$smpl_metadata | 
| 423 data_matrix <- calc_env$data_matrix | 437 data_matrix <- calc_env$data_matrix | 
| 424 pairSigFeatOnly <- calc_env$pairSigFeatOnly | 438 pair_significant_features_only <- calc_env$pairSigFeatOnly | 
| 425 facC <- calc_env$facC | 439 facC <- calc_env$facC | 
| 426 tesC <- calc_env$tesC | 440 tesC <- calc_env$tesC | 
| 427 # extract the levels from the environment | 441 # extract the levels from the environment | 
| 428 originalLevCSV <- levCSV <- calc_env$levCSV | 442 originalLevCSV <- levCSV <- calc_env$levCSV | 
| 429 # matchingC is one of { "none", "wildcard", "regex" } | 443 # matchingC is one of { "none", "wildcard", "regex" } | 
| 430 matchingC <- calc_env$matchingC | 444 matchingC <- calc_env$matchingC | 
| 431 labelFeatures <- calc_env$labelFeatures | 445 labelFeatures <- calc_env$labelFeatures | 
| 446 minCrossvalI <- as.integer(calc_env$min_crossval_i) | |
| 432 | 447 | 
| 433 # arg/env checking | 448 # arg/env checking | 
| 434 if (!(facC %in% names(smpl_metadata))) { | 449 if (!(facC %in% names(smpl_metadata))) { | 
| 435 failure_action( | 450 failure_action( | 
| 436 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" | 451 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" | 
| 444 | 459 | 
| 445 rt <- vrbl_metadata$rt | 460 rt <- vrbl_metadata$rt | 
| 446 names(rt) <- vrbl_metadata$variableMetadata | 461 names(rt) <- vrbl_metadata$variableMetadata | 
| 447 rt_lookup <- function(feature) unname(rt[feature]) | 462 rt_lookup <- function(feature) unname(rt[feature]) | 
| 448 | 463 | 
| 449 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) | 464 # calculate salience_df as data.frame(feature, max_level, max_median, salience_rcv, mean_median, salience, salience_rcv) | 
| 450 salience_df <- calc_env$salience_df <- w4msalience( | 465 salience_df <- calc_env$salience_df <- w4msalience( | 
| 451 data_matrix = data_matrix | 466 data_matrix = data_matrix | 
| 452 , sample_class = smpl_metadata[,facC] | 467 , sample_class = smpl_metadata[,facC] | 
| 453 , failure_action = failure_action | 468 , failure_action = failure_action | 
| 454 ) | 469 ) | 
| 455 salience_tsv_action({ | 470 salience_tsv_action({ | 
| 456 my_df <- data.frame( | 471 with ( | 
| 457 featureID = salience_df$feature | 472 salience_df | 
| 458 , salientLevel = salience_df$max_level | 473 , { | 
| 459 , salientRCV = salience_df$salient_rcv | 474 my_df <<- data.frame( | 
| 460 , salience = salience_df$salience | 475 featureID = feature | 
| 461 , mz = mz_lookup(salience_df$feature) | 476 , salientLevel = max_level | 
| 462 , rt = rt_lookup(salience_df$feature) | 477 , salienceRCV = salience_rcv | 
| 463 ) | 478 , relativeSalientDistance = relative_salient_distance | 
| 464 my_df[order(-my_df$salience),] | 479 , salience = salience | 
| 480 , mz = mz_lookup(feature) | |
| 481 , rt = rt_lookup(feature) | |
| 482 ) | |
| 483 }) | |
| 484 my_df[order(-my_df$relativeSalientDistance),] | |
| 465 }) | 485 }) | 
| 466 | 486 | 
| 467 # transform wildcards to regexen | 487 # transform wildcards to regexen | 
| 468 if (matchingC == "wildcard") { | 488 if (matchingC == "wildcard") { | 
| 469 # strsplit(x = "hello,wild,world", split = ",") | 489 # strsplit(x = "hello,wild,world", split = ",") | 
| 571 ) | 591 ) | 
| 572 my_cor_cov <- do_detail_plot( | 592 my_cor_cov <- do_detail_plot( | 
| 573 x_dataMatrix = my_matrix | 593 x_dataMatrix = my_matrix | 
| 574 , x_predictor = predictor | 594 , x_predictor = predictor | 
| 575 , x_is_match = TRUE | 595 , x_is_match = TRUE | 
| 576 , x_algorithm = algoC | 596 , x_algorithm = "nipals" | 
| 577 , x_prefix = if (pairSigFeatOnly) { | 597 , x_prefix = if (pair_significant_features_only) { | 
| 578 "Significantly contrasting features" | 598 "Significantly contrasting features" | 
| 579 } else { | 599 } else { | 
| 580 "Significant features" | 600 "Significant features" | 
| 581 } | 601 } | 
| 582 , x_show_labels = labelFeatures | 602 , x_show_labels = labelFeatures | 
| 583 , x_progress = progress_action | 603 , x_progress = progress_action | 
| 584 , x_crossval_i = min(7, length(chosen_samples)) | 604 , x_crossval_i = min(minCrossvalI, length(chosen_samples)) | 
| 585 , x_env = calc_env | 605 , x_env = calc_env | 
| 586 ) | 606 ) | 
| 587 if ( is.null(my_cor_cov) ) { | 607 if ( is.null(my_cor_cov) ) { | 
| 588 progress_action("NOTHING TO PLOT") | 608 progress_action("NOTHING TO PLOT") | 
| 589 } else { | 609 } else { | 
| 638 } else { | 658 } else { | 
| 639 1 | 659 1 | 
| 640 } | 660 } | 
| 641 ) | 661 ) | 
| 642 col_selector <- vrbl_metadata_names[ | 662 col_selector <- vrbl_metadata_names[ | 
| 643 if ( pairSigFeatOnly ) fully_significant else overall_significant | 663 if (pair_significant_features_only) fully_significant else overall_significant | 
| 644 ] | 664 ] | 
| 645 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] | 665 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] | 
| 646 my_cor_cov <- do_detail_plot( | 666 my_cor_cov <- do_detail_plot( | 
| 647 x_dataMatrix = my_matrix | 667 x_dataMatrix = my_matrix | 
| 648 , x_predictor = predictor | 668 , x_predictor = predictor | 
| 649 , x_is_match = is_match | 669 , x_is_match = is_match | 
| 650 , x_algorithm = algoC | 670 , x_algorithm = "nipals" | 
| 651 , x_prefix = if (pairSigFeatOnly) { | 671 , x_prefix = if (pair_significant_features_only) { | 
| 652 "Significantly contrasting features" | 672 "Significantly contrasting features" | 
| 653 } else { | 673 } else { | 
| 654 "Significant features" | 674 "Significant features" | 
| 655 } | 675 } | 
| 656 , x_show_labels = labelFeatures | 676 , x_show_labels = labelFeatures | 
| 657 , x_progress = progress_action | 677 , x_progress = progress_action | 
| 658 , x_crossval_i = min(7, length(chosen_samples)) | 678 , x_crossval_i = min(minCrossvalI, length(chosen_samples)) | 
| 659 , x_env = calc_env | 679 , x_env = calc_env | 
| 660 ) | 680 ) | 
| 661 if ( is.null(my_cor_cov) ) { | 681 if ( is.null(my_cor_cov) ) { | 
| 662 progress_action("NOTHING TO PLOT.") | 682 progress_action("NOTHING TO PLOT.") | 
| 663 } else { | 683 } else { | 
| 679 ) | 699 ) | 
| 680 } | 700 } | 
| 681 } | 701 } | 
| 682 } | 702 } | 
| 683 } | 703 } | 
| 684 } else { # tesC == "none" | 704 } else { | 
| 705 # tesC == "none" | |
| 685 # find all the levels for factor facC in sampleMetadata | 706 # find all the levels for factor facC in sampleMetadata | 
| 686 level_union <- unique(sort(smpl_metadata_facC)) | 707 level_union <- unique(sort(smpl_metadata_facC)) | 
| 687 # identify the selected levels for factor facC from sampleMetadata | 708 # identify the selected levels for factor facC from sampleMetadata | 
| 688 level_include <- sapply(X = level_union, FUN = isLevelSelected) | 709 level_include <- sapply(X = level_union, FUN = isLevelSelected) | 
| 689 # discard the non-selected levels for factor facC | 710 # discard the non-selected levels for factor facC | 
| 697 , FUN = function(x) { | 718 , FUN = function(x) { | 
| 698 fctr_lvl_1 <- x[1] | 719 fctr_lvl_1 <- x[1] | 
| 699 fctr_lvl_2 <- { | 720 fctr_lvl_2 <- { | 
| 700 if ( fctr_lvl_1 %in% completed ) | 721 if ( fctr_lvl_1 %in% completed ) | 
| 701 return("DUMMY") | 722 return("DUMMY") | 
| 702 # strF(completed) | |
| 703 completed <<- c(completed, fctr_lvl_1) | 723 completed <<- c(completed, fctr_lvl_1) | 
| 704 setdiff(level_union, fctr_lvl_1) | 724 setdiff(level_union, fctr_lvl_1) | 
| 705 } | 725 } | 
| 706 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 726 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 
| 707 fctr_lvl_2 <- "other" | 727 fctr_lvl_2 <- "other" | 
| 728 ) | 748 ) | 
| 729 my_cor_cov <- do_detail_plot( | 749 my_cor_cov <- do_detail_plot( | 
| 730 x_dataMatrix = my_matrix | 750 x_dataMatrix = my_matrix | 
| 731 , x_predictor = predictor | 751 , x_predictor = predictor | 
| 732 , x_is_match = is_match | 752 , x_is_match = is_match | 
| 733 , x_algorithm = algoC | 753 , x_algorithm = "nipals" | 
| 734 , x_prefix = "Features" | 754 , x_prefix = "Features" | 
| 735 , x_show_labels = labelFeatures | 755 , x_show_labels = labelFeatures | 
| 736 , x_progress = progress_action | 756 , x_progress = progress_action | 
| 737 , x_crossval_i = min(7, length(chosen_samples)) | 757 , x_crossval_i = min(minCrossvalI, length(chosen_samples)) | 
| 738 , x_env = calc_env | 758 , x_env = calc_env | 
| 739 ) | 759 ) | 
| 740 if ( is.null(my_cor_cov) ) { | 760 if ( is.null(my_cor_cov) ) { | 
| 741 progress_action("NOTHING TO PLOT...") | 761 progress_action("NOTHING TO PLOT...") | 
| 742 } else { | 762 } else { | 
| 781 ) | 801 ) | 
| 782 my_cor_cov <- do_detail_plot( | 802 my_cor_cov <- do_detail_plot( | 
| 783 x_dataMatrix = my_matrix | 803 x_dataMatrix = my_matrix | 
| 784 , x_predictor = predictor | 804 , x_predictor = predictor | 
| 785 , x_is_match = is_match | 805 , x_is_match = is_match | 
| 786 , x_algorithm = algoC | 806 , x_algorithm = "nipals" | 
| 787 , x_prefix = "Features" | 807 , x_prefix = "Features" | 
| 788 , x_show_labels = labelFeatures | 808 , x_show_labels = labelFeatures | 
| 789 , x_progress = progress_action | 809 , x_progress = progress_action | 
| 790 , x_crossval_i = min(7, length(chosen_samples)) | 810 , x_crossval_i = min(minCrossvalI, length(chosen_samples)) | 
| 791 , x_env = calc_env | 811 , x_env = calc_env | 
| 792 ) | 812 ) | 
| 793 if ( is.null(my_cor_cov) ) { | 813 if ( is.null(my_cor_cov) ) { | 
| 794 progress_action("NOTHING TO PLOT.....") | 814 progress_action("NOTHING TO PLOT.....") | 
| 795 } else { | 815 } else { | 
| 815 } | 835 } | 
| 816 } | 836 } | 
| 817 if (!did_plot) { | 837 if (!did_plot) { | 
| 818 failure_action( | 838 failure_action( | 
| 819 sprintf( | 839 sprintf( | 
| 820 "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" | 840 "Did not plot. Does sampleMetadata have at least two levels of factor '%s' matching '%s' ?" | 
| 821 , facC, originalLevCSV)) | 841 , facC, originalLevCSV)) | 
| 822 return ( FALSE ) | 842 return ( FALSE ) | 
| 823 } | 843 } | 
| 824 return ( TRUE ) | 844 return ( TRUE ) | 
| 825 } | 845 } | 
| 832 cor_vs_cov <- function( | 852 cor_vs_cov <- function( | 
| 833 matrix_x | 853 matrix_x | 
| 834 , ropls_x | 854 , ropls_x | 
| 835 , predictor_projection_x = TRUE | 855 , predictor_projection_x = TRUE | 
| 836 , x_progress = print | 856 , x_progress = print | 
| 857 , x_env | |
| 837 ) { | 858 ) { | 
| 838 tryCatch({ | 859 tryCatch({ | 
| 839 return( | 860 return( | 
| 840 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) | 861 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress, x_env) | 
| 841 ) | 862 ) | 
| 842 }, error = function(e) { | 863 } | 
| 864 , error = function(e) { | |
| 843 x_progress( | 865 x_progress( | 
| 844 sprintf( | 866 sprintf( | 
| 845 "cor_vs_cov fatal error - %s" | 867 "cor_vs_cov fatal error - %s" | 
| 846 , as.character(e) # e$message | 868 , as.character(e) # e$message | 
| 847 ) | 869 ) | 
| 849 return ( NULL ) | 871 return ( NULL ) | 
| 850 }) | 872 }) | 
| 851 } | 873 } | 
| 852 | 874 | 
| 853 cor_vs_cov_try <- function( | 875 cor_vs_cov_try <- function( | 
| 854 matrix_x | 876 matrix_x # rows are samples; columns, features | 
| 855 , ropls_x | 877 , ropls_x # an instance of ropls::opls | 
| 856 , predictor_projection_x = TRUE | 878 , predictor_projection_x = TRUE # TRUE for predictor projection; FALSE for orthogonal projection | 
| 857 , x_progress = print | 879 , x_progress = print # function to produce progress and error messages | 
| 880 , x_env | |
| 858 ) { | 881 ) { | 
| 882 my_matrix_x <- matrix_x | |
| 883 my_matrix_x[my_matrix_x==0] <- NA | |
| 884 fdr_features <- x_env$fdr_features | |
| 885 | |
| 859 x_class <- class(ropls_x) | 886 x_class <- class(ropls_x) | 
| 860 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) | 887 if ( !( as.character(x_class) == "opls" ) ) { | 
| 861 stop( | 888 stop( | 
| 862 paste( | 889 paste( | 
| 863 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " | 890 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " | 
| 864 , as.character(x_class) | 891 , as.character(x_class) | 
| 865 ) | 892 ) | 
| 866 ) | 893 ) | 
| 867 } | 894 } | 
| 895 if ( !ropls_x@suppLs$algoC == "nipals" ) { | |
| 896 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS | |
| 897 stop( | |
| 898 paste( | |
| 899 "cor_vs_cov: Expected ropls::opls instance to have been computed by the NIPALS algorithm rather than " | |
| 900 , ropls_x@suppLs$algoC | |
| 901 ) | |
| 902 ) | |
| 903 } | |
| 868 result <- list() | 904 result <- list() | 
| 869 result$projection <- projection <- if (predictor_projection_x) 1 else 2 | 905 result$projection <- projection <- if (predictor_projection_x) 1 else 2 | 
| 870 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS | 906 | 
| 871 if ( ropls_x@suppLs$algoC == "nipals") { | 907 # I used equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 | 
| 872 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 | 908 # (and not from the supplement despite the statement that, for the NIPALS algorithm, | 
| 873 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) | 909 # the equations from the supplement should be used) because of the definition of the | 
| 874 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) | 910 # Pearson/Galton coefficient of correlation is defined as | 
| 875 if (predictor_projection_x) | 911 # $$ | 
| 876 score_matrix <- ropls_x@scoreMN | 912 # \rho_{X,Y}= \frac{\operatorname{cov}(X,Y)}{\sigma_X \sigma_Y} | 
| 877 else | 913 # $$ | 
| 878 score_matrix <- ropls_x@orthoScoreMN | 914 # as described (among other places) on Wikipedia at | 
| 879 score_matrix_transposed <- t(score_matrix) | 915 # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient#For_a_population | 
| 880 score_matrix_magnitude <- mag(score_matrix) | 916 # The equations in the supplement said to use, for the predictive component t1, | 
| 881 result$covariance <- | 917 # \rho_{t1,X_i}= \frac{\operatorname{cov}(t1,X_i)}{(\operatorname{mag}(t1))(\operatorname{mag}(X_i))} | 
| 882 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) | 918 # but the results that I got were dramatically different from published results for S-PLOTs; | 
| 883 result$correlation <- | 919 # perhaps my data are not centered exactly the same way that theirs were. | 
| 884 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) | 920 # The correlations calculated here are in agreement with those calculated with the code from | 
| 921 # page 22 of https://cran.r-project.org/web/packages/muma/muma.pdf | |
| 922 | |
| 923 | |
| 924 # count the features/variables (one column for each sample) | |
| 925 # count the features/variables (one column for each sample) | |
| 926 n_features <- ncol(my_matrix_x) | |
| 927 all_n_features <- x_env$fdr_features | |
| 928 if (length(grep("^[0-9][0-9]*$", all_n_features)) > 0) { | |
| 929 all_n_features <- as.integer(all_n_features) | |
| 885 } else { | 930 } else { | 
| 886 # WARNING - untested code - I don't have test data to exercise this branch | 931 all_n_features <- n_features | 
| 887 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 | 932 } | 
| 888 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F | 933 fdr_n_features <- max(n_features, all_n_features) | 
| 889 if (predictor_projection_x) | 934 # print("n_features") | 
| 890 score_matrix <- ropls_x@scoreMN | 935 # print(n_features) | 
| 891 else | 936 | 
| 892 score_matrix <- ropls_x@orthoScoreMN | 937 # count the samples/observations (one row for each sample) | 
| 893 score_matrix_transposed <- t(score_matrix) | 938 n_observations <- nrow(my_matrix_x) | 
| 894 cov_divisor <- nrow(matrix_x) - 1 | 939 | 
| 895 result$covariance <- sapply( | 940 # choose whether to plot the predictive score vector or orthogonal score vector | 
| 896 X = 1:ncol(matrix_x) | 941 if (predictor_projection_x) | 
| 897 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor | 942 score_vector <- ropls_x@scoreMN | 
| 943 else | |
| 944 score_vector <- ropls_x@orthoScoreMN | |
| 945 | |
| 946 # compute the covariance of each feature with the score vector | |
| 947 result$covariance <- | |
| 948 sapply( | |
| 949 X = 1:n_features | |
| 950 , FUN = function(i) { | |
| 951 cov(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs") | |
| 952 } | |
| 898 ) | 953 ) | 
| 899 score_sd <- sapply( | 954 # access covariance by feature name | 
| 900 X = 1:ncol(score_matrix) | 955 names(result$covariance) <- colnames(my_matrix_x) | 
| 901 , FUN = function(x) sd(score_matrix[,x]) | 956 | 
| 957 # compute the correlation of each feature with the score vector | |
| 958 result$correlation <- | |
| 959 sapply( | |
| 960 X = 1:n_features | |
| 961 , FUN = function(i) { | |
| 962 cor(score_vector, my_matrix_x[ , i, drop = TRUE], use = "pairwise.complete.obs") | |
| 963 } | |
| 902 ) | 964 ) | 
| 903 # xSdVn - Numerical vector: variable standard deviations of the 'x' matrix | 965 # access correlation by feature name | 
| 904 xSdVn <- ropls_x@xSdVn | 966 names(result$correlation) <- colnames(my_matrix_x) | 
| 905 result$correlation <- sapply( | 967 | 
| 906 X = 1:ncol(matrix_x) | 968 # eliminate NAs in either correlation or covariance | 
| 907 , FUN = function(x) { | 969 nas <- is.na(result$correlation) | is.na(result$covariance) | 
| 908 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) | 970 featureID <- names(ropls_x@vipVn) | 
| 909 } | 971 featureID <- featureID[!nas] | 
| 910 ) | 972 result$correlation <- result$correlation[!nas] | 
| 911 } | 973 result$covariance <- result$covariance[!nas] | 
| 912 result$correlation <- result$correlation[ 1, , drop = TRUE ] | 974 n_features <- length(featureID) | 
| 913 result$covariance <- result$covariance [ 1, , drop = TRUE ] | 975 | 
| 914 | 976 correl_pci <- lapply( | 
| 915 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 | 977 X = 1:n_features | 
| 978 , FUN = function(i) { | |
| 979 correl.ci( | |
| 980 r = result$correlation[i] | |
| 981 , n_obs = n_observations | |
| 982 , n_vars = fdr_n_features | |
| 983 ) | |
| 984 } | |
| 985 ) | |
| 986 result$p_value_raw <- sapply( | |
| 987 X = 1:n_features | |
| 988 , FUN = function(i) correl_pci[[i]]$p.value.raw | |
| 989 ) | |
| 990 result$p_value_raw[is.na(result$p_value_raw)] <- 1.0 | |
| 991 result$p_value <- sapply( | |
| 992 X = 1:n_features | |
| 993 , FUN = function(i) correl_pci[[i]]$p.value | |
| 994 ) | |
| 995 result$p_value[is.na(result$p_value)] <- 1.0 | |
| 996 result$ci_lower <- sapply( | |
| 997 X = 1:n_features | |
| 998 , FUN = function(i) correl_pci[[i]]$CI["lower"] | |
| 999 ) | |
| 1000 result$ci_upper <- sapply( | |
| 1001 X = 1:n_features | |
| 1002 , FUN = function(i) correl_pci[[i]]$CI["upper"] | |
| 1003 ) | |
| 1004 | |
| 1005 | |
| 1006 # extract "variant 4 of Variable Influence on Projection for OPLS" (see Galindo_Prieto_2014, DOI 10.1002/cem.2627) | |
| 916 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) | 1007 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) | 
| 917 result$vip4p <- as.numeric(ropls_x@vipVn) | 1008 result$vip4p <- as.numeric(ropls_x@vipVn)[!nas] | 
| 918 result$vip4o <- as.numeric(ropls_x@orthoVipVn) | 1009 result$vip4o <- as.numeric(ropls_x@orthoVipVn)[!nas] | 
| 919 result$loadp <- as.numeric(ropls_x@loadingMN) | 1010 if (length(result$vip4o) == 0) result$vip4o <- NA | 
| 920 result$loado <- as.numeric(ropls_x@orthoLoadingMN) | 1011 # extract the loadings | 
| 1012 result$loadp <- as.numeric(ropls_x@loadingMN)[!nas] | |
| 1013 result$loado <- as.numeric(ropls_x@orthoLoadingMN)[!nas] | |
| 921 # get the level names | 1014 # get the level names | 
| 922 level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) | 1015 level_names <- sort(levels(as.factor(ropls_x@suppLs$y))) | 
| 923 fctr_lvl_1 <- level_names[1] | 1016 fctr_lvl_1 <- level_names[1] | 
| 924 fctr_lvl_2 <- level_names[2] | 1017 fctr_lvl_2 <- level_names[2] | 
| 925 feature_count <- length(ropls_x@vipVn) | 1018 result$level1 <- rep.int(x = fctr_lvl_1, times = n_features) | 
| 926 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) | 1019 result$level2 <- rep.int(x = fctr_lvl_2, times = n_features) | 
| 927 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) | |
| 928 superresult <- list() | |
| 929 if (length(result$vip4o) == 0) result$vip4o <- NA | |
| 930 greaterLevel <- sapply( | 1020 greaterLevel <- sapply( | 
| 931 X = result$correlation | 1021 X = result$correlation | 
| 932 , FUN = function(my_corr) | 1022 , FUN = function(my_corr) { | 
| 933 tryCatch({ | 1023 tryCatch({ | 
| 934 if ( is.nan( my_corr ) ) { | 1024 if ( is.na(my_corr) || ! is.numeric( my_corr ) ) { | 
| 935 print("my_corr is NaN") | 1025 NA | 
| 936 NA | |
| 937 } else { | 1026 } else { | 
| 938 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 | 1027 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 | 
| 939 } | 1028 } | 
| 940 }, error = function(e) { | 1029 } | 
| 1030 , error = function(e) { | |
| 1031 print(my_corr) | |
| 941 x_progress( | 1032 x_progress( | 
| 942 sprintf( | 1033 sprintf( | 
| 943 "cor_vs_cov -> sapply: error - substituting NA - %s" | 1034 "cor_vs_cov -> sapply: error - substituting NA - %s" | 
| 944 , as.character(e) | 1035 , as.character(e) | 
| 945 ) | 1036 ) | 
| 946 ) | 1037 ) | 
| 947 NA | 1038 NA | 
| 948 }) | 1039 } | 
| 1040 ) | |
| 1041 } | |
| 949 ) | 1042 ) | 
| 950 | 1043 | 
| 951 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | 1044 # build a data frame to hold the content for the tab-separated values file | 
| 952 featureID <- names(ropls_x@vipVn) | |
| 953 greaterLevel <- greaterLevel[featureID] | |
| 954 result$correlation <- result$correlation[featureID] | |
| 955 result$covariance <- result$covariance[featureID] | |
| 956 # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | |
| 957 | |
| 958 tsv1 <- data.frame( | 1045 tsv1 <- data.frame( | 
| 959 featureID = featureID | 1046 featureID = featureID | 
| 960 , factorLevel1 = result$level1 | 1047 , factorLevel1 = result$level1 | 
| 961 , factorLevel2 = result$level2 | 1048 , factorLevel2 = result$level2 | 
| 962 , greaterLevel = greaterLevel | 1049 , greaterLevel = greaterLevel | 
| 963 , projection = result$projection | 1050 , projection = result$projection | 
| 964 , correlation = result$correlation | 1051 , correlation = result$correlation | 
| 965 , covariance = result$covariance | 1052 , covariance = result$covariance | 
| 966 , vip4p = result$vip4p | 1053 , vip4p = result$vip4p | 
| 967 , vip4o = result$vip4o | 1054 , vip4o = result$vip4o | 
| 968 , loadp = result$loadp | 1055 , loadp = result$loadp | 
| 969 , loado = result$loado | 1056 , loado = result$loado | 
| 970 , row.names = NULL | 1057 , cor_p_val_raw = result$p_value_raw | 
| 1058 , cor_p_value = result$p_value | |
| 1059 , cor_ci_lower = result$ci_lower | |
| 1060 , cor_ci_upper = result$ci_upper | |
| 971 ) | 1061 ) | 
| 972 tsv1 <- tsv1[!is.na(tsv1$correlation),] | 1062 rownames(tsv1) <- tsv1$featureID | 
| 973 tsv1 <- tsv1[!is.na(tsv1$covariance),] | 1063 | 
| 974 superresult$tsv1 <- tsv1 | 1064 # build the superresult, i.e., the result returned by this function | 
| 975 rownames(superresult$tsv1) <- tsv1$featureID | 1065 superresult <- list() | 
| 976 superresult$projection <- result$projection | 1066 superresult$projection <- result$projection | 
| 977 superresult$covariance <- result$covariance | 1067 superresult$covariance <- result$covariance | 
| 978 superresult$correlation <- result$correlation | 1068 superresult$correlation <- result$correlation | 
| 979 superresult$vip4p <- result$vip4p | 1069 superresult$vip4p <- result$vip4p | 
| 980 superresult$vip4o <- result$vip4o | 1070 superresult$vip4o <- result$vip4o | 
| 981 superresult$loadp <- result$loadp | 1071 superresult$loadp <- result$loadp | 
| 982 superresult$loado <- result$loado | 1072 superresult$loado <- result$loado | 
| 1073 superresult$cor_p_value <- tsv1$cor_p_value | |
| 983 superresult$details <- result | 1074 superresult$details <- result | 
| 984 result$superresult <- superresult | 1075 | 
| 985 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways | 1076 # remove any rows having NA for covariance or correlation | 
| 986 result$oplsda <- ropls_x | 1077 tsv1 <- tsv1[!is.na(tsv1$correlation),] | 
| 987 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways | 1078 tsv1 <- tsv1[!is.na(tsv1$covariance),] | 
| 1079 superresult$tsv1 <- tsv1 | |
| 1080 | |
| 1081 # # I did not include these but left them commentd out in case future | |
| 1082 # # consumers of this routine want to use it in currently unanticipated ways | |
| 1083 # result$superresult <- superresult | |
| 1084 # result$oplsda <- ropls_x | |
| 1085 # result$predictor <- ropls_x@suppLs$y | |
| 1086 | |
| 988 return (superresult) | 1087 return (superresult) | 
| 989 } | 1088 } | 
| 990 | 1089 | 
| 991 # vim: sw=2 ts=2 et : | 1090 # Code for correl.ci was adapted from correl function from: | 
| 1091 # @book{ | |
| 1092 # Tsagris_2018, | |
| 1093 # author = {Tsagris, Michail}, | |
| 1094 # year = {2018}, | |
| 1095 # link = {https://www.researchgate.net/publication/324363311_Multivariate_data_analysis_in_R}, | |
| 1096 # title = {Multivariate data analysis in R} | |
| 1097 # } | |
| 1098 # which follows | |
| 1099 # https://en.wikipedia.org/wiki/Fisher_transformation#Definition | |
| 1100 | |
| 1101 correl.ci <- function(r, n_obs, n_vars, a = 0.05, rho = 0) { | |
| 1102 ## r is the calculated correlation coefficient for n_obs pairs of observations of one variable | |
| 1103 ## a is the significance level | |
| 1104 ## rho is the hypothesised correlation | |
| 1105 zh0 <- atanh(rho) # 0.5*log((1+rho)/(1-rho)), i.e., Fisher's z-transformation for Ho | |
| 1106 zh1 <- atanh(r) # 0.5*log((1+r)/(1-r)), i.e., Fisher's z-transformation for H1 | |
| 1107 se <- (1 - r^2)/sqrt(n_obs - 3) ## standard error for Fisher's z-transformation of Ho | |
| 1108 test <- (zh1 - zh0)/se ### test statistic | |
| 1109 pvalue <- 2*(1 - pnorm(abs(test))) ## p-value | |
| 1110 pvalue.adj <- p.adjust(p = pvalue, method = "BY", n = n_vars) | |
| 1111 z_L <- zh1 - qnorm(1 - a/2)*se | |
| 1112 z_h <- zh1 + qnorm(1 - a/2)*se | |
| 1113 fish_l <- tanh(z_L) # (exp(2*z_l)-1)/(exp(2*z_l)+1), i.e., lower confidence limit | |
| 1114 fish_h <- tanh(z_h) # (exp(2*z_h)-1)/(exp(2*z_h)+1), i.e., upper confidence limit | |
| 1115 ci <- c(fish_l, fish_h) | |
| 1116 names(ci) <- c("lower", "upper") | |
| 1117 list( | |
| 1118 correlation = r | |
| 1119 , p.value.raw = pvalue | |
| 1120 , p.value = pvalue.adj | |
| 1121 , CI = ci | |
| 1122 ) | |
| 1123 } | |
| 1124 | |
| 1125 # vim: sw=2 ts=2 et ai : | 
