comparison w4mcorcov_calc.R @ 6:0b49916c5c52 draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 4428e3252d54c8a8e0e5d85e8eaaeb13e9b21de7
author eschen42
date Wed, 05 Sep 2018 19:24:47 -0400
parents 1d046f648b47
children ca9938f2eb6a
comparison
equal deleted inserted replaced
5:1d046f648b47 6:0b49916c5c52
5 } 5 }
6 6
7 #### OPLS-DA 7 #### OPLS-DA
8 algoC <- "nipals" 8 algoC <- "nipals"
9 9
10 do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) { 10 do_detail_plot <- function(
11 x_dataMatrix
12 , x_predictor
13 , x_is_match
14 , x_algorithm
15 , x_prefix
16 , x_show_labels
17 , x_progress = print
18 , x_env
19 , x_crossval_i
20 ) {
11 off <- function(x) if (x_show_labels == "0") 0 else x 21 off <- function(x) if (x_show_labels == "0") 0 else x
12 if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { 22 if ( x_is_match
23 && ncol(x_dataMatrix) > 0
24 && length(unique(x_predictor))> 1
25 && x_crossval_i < nrow(x_dataMatrix)
26 ) {
13 my_oplsda <- opls( 27 my_oplsda <- opls(
14 x = x_dataMatrix 28 x = x_dataMatrix
15 , y = x_predictor 29 , y = x_predictor
16 , algoC = x_algorithm 30 , algoC = x_algorithm
17 , predI = 1 31 , predI = 1
18 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0
19 , printL = FALSE 33 , printL = FALSE
20 , plotL = FALSE 34 , plotL = FALSE
21 , crossvalI = x_crossval_i 35 , crossvalI = x_crossval_i
36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2.
22 ) 37 )
38 # strip out variables having negligible variance
39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE]
23 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) 40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y))
41 # x_progress(strF(x_dataMatrix))
42 # x_progress(strF(my_oplsda))
43 #x_progress(head(my_oplsda_suppLs_y_levels))
44 #x_progress(unique(my_oplsda_suppLs_y_levels))
24 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] 45 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1]
25 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] 46 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2]
26 my_cor_vs_cov <- cor_vs_cov( 47 do_s_plot <- function(
27 matrix_x = x_dataMatrix 48 x_env
28 , ropls_x = my_oplsda 49 , predictor_projection_x = TRUE
50 , cplot_x = FALSE
51 , cor_vs_cov_x = NULL
29 ) 52 )
30 with( 53 {
31 my_cor_vs_cov 54 #print(ls(x_env)) # "cplot_y" etc
32 , { 55 #print(str(x_env$cplot_y)) # chr "covariance"
33 min_x <- min(covariance) 56 if (cplot_x) {
34 max_x <- max(covariance) 57 #print(x_env$cplot_y) # "covariance"
35 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) 58 cplot_y_correlation <- (x_env$cplot_y == "correlation")
36 covariance <- covariance / lim_x 59 #print(cplot_y_correlation) # FALSE
37 lim_x <- 1.2 60 }
38 main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) 61 if (is.null(cor_vs_cov_x)) {
39 main_cex <- min(1.0, 46.0/nchar(main_label)) 62 my_cor_vs_cov <- cor_vs_cov(
40 # "It is generally accepted that a variable should be selected if vj>1, [27–29], 63 matrix_x = x_dataMatrix
41 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." 64 , ropls_x = my_oplsda
42 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) 65 , predictor_projection_x = predictor_projection_x
43 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) 66 , x_progress
44 alpha <- 0.1 + 0.4 * vipco 67 )
45 red <- as.numeric(correlation > 0) * vipco 68 } else {
46 blue <- as.numeric(correlation < 0) * vipco 69 my_cor_vs_cov <- cor_vs_cov_x
47 plus_cor <- correlation 70 }
48 plus_cov <- covariance 71 # print("str(my_cor_vs_cov)")
49 cex <- 0.75 72 # str(my_cor_vs_cov)
50 plot( 73 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) {
51 y = plus_cor 74 if (is.null(cor_vs_cov_x)) {
52 , x = plus_cov 75 x_progress("No cor_vs_cov data produced")
53 , type="p" 76 }
54 , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) ) 77 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
55 , ylim=c( -1.0 - off(0.2), 1.0 + off(0.2) ) 78 text(x=1, y=1, labels="too few covariance data")
56 , xlab = sprintf("relative covariance(feature,t1)") 79 return(my_cor_vs_cov)
57 , ylab = sprintf("correlation(feature,t1)") 80 }
58 , main = main_label 81 with(
59 , cex.main = main_cex 82 my_cor_vs_cov
60 , cex = cex 83 , {
61 , pch = 16 84 min_x <- min(covariance, na.rm = TRUE)
62 , col = rgb(blue = blue, red = red, green = 0, alpha = alpha) 85 max_x <- max(covariance, na.rm = TRUE)
63 ) 86 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs))
64 low_x <- -0.7 * lim_x 87 covariance <- covariance / lim_x
65 high_x <- 0.7 * lim_x 88 lim_x <- 1.2
66 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") 89 # "It is generally accepted that a variable should be selected if vj>1, [27–29],
67 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") 90 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
68 if ( x_show_labels != "0" ) { 91 # (Mehmood 2012 doi:10.1186/1748-7188-6-27)
69 my_loadp <- loadp 92 plus_cor <- correlation
70 my_loado <- loado 93 plus_cov <- covariance
71 names(my_loadp) <- tsv1$featureID 94 cex <- 0.65
72 names(my_loado) <- tsv1$featureID 95 which_projection <- if (projection == 1) "t1" else "o1"
73 if ( x_show_labels == "ALL" ) { 96 which_loading <- if (projection == 1) "parallel" else "orthogonal"
74 n_labels <- length(loadp) 97 if (projection == 1) { # predictor-projection
75 } else { 98 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
76 n_labels <- as.numeric(x_show_labels) 99 if (!cplot_x) { # S-plot predictor-projection
100 my_xlab <- "relative covariance(feature,t1)"
101 my_x <- plus_cov
102 my_ylab <- "correlation(feature,t1)"
103 my_y <- plus_cor
104 } else { # C-plot predictor-projection
105 my_xlab <- "variable importance in predictor-projection"
106 my_x <- vip4p
107 if (cplot_y_correlation) {
108 my_ylab <- "correlation(feature,t1)"
109 my_y <- plus_cor
110 } else {
111 my_ylab <- "relative covariance(feature,t1)"
112 my_y <- plus_cov
113 }
114 }
115 if (cplot_x) {
116 lim_x <- max(my_x, na.rm = TRUE) * 1.1
117 my_xlim <- c( 0, lim_x + off(0.2) )
118 } else {
119 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
120 }
121 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) )
122 my_load_distal <- loadp
123 my_load_proximal <- loado
124 red <- as.numeric(correlation > 0) * vipcp
125 blue <- as.numeric(correlation < 0) * vipcp
126 alpha <- 0.1 + 0.4 * vipcp
127 red[is.na(red)] <- 0
128 blue[is.na(blue)] <- 0
129 alpha[is.na(alpha)] <- 0
130 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha)
131 main_label <- sprintf("%s for level %s versus %s"
132 , x_prefix, fctr_lvl_1, fctr_lvl_2)
133 } else { # orthogonal projection
134 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83)))
135 if (!cplot_x) {
136 my_xlab <- "relative covariance(feature,to1)"
137 my_x <- -plus_cov
138 } else {
139 my_xlab <- "variable importance in orthogonal projection"
140 my_x <- vip4o
141 }
142 if (!cplot_x) { # S-plot orthogonal projection
143 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) )
144 my_ylab <- "correlation(feature,to1)"
145 my_y <- plus_cor
146 } else { # C-plot orthogonal projection
147 lim_x <- max(my_x, na.rm = TRUE) * 1.1
148 my_xlim <- c( 0, lim_x + off(0.2) )
149 if (cplot_y_correlation) {
150 my_ylab <- "correlation(feature,to1)"
151 my_y <- plus_cor
152 } else {
153 my_ylab <- "relative covariance(feature,to1)"
154 my_y <- plus_cov
155 }
156 }
157 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) )
158 my_load_distal <- loado
159 my_load_proximal <- loadp
160 alpha <- 0.1 + 0.4 * vipco
161 alpha[is.na(alpha)] <- 0
162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha)
163 main_label <- sprintf(
164 "Features influencing orthogonal projection for %s versus %s"
165 , fctr_lvl_1, fctr_lvl_2)
77 } 166 }
78 n_labels <- min( n_labels, (1 + length(loadp)) / 2 ) 167 main_cex <- min(1.0, 46.0/nchar(main_label))
79 labels_to_show <- c( 168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward
80 names(head(sort(my_loadp),n = n_labels)) 169 plot(
81 , names(tail(sort(my_loadp),n = n_labels)) 170 y = my_y
171 , x = my_x
172 , type = "p"
173 , xlim = my_xlim
174 , ylim = my_ylim
175 , xlab = my_xlab
176 , ylab = my_ylab
177 , main = main_label
178 , cex.main = main_cex
179 , cex = cex
180 , pch = 16
181 , col = my_col
82 ) 182 )
83 if ( x_show_loado_labels ) { 183 low_x <- -0.7 * lim_x
184 high_x <- 0.7 * lim_x
185 if (projection == 1 && !cplot_x) {
186 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue")
187 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red")
188 }
189 if ( x_show_labels != "0" ) {
190 names(my_load_distal) <- tsv1$featureID
191 names(my_load_proximal) <- tsv1$featureID
192 if ( x_show_labels == "ALL" ) {
193 n_labels <- length(my_load_distal)
194 } else {
195 n_labels <- as.numeric(x_show_labels)
196 }
197 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 )
84 labels_to_show <- c( 198 labels_to_show <- c(
85 labels_to_show 199 names(head(sort(my_load_distal),n = n_labels))
86 , names(head(sort(my_loado),n = n_labels)) 200 , names(tail(sort(my_load_distal),n = n_labels))
87 , names(tail(sort(my_loado),n = n_labels))
88 ) 201 )
202 labels <- unname(
203 sapply(
204 X = tsv1$featureID
205 , FUN = function(x) if( x %in% labels_to_show ) x else ""
206 )
207 )
208 x_text_offset <- 0.024
209 y_text_off <- 0.017
210 if (!cplot_x) { # S-plot
211 y_text_offset <- if (projection == 1) -y_text_off else y_text_off
212 } else { # C-plot
213 y_text_offset <-
214 sapply(
215 X = (my_y > 0)
216 , FUN = function(x) { if (x) y_text_off else -y_text_off }
217 )
218 }
219 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) {
220 # print("str(x_arg)")
221 # print(str(x_arg))
222 # print("str(y_arg)")
223 # print(str(y_arg))
224 # print("str(labels_arg)")
225 # print(str(labels_arg))
226 if (length(labels_arg) > 0) {
227 unique_slant <- unique(slant_arg)
228 if (length(unique_slant) == 1) {
229 text(
230 y = y_arg
231 , x = x_arg + x_text_offset
232 , cex = 0.4
233 , labels = labels_arg
234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
235 , srt = slant_arg
236 , adj = 0 # left-justified
237 )
238 } else {
239 for (slant in unique_slant) {
240 text(
241 y = y_arg[slant_arg == slant]
242 , x = x_arg[slant_arg == slant] + x_text_offset
243 , cex = 0.4
244 , labels = labels_arg[slant_arg == slant]
245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels
246 , srt = slant
247 , adj = 0 # left-justified
248 )
249 }
250 }
251 }
252 }
253 if (!cplot_x) {
254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant
255 } else {
256 my_slant <- sapply(
257 X = (my_y > 0)
258 , FUN = function(x) if (x) 2 else -2
259 ) * my_feature_label_slant
260 }
261 if (length(my_x) > 1) {
262 label_features(
263 x_arg = my_x [my_x > 0]
264 , y_arg = my_y [my_x > 0] - y_text_offset
265 , labels_arg = labels[my_x > 0]
266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant))
267 )
268 if (!cplot_x) {
269 label_features(
270 x_arg = my_x [my_x < 0]
271 , y_arg = my_y [my_x < 0] + y_text_offset
272 , labels_arg = labels[my_x < 0]
273 , slant_arg = my_slant
274 )
275 }
276 } else {
277 if (!cplot_x) {
278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant
279 my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset
280 } else {
281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant
282 my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset
283 }
284 label_features(
285 x_arg = my_x
286 , y_arg = my_y_arg
287 , labels_arg = labels
288 , slant_arg = my_slant
289 )
290 }
89 } 291 }
90 labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" ))
91 text(
92 y = plus_cor - 0.013
93 , x = plus_cov + 0.020
94 , cex = 0.4
95 , labels = labels
96 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha)
97 , srt = -30 # slant 30 degrees downward
98 , adj = 0 # left-justified
99 )
100 } 292 }
101 } 293 )
294 return (my_cor_vs_cov)
295 }
296 my_cor_vs_cov <- do_s_plot(
297 x_env = x_env
298 , predictor_projection_x = TRUE
299 , cplot_x = FALSE
102 ) 300 )
103 typeVc <- c("correlation", # 1 301 typeVc <- c("correlation", # 1
104 "outlier", # 2 302 "outlier", # 2
105 "overview", # 3 303 "overview", # 3
106 "permutation", # 4 304 "permutation", # 4
116 if ( length(my_oplsda@orthoVipVn) > 0 ) { 314 if ( length(my_oplsda@orthoVipVn) > 0 ) {
117 my_typevc <- typeVc[c(9,3,8)] 315 my_typevc <- typeVc[c(9,3,8)]
118 } else { 316 } else {
119 my_typevc <- c("(dummy)","overview","(dummy)") 317 my_typevc <- c("(dummy)","overview","(dummy)")
120 } 318 }
319 my_ortho_cor_vs_cov_exists <- FALSE
121 for (my_type in my_typevc) { 320 for (my_type in my_typevc) {
122 if (my_type %in% typeVc) { 321 if (my_type %in% typeVc) {
123 # print(sprintf("plotting type %s", my_type))
124 tryCatch({ 322 tryCatch({
125 plot( 323 if ( my_type != "x-loading" ) {
126 x = my_oplsda 324 plot(
127 , typeVc = my_type 325 x = my_oplsda
128 , parCexN = 0.4 326 , typeVc = my_type
129 , parDevNewL = FALSE 327 , parCexN = 0.4
130 , parLayL = TRUE 328 , parDevNewL = FALSE
131 , parEllipsesL = TRUE 329 , parLayL = TRUE
132 ) 330 , parEllipsesL = TRUE
331 )
332 if (my_type == "overview") {
333 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2)
334 title(sub = sub_label)
335 }
336 } else {
337 my_ortho_cor_vs_cov <- do_s_plot(
338 x_env = x_env
339 , predictor_projection_x = FALSE
340 , cplot_x = FALSE
341 )
342 my_ortho_cor_vs_cov_exists <- TRUE
343 }
133 }, error = function(e) { 344 }, error = function(e) {
134 x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2)) 345 x_progress(
346 sprintf(
347 "factor level %s or %s may have only one sample - %s"
348 , fctr_lvl_1
349 , fctr_lvl_2
350 , e$message
351 )
352 )
135 }) 353 })
136 } else { 354 } else {
137 # print("plotting dummy graph")
138 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") 355 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
139 text(x=1, y=1, labels="no orthogonal projection is possible") 356 text(x=1, y=1, labels="no orthogonal projection is possible")
357 }
358 }
359 cplot_p <- x_env$cplot_p
360 cplot_o <- x_env$cplot_o
361 if (cplot_p || cplot_o) {
362 if (cplot_p) {
363 do_s_plot(
364 x_env = x_env
365 , predictor_projection_x = TRUE
366 , cplot_x = TRUE
367 , cor_vs_cov_x = my_cor_vs_cov
368 )
369 did_plots <- 1
370 } else {
371 did_plots <- 0
372 }
373 if (cplot_o) {
374 if (my_ortho_cor_vs_cov_exists) {
375 do_s_plot(
376 x_env = x_env
377 , predictor_projection_x = FALSE
378 , cplot_x = TRUE
379 , cor_vs_cov_x = my_ortho_cor_vs_cov
380 )
381 } else {
382 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n")
383 text(x=1, y=1, labels="no orthogonal projection is possible")
384 }
385 did_plots <- 1 + did_plots
386 }
387 if (did_plots == 1) {
388 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white")
140 } 389 }
141 } 390 }
142 return (my_cor_vs_cov) 391 return (my_cor_vs_cov)
143 } else { 392 } else {
144 # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor))))
145 return (NULL) 393 return (NULL)
146 } 394 }
147 } 395 }
148 396
149 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 397 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
150 corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) { 398 corcov_calc <- function(
399 calc_env
400 , failure_action = stop
401 , progress_action = function(x) { }
402 , corcov_tsv_action = function(t) { }
403 , salience_tsv_action = function(t) { }
404 , extra_plots = c()
405 ) {
151 if ( ! is.environment(calc_env) ) { 406 if ( ! is.environment(calc_env) ) {
152 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") 407 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
153 return ( FALSE ) 408 return ( FALSE )
154 } 409 }
155 if ( ! is.function(corcov_tsv_action) ) { 410 if ( ! is.function(corcov_tsv_action) ) {
172 # extract the levels from the environment 427 # extract the levels from the environment
173 originalLevCSV <- levCSV <- calc_env$levCSV 428 originalLevCSV <- levCSV <- calc_env$levCSV
174 # matchingC is one of { "none", "wildcard", "regex" } 429 # matchingC is one of { "none", "wildcard", "regex" }
175 matchingC <- calc_env$matchingC 430 matchingC <- calc_env$matchingC
176 labelFeatures <- calc_env$labelFeatures 431 labelFeatures <- calc_env$labelFeatures
177 labelOrthoFeatures <- calc_env$labelOrthoFeatures
178 432
179 # arg/env checking 433 # arg/env checking
180 if (!(facC %in% names(smpl_metadata))) { 434 if (!(facC %in% names(smpl_metadata))) {
181 failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) 435 failure_action(
436 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata"
437 , facC))
182 return ( FALSE ) 438 return ( FALSE )
183 } 439 }
184 440
185 mz <- vrbl_metadata$mz 441 mz <- vrbl_metadata$mz
186 names(mz) <- vrbl_metadata$variableMetadata 442 names(mz) <- vrbl_metadata$variableMetadata
187 mz_lookup <- function(feature) unname(mz[feature]) 443 mz_lookup <- function(feature) unname(mz[feature])
188 444
189 rt <- vrbl_metadata$rt 445 rt <- vrbl_metadata$rt
190 names(rt) <- vrbl_metadata$variableMetadata 446 names(rt) <- vrbl_metadata$variableMetadata
191 rt_lookup <- function(feature) unname(rt[feature]) 447 rt_lookup <- function(feature) unname(rt[feature])
192 448
193 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) 449 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv)
194 salience_df <- calc_env$salience_df <- w4msalience( 450 salience_df <- calc_env$salience_df <- w4msalience(
195 data_matrix = data_matrix 451 data_matrix = data_matrix
196 , sample_class = smpl_metadata[,facC] 452 , sample_class = smpl_metadata[,facC]
197 , failure_action = failure_action 453 , failure_action = failure_action
234 ) 490 )
235 ) 491 )
236 } 492 }
237 493
238 # transpose matrix because ropls matrix is the transpose of XCMS matrix 494 # transpose matrix because ropls matrix is the transpose of XCMS matrix
495 tdm <- t(data_matrix)
239 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot 496 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot
240 # center 497 # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2.
241 cdm <- center_colmeans(t(data_matrix))
242 # pareto-scale
243 my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE))
244 scdm <- sweep(cdm, 2, my_scale, "/")
245 498
246 # pattern to match column names like k10_kruskal_k4.k3_sig 499 # pattern to match column names like k10_kruskal_k4.k3_sig
247 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) 500 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC)
248 # column name like k10_kruskal_sig 501 # column name like k10_kruskal_sig
249 intersample_sig_col <- sprintf('%s_%s_sig', facC, tesC) 502 intersample_sig_col <- sprintf('%s_%s_sig', facC, tesC)
263 did_plot <- FALSE 516 did_plot <- FALSE
264 if (tesC != "none") { 517 if (tesC != "none") {
265 # for each column name, extract the parts of the name matched by 'col_pattern', if any 518 # for each column name, extract the parts of the name matched by 'col_pattern', if any
266 the_colnames <- colnames(vrbl_metadata) 519 the_colnames <- colnames(vrbl_metadata)
267 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { 520 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
268 failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) 521 failure_action(
522 sprintf(
523 "bad parameter! variableMetadata must contain results of W4M Univariate test '%s'."
524 , tesC))
269 return ( FALSE ) 525 return ( FALSE )
270 } 526 }
271 col_matches <- lapply( 527 col_matches <- lapply(
272 X = the_colnames, 528 X = the_colnames,
273 FUN = function(x) { 529 FUN = function(x) {
290 level_union <- c(level_union, col_match[2], col_match[3]) 546 level_union <- c(level_union, col_match[2], col_match[3])
291 } 547 }
292 } 548 }
293 } 549 }
294 level_union <- unique(sort(level_union)) 550 level_union <- unique(sort(level_union))
295 overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) 551 overall_significant <- 1 == (
552 if (intersample_sig_col %in% colnames(vrbl_metadata)) {
553 vrbl_metadata[,intersample_sig_col]
554 } else {
555 1
556 }
557 )
296 if ( length(level_union) > 2 ) { 558 if ( length(level_union) > 2 ) {
297 chosen_samples <- smpl_metadata_facC %in% level_union 559 chosen_samples <- smpl_metadata_facC %in% level_union
298 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 560 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
299 col_selector <- vrbl_metadata_names[ overall_significant ] 561 col_selector <- vrbl_metadata_names[ overall_significant ]
300 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] 562 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
301 plot_action <- function(fctr_lvl_1, fctr_lvl_2) { 563 plot_action <- function(fctr_lvl_1, fctr_lvl_2) {
302 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 564 progress_action(
303 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) 565 sprintf("calculating/plotting contrast of %s vs. %s"
566 , fctr_lvl_1, fctr_lvl_2)
567 )
568 predictor <- sapply(
569 X = chosen_facC
570 , FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
571 )
304 my_cor_cov <- do_detail_plot( 572 my_cor_cov <- do_detail_plot(
305 x_dataMatrix = my_matrix 573 x_dataMatrix = my_matrix
306 , x_predictor = predictor 574 , x_predictor = predictor
307 , x_is_match = is_match 575 , x_is_match = TRUE
308 , x_algorithm = algoC 576 , x_algorithm = algoC
309 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" 577 , x_prefix = if (pairSigFeatOnly) {
578 "Significantly contrasting features"
579 } else {
580 "Significant features"
581 }
310 , x_show_labels = labelFeatures 582 , x_show_labels = labelFeatures
311 , x_show_loado_labels = labelOrthoFeatures
312 , x_progress = progress_action 583 , x_progress = progress_action
313 , x_crossval_i = min(7, length(chosen_samples)) 584 , x_crossval_i = min(7, length(chosen_samples))
314 , x_env = calc_env 585 , x_env = calc_env
315 ) 586 )
316 if ( is.null(my_cor_cov) ) { 587 if ( is.null(my_cor_cov) ) {
317 progress_action("NOTHING TO PLOT.") 588 progress_action("NOTHING TO PLOT")
318 } else { 589 } else {
319 my_tsv <- my_cor_cov$tsv1 590 my_tsv <- my_cor_cov$tsv1
320 my_tsv$mz <- mz_lookup(my_tsv$featureID) 591 my_tsv$mz <- mz_lookup(my_tsv$featureID)
321 my_tsv$rt <- rt_lookup(my_tsv$featureID) 592 my_tsv$rt <- rt_lookup(my_tsv$featureID)
322 my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 593 my_tsv["level1Level2Sig"] <- vrbl_metadata[
594 match(my_tsv$featureID, vrbl_metadata_names)
595 , vrbl_metadata_col
596 ]
323 tsv <<- my_tsv 597 tsv <<- my_tsv
324 corcov_tsv_action(tsv) 598 corcov_tsv_action(tsv)
325 did_plot <<- TRUE 599 did_plot <<- TRUE
326 } 600 }
327 } 601 }
346 vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name 620 vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name
347 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 621 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1
348 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 622 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2
349 # only process this column if both factors are members of lvlCSV 623 # only process this column if both factors are members of lvlCSV
350 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) 624 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
351 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 625 if (is_match) {
352 # TODO delete next line displaying currently-processed column 626 progress_action(
353 # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match)) 627 sprintf("calculating/plotting contrast of %s vs. %s."
354 # choose only samples with one of the two factors for this column 628 , fctr_lvl_1, fctr_lvl_2
355 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 629 )
356 predictor <- smpl_metadata_facC[chosen_samples] 630 )
357 # extract only the significantly-varying features and the chosen samples 631 # choose only samples with one of the two factors for this column
358 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) 632 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
359 col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] 633 predictor <- smpl_metadata_facC[chosen_samples]
360 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] 634 # extract only the significantly-varying features and the chosen samples
361 my_cor_cov <- do_detail_plot( 635 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] *
362 x_dataMatrix = my_matrix 636 ( if (intersample_sig_col %in% colnames(vrbl_metadata)) {
363 , x_predictor = predictor 637 vrbl_metadata[,intersample_sig_col]
364 , x_is_match = is_match 638 } else {
365 , x_algorithm = algoC 639 1
366 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" 640 }
367 , x_show_labels = labelFeatures 641 )
368 , x_show_loado_labels = labelOrthoFeatures 642 col_selector <- vrbl_metadata_names[
369 , x_progress = progress_action 643 if ( pairSigFeatOnly ) fully_significant else overall_significant
370 , x_crossval_i = min(7, length(chosen_samples)) 644 ]
371 , x_env = calc_env 645 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ]
372 ) 646 my_cor_cov <- do_detail_plot(
373 if ( is.null(my_cor_cov) ) { 647 x_dataMatrix = my_matrix
374 progress_action("NOTHING TO PLOT.") 648 , x_predictor = predictor
649 , x_is_match = is_match
650 , x_algorithm = algoC
651 , x_prefix = if (pairSigFeatOnly) {
652 "Significantly contrasting features"
653 } else {
654 "Significant features"
655 }
656 , x_show_labels = labelFeatures
657 , x_progress = progress_action
658 , x_crossval_i = min(7, length(chosen_samples))
659 , x_env = calc_env
660 )
661 if ( is.null(my_cor_cov) ) {
662 progress_action("NOTHING TO PLOT.")
663 } else {
664 tsv <- my_cor_cov$tsv1
665 tsv$mz <- mz_lookup(tsv$featureID)
666 tsv$rt <- rt_lookup(tsv$featureID)
667 tsv["level1Level2Sig"] <- vrbl_metadata[
668 match(tsv$featureID, vrbl_metadata_names)
669 , vrbl_metadata_col
670 ]
671 corcov_tsv_action(tsv)
672 did_plot <- TRUE
673 }
375 } else { 674 } else {
376 tsv <- my_cor_cov$tsv1 675 progress_action(
377 tsv$mz <- mz_lookup(tsv$featureID) 676 sprintf("skipping contrast of %s vs. %s."
378 tsv$rt <- rt_lookup(tsv$featureID) 677 , fctr_lvl_1, fctr_lvl_2
379 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 678 )
380 corcov_tsv_action(tsv) 679 )
381 did_plot <- TRUE
382 } 680 }
383 } 681 }
384 } 682 }
385 } 683 }
386 } else { # tesC == "none" 684 } else { # tesC == "none"
685 # find all the levels for factor facC in sampleMetadata
387 level_union <- unique(sort(smpl_metadata_facC)) 686 level_union <- unique(sort(smpl_metadata_facC))
687 # identify the selected levels for factor facC from sampleMetadata
688 level_include <- sapply(X = level_union, FUN = isLevelSelected)
689 # discard the non-selected levels for factor facC
690 level_union <- level_union[level_include]
388 if ( length(level_union) > 1 ) { 691 if ( length(level_union) > 1 ) {
389 if ( length(level_union) > 2 ) { 692 if ( length(level_union) > 2 ) {
390 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ## 693 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
391 completed <- c() 694 completed <- c()
392 lapply( 695 lapply(
393 X = level_union 696 X = level_union
394 , FUN = function(x) { 697 , FUN = function(x) {
395 fctr_lvl_1 <- x[1] 698 fctr_lvl_1 <- x[1]
396 fctr_lvl_2 <- { 699 fctr_lvl_2 <- {
397 if ( fctr_lvl_1 %in% completed ) 700 if ( fctr_lvl_1 %in% completed )
398 return("DUMMY") 701 return("DUMMY")
399 # strF(completed) 702 # strF(completed)
400 completed <<- c(completed, fctr_lvl_1) 703 completed <<- c(completed, fctr_lvl_1)
401 setdiff(level_union, fctr_lvl_1) 704 setdiff(level_union, fctr_lvl_1)
402 } 705 }
403 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 706 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
404 fctr_lvl_2 <- "other" 707 fctr_lvl_2 <- "other"
405 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
406 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
407 if (length(unique(chosen_samples)) < 1) { 708 if (length(unique(chosen_samples)) < 1) {
408 progress_action("NOTHING TO PLOT...") 709 progress_action(
710 sprintf("Skipping contrast of %s vs. %s; there are no chosen samples."
711 , fctr_lvl_1, fctr_lvl_2)
712 )
409 } else { 713 } else {
410 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) 714 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
411 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) 715 predictor <- sapply(
412 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] 716 X = chosen_facC
717 , FUN = function(fac) {
718 if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2
719 }
720 )
721 my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
413 # only process this column if both factors are members of lvlCSV 722 # only process this column if both factors are members of lvlCSV
414 is_match <- isLevelSelected(fctr_lvl_1) 723 is_match <- isLevelSelected(fctr_lvl_1)
724 if (is_match) {
725 progress_action(
726 sprintf("Calculating/plotting contrast of %s vs. %s"
727 , fctr_lvl_1, fctr_lvl_2)
728 )
729 my_cor_cov <- do_detail_plot(
730 x_dataMatrix = my_matrix
731 , x_predictor = predictor
732 , x_is_match = is_match
733 , x_algorithm = algoC
734 , x_prefix = "Features"
735 , x_show_labels = labelFeatures
736 , x_progress = progress_action
737 , x_crossval_i = min(7, length(chosen_samples))
738 , x_env = calc_env
739 )
740 if ( is.null(my_cor_cov) ) {
741 progress_action("NOTHING TO PLOT...")
742 } else {
743 tsv <- my_cor_cov$tsv1
744 tsv$mz <- mz_lookup(tsv$featureID)
745 tsv$rt <- rt_lookup(tsv$featureID)
746 corcov_tsv_action(tsv)
747 did_plot <<- TRUE
748 }
749 } else {
750 }
751 }
752 "dummy" # need to return a value; otherwise combn fails with an error
753 }
754 )
755 }
756 ## pass 2 - contrast each selected level with each of the other levels individually ##
757 completed <- c()
758 utils::combn(
759 x = level_union
760 , m = 2
761 , FUN = function(x) {
762 fctr_lvl_1 <- x[1]
763 fctr_lvl_2 <- x[2]
764 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
765 if (length(unique(chosen_samples)) < 1) {
766 progress_action(
767 sprintf("Skipping contrast of %s vs. %s. - There are no chosen samples."
768 , fctr_lvl_1, fctr_lvl_2
769 )
770 )
771 } else {
772 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
773 predictor <- chosen_facC
774 my_matrix <- tdm[ chosen_samples, , drop = FALSE ]
775 # only process this column if both factors are members of lvlCSV
776 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
777 if (is_match) {
778 progress_action(
779 sprintf("Calculating/plotting contrast of %s vs. %s."
780 , fctr_lvl_1, fctr_lvl_2)
781 )
415 my_cor_cov <- do_detail_plot( 782 my_cor_cov <- do_detail_plot(
416 x_dataMatrix = my_matrix 783 x_dataMatrix = my_matrix
417 , x_predictor = predictor 784 , x_predictor = predictor
418 , x_is_match = is_match 785 , x_is_match = is_match
419 , x_algorithm = algoC 786 , x_algorithm = algoC
420 , x_prefix = "Features" 787 , x_prefix = "Features"
421 , x_show_labels = labelFeatures 788 , x_show_labels = labelFeatures
422 , x_show_loado_labels = labelOrthoFeatures
423 , x_progress = progress_action 789 , x_progress = progress_action
424 , x_crossval_i = min(7, length(chosen_samples)) 790 , x_crossval_i = min(7, length(chosen_samples))
425 , x_env = calc_env 791 , x_env = calc_env
426 ) 792 )
427 if ( is.null(my_cor_cov) ) { 793 if ( is.null(my_cor_cov) ) {
428 progress_action("NOTHING TO PLOT") 794 progress_action("NOTHING TO PLOT.....")
429 } else { 795 } else {
430 tsv <- my_cor_cov$tsv1 796 tsv <- my_cor_cov$tsv1
431 tsv$mz <- mz_lookup(tsv$featureID) 797 tsv$mz <- mz_lookup(tsv$featureID)
432 tsv$rt <- rt_lookup(tsv$featureID) 798 tsv$rt <- rt_lookup(tsv$featureID)
433 corcov_tsv_action(tsv) 799 corcov_tsv_action(tsv)
434 did_plot <<- TRUE 800 did_plot <<- TRUE
435 } 801 }
436 } 802 } else {
437 #print("baz") 803 progress_action(
438 "dummy" # need to return a value; otherwise combn fails with an error 804 sprintf("Skipping contrast of %s vs. %s."
805 , fctr_lvl_1, fctr_lvl_2
806 )
807 )
808 }
439 } 809 }
440 )
441 }
442 ## pass 2 - contrast each selected level with each of the other levels individually ##
443 completed <- c()
444 utils::combn(
445 x = level_union
446 , m = 2
447 , FUN = function(x) {
448 fctr_lvl_1 <- x[1]
449 fctr_lvl_2 <- x[2]
450 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
451 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
452 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
453 if (length(unique(chosen_samples)) < 1) {
454 progress_action("NOTHING TO PLOT...")
455 } else {
456 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
457 predictor <- chosen_facC
458 my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
459 # only process this column if both factors are members of lvlCSV
460 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
461 my_cor_cov <- do_detail_plot(
462 x_dataMatrix = my_matrix
463 , x_predictor = predictor
464 , x_is_match = is_match
465 , x_algorithm = algoC
466 , x_prefix = "Features"
467 , x_show_labels = labelFeatures
468 , x_show_loado_labels = labelOrthoFeatures
469 , x_progress = progress_action
470 , x_crossval_i = min(7, length(chosen_samples))
471 , x_env = calc_env
472 )
473 if ( is.null(my_cor_cov) ) {
474 progress_action("NOTHING TO PLOT")
475 } else {
476 tsv <- my_cor_cov$tsv1
477 tsv$mz <- mz_lookup(tsv$featureID)
478 tsv$rt <- rt_lookup(tsv$featureID)
479 corcov_tsv_action(tsv)
480 did_plot <<- TRUE
481 }
482 }
483 #print("baz")
484 "dummy" # need to return a value; otherwise combn fails with an error 810 "dummy" # need to return a value; otherwise combn fails with an error
485 } 811 }
486 ) 812 )
487 } else { 813 } else {
488 progress_action("NOTHING TO PLOT....") 814 progress_action("NOTHING TO PLOT......")
489 } 815 }
490 } 816 }
491 if (!did_plot) { 817 if (!did_plot) {
492 failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) 818 failure_action(
819 sprintf(
820 "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'"
821 , facC, originalLevCSV))
493 return ( FALSE ) 822 return ( FALSE )
494 } 823 }
495 return ( TRUE ) 824 return ( TRUE )
496 } 825 }
497 826
498 # Calculate data for correlation-versus-covariance plot 827 # Calculate data for correlation-versus-covariance plot
499 # Adapted from: 828 # Adapted from:
500 # Wiklund_2008 doi:10.1021/ac0713510 829 # Wiklund_2008 doi:10.1021/ac0713510
501 # Galindo_Prieto_2014 doi:10.1002/cem.2627 830 # Galindo_Prieto_2014 doi:10.1002/cem.2627
502 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R 831 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R
503 cor_vs_cov <- function(matrix_x, ropls_x) { 832 cor_vs_cov <- function(
833 matrix_x
834 , ropls_x
835 , predictor_projection_x = TRUE
836 , x_progress = print
837 ) {
838 tryCatch({
839 return(
840 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress)
841 )
842 }, error = function(e) {
843 x_progress(
844 sprintf(
845 "cor_vs_cov fatal error - %s"
846 , as.character(e) # e$message
847 )
848 )
849 return ( NULL )
850 })
851 }
852
853 cor_vs_cov_try <- function(
854 matrix_x
855 , ropls_x
856 , predictor_projection_x = TRUE
857 , x_progress = print
858 ) {
504 x_class <- class(ropls_x) 859 x_class <- class(ropls_x)
505 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) 860 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) )
506 stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) 861 stop(
862 paste(
863 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class "
864 , as.character(x_class)
865 )
866 )
507 } 867 }
508 result <- list() 868 result <- list()
869 result$projection <- projection <- if (predictor_projection_x) 1 else 2
509 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS 870 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS
510 if ( ropls_x@suppLs$algoC == "nipals") { 871 if ( ropls_x@suppLs$algoC == "nipals") {
511 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 872 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510
512 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) 873 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional))
513 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) 874 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x]))
514 score_matrix <- ropls_x@scoreMN 875 if (predictor_projection_x)
876 score_matrix <- ropls_x@scoreMN
877 else
878 score_matrix <- ropls_x@orthoScoreMN
515 score_matrix_transposed <- t(score_matrix) 879 score_matrix_transposed <- t(score_matrix)
516 score_matrix_magnitude <- mag(score_matrix) 880 score_matrix_magnitude <- mag(score_matrix)
517 result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) 881 result$covariance <-
518 result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) 882 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude )
883 result$correlation <-
884 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi )
519 } else { 885 } else {
520 # WARNING - untested code - I don't have test data to exercise this branch 886 # WARNING - untested code - I don't have test data to exercise this branch
521 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 887 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510
522 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F 888 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F
523 score_matrix <- ropls_x@scoreMN 889 if (predictor_projection_x)
890 score_matrix <- ropls_x@scoreMN
891 else
892 score_matrix <- ropls_x@orthoScoreMN
524 score_matrix_transposed <- t(score_matrix) 893 score_matrix_transposed <- t(score_matrix)
525 cov_divisor <- nrow(matrix_x) - 1 894 cov_divisor <- nrow(matrix_x) - 1
526 result$covariance <- sapply( 895 result$covariance <- sapply(
527 X = 1:ncol(matrix_x) 896 X = 1:ncol(matrix_x)
528 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor 897 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor
538 , FUN = function(x) { 907 , FUN = function(x) {
539 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) 908 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) )
540 } 909 }
541 ) 910 )
542 } 911 }
543 result$correlation <- result$correlation[1,,drop = TRUE] 912 result$correlation <- result$correlation[ 1, , drop = TRUE ]
544 result$covariance <- result$covariance[1,,drop = TRUE] 913 result$covariance <- result$covariance [ 1, , drop = TRUE ]
545 914
546 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 915 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014
547 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) 916 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.)
548 result$vip4p <- as.numeric(ropls_x@vipVn) 917 result$vip4p <- as.numeric(ropls_x@vipVn)
549 result$vip4o <- as.numeric(ropls_x@orthoVipVn) 918 result$vip4o <- as.numeric(ropls_x@orthoVipVn)
554 fctr_lvl_1 <- level_names[1] 923 fctr_lvl_1 <- level_names[1]
555 fctr_lvl_2 <- level_names[2] 924 fctr_lvl_2 <- level_names[2]
556 feature_count <- length(ropls_x@vipVn) 925 feature_count <- length(ropls_x@vipVn)
557 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) 926 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count)
558 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) 927 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count)
559 # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation)))
560 superresult <- list() 928 superresult <- list()
561 if (length(result$vip4o) == 0) result$vip4o <- NA 929 if (length(result$vip4o) == 0) result$vip4o <- NA
562 greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) 930 greaterLevel <- sapply(
563 superresult$tsv1 <- data.frame( 931 X = result$correlation
564 featureID = names(ropls_x@vipVn) 932 , FUN = function(my_corr)
933 tryCatch({
934 if ( is.nan( my_corr ) ) {
935 print("my_corr is NaN")
936 NA
937 } else {
938 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2
939 }
940 }, error = function(e) {
941 x_progress(
942 sprintf(
943 "cor_vs_cov -> sapply: error - substituting NA - %s"
944 , as.character(e)
945 )
946 )
947 NA
948 })
949 )
950
951 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
952 featureID <- names(ropls_x@vipVn)
953 greaterLevel <- greaterLevel[featureID]
954 result$correlation <- result$correlation[featureID]
955 result$covariance <- result$covariance[featureID]
956 # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1
957
958 tsv1 <- data.frame(
959 featureID = featureID
565 , factorLevel1 = result$level1 960 , factorLevel1 = result$level1
566 , factorLevel2 = result$level2 961 , factorLevel2 = result$level2
567 , greaterLevel = greaterLevel 962 , greaterLevel = greaterLevel
963 , projection = result$projection
568 , correlation = result$correlation 964 , correlation = result$correlation
569 , covariance = result$covariance 965 , covariance = result$covariance
570 , vip4p = result$vip4p 966 , vip4p = result$vip4p
571 , vip4o = result$vip4o 967 , vip4o = result$vip4o
572 , loadp = result$loadp 968 , loadp = result$loadp
573 , loado = result$loado 969 , loado = result$loado
574 , row.names = NULL 970 , row.names = NULL
575 ) 971 )
576 rownames(superresult$tsv1) <- superresult$tsv1$featureID 972 tsv1 <- tsv1[!is.na(tsv1$correlation),]
973 tsv1 <- tsv1[!is.na(tsv1$covariance),]
974 superresult$tsv1 <- tsv1
975 rownames(superresult$tsv1) <- tsv1$featureID
976 superresult$projection <- result$projection
577 superresult$covariance <- result$covariance 977 superresult$covariance <- result$covariance
578 superresult$correlation <- result$correlation 978 superresult$correlation <- result$correlation
579 superresult$vip4p <- result$vip4p 979 superresult$vip4p <- result$vip4p
580 superresult$vip4o <- result$vip4o 980 superresult$vip4o <- result$vip4o
581 superresult$loadp <- result$loadp 981 superresult$loadp <- result$loadp
582 superresult$loado <- result$loado 982 superresult$loado <- result$loado
583 superresult$details <- result 983 superresult$details <- result
584 # #print(superresult$tsv1)
585 result$superresult <- superresult 984 result$superresult <- superresult
586 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways 985 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways
587 result$oplsda <- ropls_x 986 result$oplsda <- ropls_x
588 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways 987 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways
589 return (superresult) 988 return (superresult)
590 } 989 }
591 990
592 991 # vim: sw=2 ts=2 et :