Mercurial > repos > eschen42 > w4mcorcov
comparison w4mcorcov_calc.R @ 6:0b49916c5c52 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit 4428e3252d54c8a8e0e5d85e8eaaeb13e9b21de7
author | eschen42 |
---|---|
date | Wed, 05 Sep 2018 19:24:47 -0400 |
parents | 1d046f648b47 |
children | ca9938f2eb6a |
comparison
equal
deleted
inserted
replaced
5:1d046f648b47 | 6:0b49916c5c52 |
---|---|
5 } | 5 } |
6 | 6 |
7 #### OPLS-DA | 7 #### OPLS-DA |
8 algoC <- "nipals" | 8 algoC <- "nipals" |
9 | 9 |
10 do_detail_plot <- function(x_dataMatrix, x_predictor, x_is_match, x_algorithm, x_prefix, x_show_labels, x_show_loado_labels, x_progress = print, x_env, x_crossval_i) { | 10 do_detail_plot <- function( |
11 x_dataMatrix | |
12 , x_predictor | |
13 , x_is_match | |
14 , x_algorithm | |
15 , x_prefix | |
16 , x_show_labels | |
17 , x_progress = print | |
18 , x_env | |
19 , x_crossval_i | |
20 ) { | |
11 off <- function(x) if (x_show_labels == "0") 0 else x | 21 off <- function(x) if (x_show_labels == "0") 0 else x |
12 if ( x_is_match && ncol(x_dataMatrix) > 0 && length(unique(x_predictor))> 1 && x_crossval_i < nrow(x_dataMatrix) ) { | 22 if ( x_is_match |
23 && ncol(x_dataMatrix) > 0 | |
24 && length(unique(x_predictor))> 1 | |
25 && x_crossval_i < nrow(x_dataMatrix) | |
26 ) { | |
13 my_oplsda <- opls( | 27 my_oplsda <- opls( |
14 x = x_dataMatrix | 28 x = x_dataMatrix |
15 , y = x_predictor | 29 , y = x_predictor |
16 , algoC = x_algorithm | 30 , algoC = x_algorithm |
17 , predI = 1 | 31 , predI = 1 |
18 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 | 32 , orthoI = if (ncol(x_dataMatrix) > 1) 1 else 0 |
19 , printL = FALSE | 33 , printL = FALSE |
20 , plotL = FALSE | 34 , plotL = FALSE |
21 , crossvalI = x_crossval_i | 35 , crossvalI = x_crossval_i |
36 , scaleC = "pareto" # data centered and pareto scaled here only. This line fixes issue #2. | |
22 ) | 37 ) |
38 # strip out variables having negligible variance | |
39 x_dataMatrix <- x_dataMatrix[,names(my_oplsda@vipVn), drop = FALSE] | |
23 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) | 40 my_oplsda_suppLs_y_levels <- levels(as.factor(my_oplsda@suppLs$y)) |
41 # x_progress(strF(x_dataMatrix)) | |
42 # x_progress(strF(my_oplsda)) | |
43 #x_progress(head(my_oplsda_suppLs_y_levels)) | |
44 #x_progress(unique(my_oplsda_suppLs_y_levels)) | |
24 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] | 45 fctr_lvl_1 <- my_oplsda_suppLs_y_levels[1] |
25 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] | 46 fctr_lvl_2 <- my_oplsda_suppLs_y_levels[2] |
26 my_cor_vs_cov <- cor_vs_cov( | 47 do_s_plot <- function( |
27 matrix_x = x_dataMatrix | 48 x_env |
28 , ropls_x = my_oplsda | 49 , predictor_projection_x = TRUE |
50 , cplot_x = FALSE | |
51 , cor_vs_cov_x = NULL | |
29 ) | 52 ) |
30 with( | 53 { |
31 my_cor_vs_cov | 54 #print(ls(x_env)) # "cplot_y" etc |
32 , { | 55 #print(str(x_env$cplot_y)) # chr "covariance" |
33 min_x <- min(covariance) | 56 if (cplot_x) { |
34 max_x <- max(covariance) | 57 #print(x_env$cplot_y) # "covariance" |
35 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) | 58 cplot_y_correlation <- (x_env$cplot_y == "correlation") |
36 covariance <- covariance / lim_x | 59 #print(cplot_y_correlation) # FALSE |
37 lim_x <- 1.2 | 60 } |
38 main_label <- sprintf("%s for level %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) | 61 if (is.null(cor_vs_cov_x)) { |
39 main_cex <- min(1.0, 46.0/nchar(main_label)) | 62 my_cor_vs_cov <- cor_vs_cov( |
40 # "It is generally accepted that a variable should be selected if vj>1, [27–29], | 63 matrix_x = x_dataMatrix |
41 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." | 64 , ropls_x = my_oplsda |
42 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) | 65 , predictor_projection_x = predictor_projection_x |
43 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) | 66 , x_progress |
44 alpha <- 0.1 + 0.4 * vipco | 67 ) |
45 red <- as.numeric(correlation > 0) * vipco | 68 } else { |
46 blue <- as.numeric(correlation < 0) * vipco | 69 my_cor_vs_cov <- cor_vs_cov_x |
47 plus_cor <- correlation | 70 } |
48 plus_cov <- covariance | 71 # print("str(my_cor_vs_cov)") |
49 cex <- 0.75 | 72 # str(my_cor_vs_cov) |
50 plot( | 73 if (is.null(my_cor_vs_cov) || sum(!is.na(my_cor_vs_cov$tsv1$covariance)) < 2) { |
51 y = plus_cor | 74 if (is.null(cor_vs_cov_x)) { |
52 , x = plus_cov | 75 x_progress("No cor_vs_cov data produced") |
53 , type="p" | 76 } |
54 , xlim=c( -lim_x - off(0.2), lim_x + off(0.2) ) | 77 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
55 , ylim=c( -1.0 - off(0.2), 1.0 + off(0.2) ) | 78 text(x=1, y=1, labels="too few covariance data") |
56 , xlab = sprintf("relative covariance(feature,t1)") | 79 return(my_cor_vs_cov) |
57 , ylab = sprintf("correlation(feature,t1)") | 80 } |
58 , main = main_label | 81 with( |
59 , cex.main = main_cex | 82 my_cor_vs_cov |
60 , cex = cex | 83 , { |
61 , pch = 16 | 84 min_x <- min(covariance, na.rm = TRUE) |
62 , col = rgb(blue = blue, red = red, green = 0, alpha = alpha) | 85 max_x <- max(covariance, na.rm = TRUE) |
63 ) | 86 lim_x <- max(sapply(X=c(min_x, max_x), FUN=abs)) |
64 low_x <- -0.7 * lim_x | 87 covariance <- covariance / lim_x |
65 high_x <- 0.7 * lim_x | 88 lim_x <- 1.2 |
66 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") | 89 # "It is generally accepted that a variable should be selected if vj>1, [27–29], |
67 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") | 90 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." |
68 if ( x_show_labels != "0" ) { | 91 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) |
69 my_loadp <- loadp | 92 plus_cor <- correlation |
70 my_loado <- loado | 93 plus_cov <- covariance |
71 names(my_loadp) <- tsv1$featureID | 94 cex <- 0.65 |
72 names(my_loado) <- tsv1$featureID | 95 which_projection <- if (projection == 1) "t1" else "o1" |
73 if ( x_show_labels == "ALL" ) { | 96 which_loading <- if (projection == 1) "parallel" else "orthogonal" |
74 n_labels <- length(loadp) | 97 if (projection == 1) { # predictor-projection |
75 } else { | 98 vipcp <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) |
76 n_labels <- as.numeric(x_show_labels) | 99 if (!cplot_x) { # S-plot predictor-projection |
100 my_xlab <- "relative covariance(feature,t1)" | |
101 my_x <- plus_cov | |
102 my_ylab <- "correlation(feature,t1)" | |
103 my_y <- plus_cor | |
104 } else { # C-plot predictor-projection | |
105 my_xlab <- "variable importance in predictor-projection" | |
106 my_x <- vip4p | |
107 if (cplot_y_correlation) { | |
108 my_ylab <- "correlation(feature,t1)" | |
109 my_y <- plus_cor | |
110 } else { | |
111 my_ylab <- "relative covariance(feature,t1)" | |
112 my_y <- plus_cov | |
113 } | |
114 } | |
115 if (cplot_x) { | |
116 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
117 my_xlim <- c( 0, lim_x + off(0.2) ) | |
118 } else { | |
119 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | |
120 } | |
121 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | |
122 my_load_distal <- loadp | |
123 my_load_proximal <- loado | |
124 red <- as.numeric(correlation > 0) * vipcp | |
125 blue <- as.numeric(correlation < 0) * vipcp | |
126 alpha <- 0.1 + 0.4 * vipcp | |
127 red[is.na(red)] <- 0 | |
128 blue[is.na(blue)] <- 0 | |
129 alpha[is.na(alpha)] <- 0 | |
130 my_col <- rgb(blue = blue, red = red, green = 0, alpha = alpha) | |
131 main_label <- sprintf("%s for level %s versus %s" | |
132 , x_prefix, fctr_lvl_1, fctr_lvl_2) | |
133 } else { # orthogonal projection | |
134 vipco <- pmax(0, pmin(1,(vip4o-0.83)/(1.21-0.83))) | |
135 if (!cplot_x) { | |
136 my_xlab <- "relative covariance(feature,to1)" | |
137 my_x <- -plus_cov | |
138 } else { | |
139 my_xlab <- "variable importance in orthogonal projection" | |
140 my_x <- vip4o | |
141 } | |
142 if (!cplot_x) { # S-plot orthogonal projection | |
143 my_xlim <- c( -lim_x - off(0.2), lim_x + off(0.2) ) | |
144 my_ylab <- "correlation(feature,to1)" | |
145 my_y <- plus_cor | |
146 } else { # C-plot orthogonal projection | |
147 lim_x <- max(my_x, na.rm = TRUE) * 1.1 | |
148 my_xlim <- c( 0, lim_x + off(0.2) ) | |
149 if (cplot_y_correlation) { | |
150 my_ylab <- "correlation(feature,to1)" | |
151 my_y <- plus_cor | |
152 } else { | |
153 my_ylab <- "relative covariance(feature,to1)" | |
154 my_y <- plus_cov | |
155 } | |
156 } | |
157 my_ylim <- c( -1.0 - off(0.2), 1.0 + off(0.2) ) | |
158 my_load_distal <- loado | |
159 my_load_proximal <- loadp | |
160 alpha <- 0.1 + 0.4 * vipco | |
161 alpha[is.na(alpha)] <- 0 | |
162 my_col <- rgb(blue = 0, red = 0, green = 0, alpha = alpha) | |
163 main_label <- sprintf( | |
164 "Features influencing orthogonal projection for %s versus %s" | |
165 , fctr_lvl_1, fctr_lvl_2) | |
77 } | 166 } |
78 n_labels <- min( n_labels, (1 + length(loadp)) / 2 ) | 167 main_cex <- min(1.0, 46.0/nchar(main_label)) |
79 labels_to_show <- c( | 168 my_feature_label_slant <- -30 # slant feature labels 30 degrees downward |
80 names(head(sort(my_loadp),n = n_labels)) | 169 plot( |
81 , names(tail(sort(my_loadp),n = n_labels)) | 170 y = my_y |
171 , x = my_x | |
172 , type = "p" | |
173 , xlim = my_xlim | |
174 , ylim = my_ylim | |
175 , xlab = my_xlab | |
176 , ylab = my_ylab | |
177 , main = main_label | |
178 , cex.main = main_cex | |
179 , cex = cex | |
180 , pch = 16 | |
181 , col = my_col | |
82 ) | 182 ) |
83 if ( x_show_loado_labels ) { | 183 low_x <- -0.7 * lim_x |
184 high_x <- 0.7 * lim_x | |
185 if (projection == 1 && !cplot_x) { | |
186 text(x = low_x, y = -0.05, labels = fctr_lvl_1, col = "blue") | |
187 text(x = high_x, y = 0.05, labels = fctr_lvl_2, col = "red") | |
188 } | |
189 if ( x_show_labels != "0" ) { | |
190 names(my_load_distal) <- tsv1$featureID | |
191 names(my_load_proximal) <- tsv1$featureID | |
192 if ( x_show_labels == "ALL" ) { | |
193 n_labels <- length(my_load_distal) | |
194 } else { | |
195 n_labels <- as.numeric(x_show_labels) | |
196 } | |
197 n_labels <- min( n_labels, (1 + length(my_load_distal)) / 2 ) | |
84 labels_to_show <- c( | 198 labels_to_show <- c( |
85 labels_to_show | 199 names(head(sort(my_load_distal),n = n_labels)) |
86 , names(head(sort(my_loado),n = n_labels)) | 200 , names(tail(sort(my_load_distal),n = n_labels)) |
87 , names(tail(sort(my_loado),n = n_labels)) | |
88 ) | 201 ) |
202 labels <- unname( | |
203 sapply( | |
204 X = tsv1$featureID | |
205 , FUN = function(x) if( x %in% labels_to_show ) x else "" | |
206 ) | |
207 ) | |
208 x_text_offset <- 0.024 | |
209 y_text_off <- 0.017 | |
210 if (!cplot_x) { # S-plot | |
211 y_text_offset <- if (projection == 1) -y_text_off else y_text_off | |
212 } else { # C-plot | |
213 y_text_offset <- | |
214 sapply( | |
215 X = (my_y > 0) | |
216 , FUN = function(x) { if (x) y_text_off else -y_text_off } | |
217 ) | |
218 } | |
219 label_features <- function(x_arg, y_arg, labels_arg, slant_arg) { | |
220 # print("str(x_arg)") | |
221 # print(str(x_arg)) | |
222 # print("str(y_arg)") | |
223 # print(str(y_arg)) | |
224 # print("str(labels_arg)") | |
225 # print(str(labels_arg)) | |
226 if (length(labels_arg) > 0) { | |
227 unique_slant <- unique(slant_arg) | |
228 if (length(unique_slant) == 1) { | |
229 text( | |
230 y = y_arg | |
231 , x = x_arg + x_text_offset | |
232 , cex = 0.4 | |
233 , labels = labels_arg | |
234 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
235 , srt = slant_arg | |
236 , adj = 0 # left-justified | |
237 ) | |
238 } else { | |
239 for (slant in unique_slant) { | |
240 text( | |
241 y = y_arg[slant_arg == slant] | |
242 , x = x_arg[slant_arg == slant] + x_text_offset | |
243 , cex = 0.4 | |
244 , labels = labels_arg[slant_arg == slant] | |
245 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # grey semi-transparent labels | |
246 , srt = slant | |
247 , adj = 0 # left-justified | |
248 ) | |
249 } | |
250 } | |
251 } | |
252 } | |
253 if (!cplot_x) { | |
254 my_slant <- (if (projection == 1) 1 else -1) * my_feature_label_slant | |
255 } else { | |
256 my_slant <- sapply( | |
257 X = (my_y > 0) | |
258 , FUN = function(x) if (x) 2 else -2 | |
259 ) * my_feature_label_slant | |
260 } | |
261 if (length(my_x) > 1) { | |
262 label_features( | |
263 x_arg = my_x [my_x > 0] | |
264 , y_arg = my_y [my_x > 0] - y_text_offset | |
265 , labels_arg = labels[my_x > 0] | |
266 , slant_arg = (if (!cplot_x) -my_slant else (my_slant)) | |
267 ) | |
268 if (!cplot_x) { | |
269 label_features( | |
270 x_arg = my_x [my_x < 0] | |
271 , y_arg = my_y [my_x < 0] + y_text_offset | |
272 , labels_arg = labels[my_x < 0] | |
273 , slant_arg = my_slant | |
274 ) | |
275 } | |
276 } else { | |
277 if (!cplot_x) { | |
278 my_slant <- (if (my_x > 1) -1 else 1) * my_slant | |
279 my_y_arg = my_y + (if (my_x > 1) -1 else 1) * y_text_offset | |
280 } else { | |
281 my_slant <- (if (my_y > 1) -1 else 1) * my_slant | |
282 my_y_arg = my_y + (if (my_y > 1) -1 else 1) * y_text_offset | |
283 } | |
284 label_features( | |
285 x_arg = my_x | |
286 , y_arg = my_y_arg | |
287 , labels_arg = labels | |
288 , slant_arg = my_slant | |
289 ) | |
290 } | |
89 } | 291 } |
90 labels <- unname(sapply( X = tsv1$featureID, FUN = function(x) if( x %in% labels_to_show ) x else "" )) | |
91 text( | |
92 y = plus_cor - 0.013 | |
93 , x = plus_cov + 0.020 | |
94 , cex = 0.4 | |
95 , labels = labels | |
96 , col = rgb(blue = 0, red = 0, green = 0, alpha = 0.5) # rgb(blue = blue, red = red, green = 0, alpha = 0.2 + 0.8 * alpha) | |
97 , srt = -30 # slant 30 degrees downward | |
98 , adj = 0 # left-justified | |
99 ) | |
100 } | 292 } |
101 } | 293 ) |
294 return (my_cor_vs_cov) | |
295 } | |
296 my_cor_vs_cov <- do_s_plot( | |
297 x_env = x_env | |
298 , predictor_projection_x = TRUE | |
299 , cplot_x = FALSE | |
102 ) | 300 ) |
103 typeVc <- c("correlation", # 1 | 301 typeVc <- c("correlation", # 1 |
104 "outlier", # 2 | 302 "outlier", # 2 |
105 "overview", # 3 | 303 "overview", # 3 |
106 "permutation", # 4 | 304 "permutation", # 4 |
116 if ( length(my_oplsda@orthoVipVn) > 0 ) { | 314 if ( length(my_oplsda@orthoVipVn) > 0 ) { |
117 my_typevc <- typeVc[c(9,3,8)] | 315 my_typevc <- typeVc[c(9,3,8)] |
118 } else { | 316 } else { |
119 my_typevc <- c("(dummy)","overview","(dummy)") | 317 my_typevc <- c("(dummy)","overview","(dummy)") |
120 } | 318 } |
319 my_ortho_cor_vs_cov_exists <- FALSE | |
121 for (my_type in my_typevc) { | 320 for (my_type in my_typevc) { |
122 if (my_type %in% typeVc) { | 321 if (my_type %in% typeVc) { |
123 # print(sprintf("plotting type %s", my_type)) | |
124 tryCatch({ | 322 tryCatch({ |
125 plot( | 323 if ( my_type != "x-loading" ) { |
126 x = my_oplsda | 324 plot( |
127 , typeVc = my_type | 325 x = my_oplsda |
128 , parCexN = 0.4 | 326 , typeVc = my_type |
129 , parDevNewL = FALSE | 327 , parCexN = 0.4 |
130 , parLayL = TRUE | 328 , parDevNewL = FALSE |
131 , parEllipsesL = TRUE | 329 , parLayL = TRUE |
132 ) | 330 , parEllipsesL = TRUE |
331 ) | |
332 if (my_type == "overview") { | |
333 sub_label <- sprintf("%s versus %s", fctr_lvl_1, fctr_lvl_2) | |
334 title(sub = sub_label) | |
335 } | |
336 } else { | |
337 my_ortho_cor_vs_cov <- do_s_plot( | |
338 x_env = x_env | |
339 , predictor_projection_x = FALSE | |
340 , cplot_x = FALSE | |
341 ) | |
342 my_ortho_cor_vs_cov_exists <- TRUE | |
343 } | |
133 }, error = function(e) { | 344 }, error = function(e) { |
134 x_progress(sprintf("factor level %s or %s may have only one sample", fctr_lvl_1, fctr_lvl_2)) | 345 x_progress( |
346 sprintf( | |
347 "factor level %s or %s may have only one sample - %s" | |
348 , fctr_lvl_1 | |
349 , fctr_lvl_2 | |
350 , e$message | |
351 ) | |
352 ) | |
135 }) | 353 }) |
136 } else { | 354 } else { |
137 # print("plotting dummy graph") | |
138 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | 355 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") |
139 text(x=1, y=1, labels="no orthogonal projection is possible") | 356 text(x=1, y=1, labels="no orthogonal projection is possible") |
357 } | |
358 } | |
359 cplot_p <- x_env$cplot_p | |
360 cplot_o <- x_env$cplot_o | |
361 if (cplot_p || cplot_o) { | |
362 if (cplot_p) { | |
363 do_s_plot( | |
364 x_env = x_env | |
365 , predictor_projection_x = TRUE | |
366 , cplot_x = TRUE | |
367 , cor_vs_cov_x = my_cor_vs_cov | |
368 ) | |
369 did_plots <- 1 | |
370 } else { | |
371 did_plots <- 0 | |
372 } | |
373 if (cplot_o) { | |
374 if (my_ortho_cor_vs_cov_exists) { | |
375 do_s_plot( | |
376 x_env = x_env | |
377 , predictor_projection_x = FALSE | |
378 , cplot_x = TRUE | |
379 , cor_vs_cov_x = my_ortho_cor_vs_cov | |
380 ) | |
381 } else { | |
382 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n") | |
383 text(x=1, y=1, labels="no orthogonal projection is possible") | |
384 } | |
385 did_plots <- 1 + did_plots | |
386 } | |
387 if (did_plots == 1) { | |
388 plot(x=1, y=1, xaxt="n", yaxt="n", xlab="", ylab="", type="n", fg = "white") | |
140 } | 389 } |
141 } | 390 } |
142 return (my_cor_vs_cov) | 391 return (my_cor_vs_cov) |
143 } else { | 392 } else { |
144 # x_progress(sprintf("x_is_match = %s, ncol(x_dataMatrix) = %d, length(unique(x_predictor)) = %d",x_is_match, ncol(x_dataMatrix), length(unique(x_predictor)))) | |
145 return (NULL) | 393 return (NULL) |
146 } | 394 } |
147 } | 395 } |
148 | 396 |
149 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 | 397 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 |
150 corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) { | 398 corcov_calc <- function( |
399 calc_env | |
400 , failure_action = stop | |
401 , progress_action = function(x) { } | |
402 , corcov_tsv_action = function(t) { } | |
403 , salience_tsv_action = function(t) { } | |
404 , extra_plots = c() | |
405 ) { | |
151 if ( ! is.environment(calc_env) ) { | 406 if ( ! is.environment(calc_env) ) { |
152 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") | 407 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") |
153 return ( FALSE ) | 408 return ( FALSE ) |
154 } | 409 } |
155 if ( ! is.function(corcov_tsv_action) ) { | 410 if ( ! is.function(corcov_tsv_action) ) { |
172 # extract the levels from the environment | 427 # extract the levels from the environment |
173 originalLevCSV <- levCSV <- calc_env$levCSV | 428 originalLevCSV <- levCSV <- calc_env$levCSV |
174 # matchingC is one of { "none", "wildcard", "regex" } | 429 # matchingC is one of { "none", "wildcard", "regex" } |
175 matchingC <- calc_env$matchingC | 430 matchingC <- calc_env$matchingC |
176 labelFeatures <- calc_env$labelFeatures | 431 labelFeatures <- calc_env$labelFeatures |
177 labelOrthoFeatures <- calc_env$labelOrthoFeatures | |
178 | 432 |
179 # arg/env checking | 433 # arg/env checking |
180 if (!(facC %in% names(smpl_metadata))) { | 434 if (!(facC %in% names(smpl_metadata))) { |
181 failure_action(sprintf("bad parameter! Factor name '%s' not found in sampleMetadata", facC)) | 435 failure_action( |
436 sprintf("bad parameter! Factor name '%s' not found in sampleMetadata" | |
437 , facC)) | |
182 return ( FALSE ) | 438 return ( FALSE ) |
183 } | 439 } |
184 | 440 |
185 mz <- vrbl_metadata$mz | 441 mz <- vrbl_metadata$mz |
186 names(mz) <- vrbl_metadata$variableMetadata | 442 names(mz) <- vrbl_metadata$variableMetadata |
187 mz_lookup <- function(feature) unname(mz[feature]) | 443 mz_lookup <- function(feature) unname(mz[feature]) |
188 | 444 |
189 rt <- vrbl_metadata$rt | 445 rt <- vrbl_metadata$rt |
190 names(rt) <- vrbl_metadata$variableMetadata | 446 names(rt) <- vrbl_metadata$variableMetadata |
191 rt_lookup <- function(feature) unname(rt[feature]) | 447 rt_lookup <- function(feature) unname(rt[feature]) |
192 | 448 |
193 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) | 449 # calculate salience_df as data.frame(feature, max_level, max_median, max_rcv, mean_median, salience, salient_rcv) |
194 salience_df <- calc_env$salience_df <- w4msalience( | 450 salience_df <- calc_env$salience_df <- w4msalience( |
195 data_matrix = data_matrix | 451 data_matrix = data_matrix |
196 , sample_class = smpl_metadata[,facC] | 452 , sample_class = smpl_metadata[,facC] |
197 , failure_action = failure_action | 453 , failure_action = failure_action |
234 ) | 490 ) |
235 ) | 491 ) |
236 } | 492 } |
237 | 493 |
238 # transpose matrix because ropls matrix is the transpose of XCMS matrix | 494 # transpose matrix because ropls matrix is the transpose of XCMS matrix |
495 tdm <- t(data_matrix) | |
239 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot | 496 # Wiklund_2008 centers and pareto-scales data before OPLS-DA S-plot |
240 # center | 497 # However, data should be neither centered nor pareto scaled here because ropls::opls does that; this fixes issue #2. |
241 cdm <- center_colmeans(t(data_matrix)) | |
242 # pareto-scale | |
243 my_scale <- sqrt(apply(cdm, 2, sd, na.rm=TRUE)) | |
244 scdm <- sweep(cdm, 2, my_scale, "/") | |
245 | 498 |
246 # pattern to match column names like k10_kruskal_k4.k3_sig | 499 # pattern to match column names like k10_kruskal_k4.k3_sig |
247 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) | 500 col_pattern <- sprintf('^%s_%s_(.*)[.](.*)_sig$', facC, tesC) |
248 # column name like k10_kruskal_sig | 501 # column name like k10_kruskal_sig |
249 intersample_sig_col <- sprintf('%s_%s_sig', facC, tesC) | 502 intersample_sig_col <- sprintf('%s_%s_sig', facC, tesC) |
263 did_plot <- FALSE | 516 did_plot <- FALSE |
264 if (tesC != "none") { | 517 if (tesC != "none") { |
265 # for each column name, extract the parts of the name matched by 'col_pattern', if any | 518 # for each column name, extract the parts of the name matched by 'col_pattern', if any |
266 the_colnames <- colnames(vrbl_metadata) | 519 the_colnames <- colnames(vrbl_metadata) |
267 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { | 520 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { |
268 failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) | 521 failure_action( |
522 sprintf( | |
523 "bad parameter! variableMetadata must contain results of W4M Univariate test '%s'." | |
524 , tesC)) | |
269 return ( FALSE ) | 525 return ( FALSE ) |
270 } | 526 } |
271 col_matches <- lapply( | 527 col_matches <- lapply( |
272 X = the_colnames, | 528 X = the_colnames, |
273 FUN = function(x) { | 529 FUN = function(x) { |
290 level_union <- c(level_union, col_match[2], col_match[3]) | 546 level_union <- c(level_union, col_match[2], col_match[3]) |
291 } | 547 } |
292 } | 548 } |
293 } | 549 } |
294 level_union <- unique(sort(level_union)) | 550 level_union <- unique(sort(level_union)) |
295 overall_significant <- 1 == ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) | 551 overall_significant <- 1 == ( |
552 if (intersample_sig_col %in% colnames(vrbl_metadata)) { | |
553 vrbl_metadata[,intersample_sig_col] | |
554 } else { | |
555 1 | |
556 } | |
557 ) | |
296 if ( length(level_union) > 2 ) { | 558 if ( length(level_union) > 2 ) { |
297 chosen_samples <- smpl_metadata_facC %in% level_union | 559 chosen_samples <- smpl_metadata_facC %in% level_union |
298 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | 560 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) |
299 col_selector <- vrbl_metadata_names[ overall_significant ] | 561 col_selector <- vrbl_metadata_names[ overall_significant ] |
300 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] | 562 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] |
301 plot_action <- function(fctr_lvl_1, fctr_lvl_2) { | 563 plot_action <- function(fctr_lvl_1, fctr_lvl_2) { |
302 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | 564 progress_action( |
303 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) | 565 sprintf("calculating/plotting contrast of %s vs. %s" |
566 , fctr_lvl_1, fctr_lvl_2) | |
567 ) | |
568 predictor <- sapply( | |
569 X = chosen_facC | |
570 , FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 | |
571 ) | |
304 my_cor_cov <- do_detail_plot( | 572 my_cor_cov <- do_detail_plot( |
305 x_dataMatrix = my_matrix | 573 x_dataMatrix = my_matrix |
306 , x_predictor = predictor | 574 , x_predictor = predictor |
307 , x_is_match = is_match | 575 , x_is_match = TRUE |
308 , x_algorithm = algoC | 576 , x_algorithm = algoC |
309 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" | 577 , x_prefix = if (pairSigFeatOnly) { |
578 "Significantly contrasting features" | |
579 } else { | |
580 "Significant features" | |
581 } | |
310 , x_show_labels = labelFeatures | 582 , x_show_labels = labelFeatures |
311 , x_show_loado_labels = labelOrthoFeatures | |
312 , x_progress = progress_action | 583 , x_progress = progress_action |
313 , x_crossval_i = min(7, length(chosen_samples)) | 584 , x_crossval_i = min(7, length(chosen_samples)) |
314 , x_env = calc_env | 585 , x_env = calc_env |
315 ) | 586 ) |
316 if ( is.null(my_cor_cov) ) { | 587 if ( is.null(my_cor_cov) ) { |
317 progress_action("NOTHING TO PLOT.") | 588 progress_action("NOTHING TO PLOT") |
318 } else { | 589 } else { |
319 my_tsv <- my_cor_cov$tsv1 | 590 my_tsv <- my_cor_cov$tsv1 |
320 my_tsv$mz <- mz_lookup(my_tsv$featureID) | 591 my_tsv$mz <- mz_lookup(my_tsv$featureID) |
321 my_tsv$rt <- rt_lookup(my_tsv$featureID) | 592 my_tsv$rt <- rt_lookup(my_tsv$featureID) |
322 my_tsv["level1Level2Sig"] <- vrbl_metadata[ match(my_tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] | 593 my_tsv["level1Level2Sig"] <- vrbl_metadata[ |
594 match(my_tsv$featureID, vrbl_metadata_names) | |
595 , vrbl_metadata_col | |
596 ] | |
323 tsv <<- my_tsv | 597 tsv <<- my_tsv |
324 corcov_tsv_action(tsv) | 598 corcov_tsv_action(tsv) |
325 did_plot <<- TRUE | 599 did_plot <<- TRUE |
326 } | 600 } |
327 } | 601 } |
346 vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name | 620 vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name |
347 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 | 621 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 |
348 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 | 622 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 |
349 # only process this column if both factors are members of lvlCSV | 623 # only process this column if both factors are members of lvlCSV |
350 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) | 624 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) |
351 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | 625 if (is_match) { |
352 # TODO delete next line displaying currently-processed column | 626 progress_action( |
353 # cat(sprintf("%s %s %s %s\n", vrbl_metadata_col, fctr_lvl_1, fctr_lvl_2, is_match)) | 627 sprintf("calculating/plotting contrast of %s vs. %s." |
354 # choose only samples with one of the two factors for this column | 628 , fctr_lvl_1, fctr_lvl_2 |
355 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 629 ) |
356 predictor <- smpl_metadata_facC[chosen_samples] | 630 ) |
357 # extract only the significantly-varying features and the chosen samples | 631 # choose only samples with one of the two factors for this column |
358 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * ( if (intersample_sig_col %in% colnames(vrbl_metadata)) vrbl_metadata[,intersample_sig_col] else TRUE ) | 632 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
359 col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] | 633 predictor <- smpl_metadata_facC[chosen_samples] |
360 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] | 634 # extract only the significantly-varying features and the chosen samples |
361 my_cor_cov <- do_detail_plot( | 635 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * |
362 x_dataMatrix = my_matrix | 636 ( if (intersample_sig_col %in% colnames(vrbl_metadata)) { |
363 , x_predictor = predictor | 637 vrbl_metadata[,intersample_sig_col] |
364 , x_is_match = is_match | 638 } else { |
365 , x_algorithm = algoC | 639 1 |
366 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" | 640 } |
367 , x_show_labels = labelFeatures | 641 ) |
368 , x_show_loado_labels = labelOrthoFeatures | 642 col_selector <- vrbl_metadata_names[ |
369 , x_progress = progress_action | 643 if ( pairSigFeatOnly ) fully_significant else overall_significant |
370 , x_crossval_i = min(7, length(chosen_samples)) | 644 ] |
371 , x_env = calc_env | 645 my_matrix <- tdm[ chosen_samples, col_selector, drop = FALSE ] |
372 ) | 646 my_cor_cov <- do_detail_plot( |
373 if ( is.null(my_cor_cov) ) { | 647 x_dataMatrix = my_matrix |
374 progress_action("NOTHING TO PLOT.") | 648 , x_predictor = predictor |
649 , x_is_match = is_match | |
650 , x_algorithm = algoC | |
651 , x_prefix = if (pairSigFeatOnly) { | |
652 "Significantly contrasting features" | |
653 } else { | |
654 "Significant features" | |
655 } | |
656 , x_show_labels = labelFeatures | |
657 , x_progress = progress_action | |
658 , x_crossval_i = min(7, length(chosen_samples)) | |
659 , x_env = calc_env | |
660 ) | |
661 if ( is.null(my_cor_cov) ) { | |
662 progress_action("NOTHING TO PLOT.") | |
663 } else { | |
664 tsv <- my_cor_cov$tsv1 | |
665 tsv$mz <- mz_lookup(tsv$featureID) | |
666 tsv$rt <- rt_lookup(tsv$featureID) | |
667 tsv["level1Level2Sig"] <- vrbl_metadata[ | |
668 match(tsv$featureID, vrbl_metadata_names) | |
669 , vrbl_metadata_col | |
670 ] | |
671 corcov_tsv_action(tsv) | |
672 did_plot <- TRUE | |
673 } | |
375 } else { | 674 } else { |
376 tsv <- my_cor_cov$tsv1 | 675 progress_action( |
377 tsv$mz <- mz_lookup(tsv$featureID) | 676 sprintf("skipping contrast of %s vs. %s." |
378 tsv$rt <- rt_lookup(tsv$featureID) | 677 , fctr_lvl_1, fctr_lvl_2 |
379 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] | 678 ) |
380 corcov_tsv_action(tsv) | 679 ) |
381 did_plot <- TRUE | |
382 } | 680 } |
383 } | 681 } |
384 } | 682 } |
385 } | 683 } |
386 } else { # tesC == "none" | 684 } else { # tesC == "none" |
685 # find all the levels for factor facC in sampleMetadata | |
387 level_union <- unique(sort(smpl_metadata_facC)) | 686 level_union <- unique(sort(smpl_metadata_facC)) |
687 # identify the selected levels for factor facC from sampleMetadata | |
688 level_include <- sapply(X = level_union, FUN = isLevelSelected) | |
689 # discard the non-selected levels for factor facC | |
690 level_union <- level_union[level_include] | |
388 if ( length(level_union) > 1 ) { | 691 if ( length(level_union) > 1 ) { |
389 if ( length(level_union) > 2 ) { | 692 if ( length(level_union) > 2 ) { |
390 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ## | 693 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ## |
391 completed <- c() | 694 completed <- c() |
392 lapply( | 695 lapply( |
393 X = level_union | 696 X = level_union |
394 , FUN = function(x) { | 697 , FUN = function(x) { |
395 fctr_lvl_1 <- x[1] | 698 fctr_lvl_1 <- x[1] |
396 fctr_lvl_2 <- { | 699 fctr_lvl_2 <- { |
397 if ( fctr_lvl_1 %in% completed ) | 700 if ( fctr_lvl_1 %in% completed ) |
398 return("DUMMY") | 701 return("DUMMY") |
399 # strF(completed) | 702 # strF(completed) |
400 completed <<- c(completed, fctr_lvl_1) | 703 completed <<- c(completed, fctr_lvl_1) |
401 setdiff(level_union, fctr_lvl_1) | 704 setdiff(level_union, fctr_lvl_1) |
402 } | 705 } |
403 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 706 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
404 fctr_lvl_2 <- "other" | 707 fctr_lvl_2 <- "other" |
405 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) | |
406 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | |
407 if (length(unique(chosen_samples)) < 1) { | 708 if (length(unique(chosen_samples)) < 1) { |
408 progress_action("NOTHING TO PLOT...") | 709 progress_action( |
710 sprintf("Skipping contrast of %s vs. %s; there are no chosen samples." | |
711 , fctr_lvl_1, fctr_lvl_2) | |
712 ) | |
409 } else { | 713 } else { |
410 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | 714 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) |
411 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) | 715 predictor <- sapply( |
412 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] | 716 X = chosen_facC |
717 , FUN = function(fac) { | |
718 if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 | |
719 } | |
720 ) | |
721 my_matrix <- tdm[ chosen_samples, , drop = FALSE ] | |
413 # only process this column if both factors are members of lvlCSV | 722 # only process this column if both factors are members of lvlCSV |
414 is_match <- isLevelSelected(fctr_lvl_1) | 723 is_match <- isLevelSelected(fctr_lvl_1) |
724 if (is_match) { | |
725 progress_action( | |
726 sprintf("Calculating/plotting contrast of %s vs. %s" | |
727 , fctr_lvl_1, fctr_lvl_2) | |
728 ) | |
729 my_cor_cov <- do_detail_plot( | |
730 x_dataMatrix = my_matrix | |
731 , x_predictor = predictor | |
732 , x_is_match = is_match | |
733 , x_algorithm = algoC | |
734 , x_prefix = "Features" | |
735 , x_show_labels = labelFeatures | |
736 , x_progress = progress_action | |
737 , x_crossval_i = min(7, length(chosen_samples)) | |
738 , x_env = calc_env | |
739 ) | |
740 if ( is.null(my_cor_cov) ) { | |
741 progress_action("NOTHING TO PLOT...") | |
742 } else { | |
743 tsv <- my_cor_cov$tsv1 | |
744 tsv$mz <- mz_lookup(tsv$featureID) | |
745 tsv$rt <- rt_lookup(tsv$featureID) | |
746 corcov_tsv_action(tsv) | |
747 did_plot <<- TRUE | |
748 } | |
749 } else { | |
750 } | |
751 } | |
752 "dummy" # need to return a value; otherwise combn fails with an error | |
753 } | |
754 ) | |
755 } | |
756 ## pass 2 - contrast each selected level with each of the other levels individually ## | |
757 completed <- c() | |
758 utils::combn( | |
759 x = level_union | |
760 , m = 2 | |
761 , FUN = function(x) { | |
762 fctr_lvl_1 <- x[1] | |
763 fctr_lvl_2 <- x[2] | |
764 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | |
765 if (length(unique(chosen_samples)) < 1) { | |
766 progress_action( | |
767 sprintf("Skipping contrast of %s vs. %s. - There are no chosen samples." | |
768 , fctr_lvl_1, fctr_lvl_2 | |
769 ) | |
770 ) | |
771 } else { | |
772 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | |
773 predictor <- chosen_facC | |
774 my_matrix <- tdm[ chosen_samples, , drop = FALSE ] | |
775 # only process this column if both factors are members of lvlCSV | |
776 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) | |
777 if (is_match) { | |
778 progress_action( | |
779 sprintf("Calculating/plotting contrast of %s vs. %s." | |
780 , fctr_lvl_1, fctr_lvl_2) | |
781 ) | |
415 my_cor_cov <- do_detail_plot( | 782 my_cor_cov <- do_detail_plot( |
416 x_dataMatrix = my_matrix | 783 x_dataMatrix = my_matrix |
417 , x_predictor = predictor | 784 , x_predictor = predictor |
418 , x_is_match = is_match | 785 , x_is_match = is_match |
419 , x_algorithm = algoC | 786 , x_algorithm = algoC |
420 , x_prefix = "Features" | 787 , x_prefix = "Features" |
421 , x_show_labels = labelFeatures | 788 , x_show_labels = labelFeatures |
422 , x_show_loado_labels = labelOrthoFeatures | |
423 , x_progress = progress_action | 789 , x_progress = progress_action |
424 , x_crossval_i = min(7, length(chosen_samples)) | 790 , x_crossval_i = min(7, length(chosen_samples)) |
425 , x_env = calc_env | 791 , x_env = calc_env |
426 ) | 792 ) |
427 if ( is.null(my_cor_cov) ) { | 793 if ( is.null(my_cor_cov) ) { |
428 progress_action("NOTHING TO PLOT") | 794 progress_action("NOTHING TO PLOT.....") |
429 } else { | 795 } else { |
430 tsv <- my_cor_cov$tsv1 | 796 tsv <- my_cor_cov$tsv1 |
431 tsv$mz <- mz_lookup(tsv$featureID) | 797 tsv$mz <- mz_lookup(tsv$featureID) |
432 tsv$rt <- rt_lookup(tsv$featureID) | 798 tsv$rt <- rt_lookup(tsv$featureID) |
433 corcov_tsv_action(tsv) | 799 corcov_tsv_action(tsv) |
434 did_plot <<- TRUE | 800 did_plot <<- TRUE |
435 } | 801 } |
436 } | 802 } else { |
437 #print("baz") | 803 progress_action( |
438 "dummy" # need to return a value; otherwise combn fails with an error | 804 sprintf("Skipping contrast of %s vs. %s." |
805 , fctr_lvl_1, fctr_lvl_2 | |
806 ) | |
807 ) | |
808 } | |
439 } | 809 } |
440 ) | |
441 } | |
442 ## pass 2 - contrast each selected level with each of the other levels individually ## | |
443 completed <- c() | |
444 utils::combn( | |
445 x = level_union | |
446 , m = 2 | |
447 , FUN = function(x) { | |
448 fctr_lvl_1 <- x[1] | |
449 fctr_lvl_2 <- x[2] | |
450 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | |
451 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) | |
452 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | |
453 if (length(unique(chosen_samples)) < 1) { | |
454 progress_action("NOTHING TO PLOT...") | |
455 } else { | |
456 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | |
457 predictor <- chosen_facC | |
458 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] | |
459 # only process this column if both factors are members of lvlCSV | |
460 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) | |
461 my_cor_cov <- do_detail_plot( | |
462 x_dataMatrix = my_matrix | |
463 , x_predictor = predictor | |
464 , x_is_match = is_match | |
465 , x_algorithm = algoC | |
466 , x_prefix = "Features" | |
467 , x_show_labels = labelFeatures | |
468 , x_show_loado_labels = labelOrthoFeatures | |
469 , x_progress = progress_action | |
470 , x_crossval_i = min(7, length(chosen_samples)) | |
471 , x_env = calc_env | |
472 ) | |
473 if ( is.null(my_cor_cov) ) { | |
474 progress_action("NOTHING TO PLOT") | |
475 } else { | |
476 tsv <- my_cor_cov$tsv1 | |
477 tsv$mz <- mz_lookup(tsv$featureID) | |
478 tsv$rt <- rt_lookup(tsv$featureID) | |
479 corcov_tsv_action(tsv) | |
480 did_plot <<- TRUE | |
481 } | |
482 } | |
483 #print("baz") | |
484 "dummy" # need to return a value; otherwise combn fails with an error | 810 "dummy" # need to return a value; otherwise combn fails with an error |
485 } | 811 } |
486 ) | 812 ) |
487 } else { | 813 } else { |
488 progress_action("NOTHING TO PLOT....") | 814 progress_action("NOTHING TO PLOT......") |
489 } | 815 } |
490 } | 816 } |
491 if (!did_plot) { | 817 if (!did_plot) { |
492 failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) | 818 failure_action( |
819 sprintf( | |
820 "bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'" | |
821 , facC, originalLevCSV)) | |
493 return ( FALSE ) | 822 return ( FALSE ) |
494 } | 823 } |
495 return ( TRUE ) | 824 return ( TRUE ) |
496 } | 825 } |
497 | 826 |
498 # Calculate data for correlation-versus-covariance plot | 827 # Calculate data for correlation-versus-covariance plot |
499 # Adapted from: | 828 # Adapted from: |
500 # Wiklund_2008 doi:10.1021/ac0713510 | 829 # Wiklund_2008 doi:10.1021/ac0713510 |
501 # Galindo_Prieto_2014 doi:10.1002/cem.2627 | 830 # Galindo_Prieto_2014 doi:10.1002/cem.2627 |
502 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R | 831 # https://github.com/HegemanLab/extra_tools/blob/master/generic_PCA.R |
503 cor_vs_cov <- function(matrix_x, ropls_x) { | 832 cor_vs_cov <- function( |
833 matrix_x | |
834 , ropls_x | |
835 , predictor_projection_x = TRUE | |
836 , x_progress = print | |
837 ) { | |
838 tryCatch({ | |
839 return( | |
840 cor_vs_cov_try( matrix_x, ropls_x, predictor_projection_x, x_progress) | |
841 ) | |
842 }, error = function(e) { | |
843 x_progress( | |
844 sprintf( | |
845 "cor_vs_cov fatal error - %s" | |
846 , as.character(e) # e$message | |
847 ) | |
848 ) | |
849 return ( NULL ) | |
850 }) | |
851 } | |
852 | |
853 cor_vs_cov_try <- function( | |
854 matrix_x | |
855 , ropls_x | |
856 , predictor_projection_x = TRUE | |
857 , x_progress = print | |
858 ) { | |
504 x_class <- class(ropls_x) | 859 x_class <- class(ropls_x) |
505 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) | 860 if ( !( as.character(x_class) == "opls" ) ) { # || !( attr(class(x_class),"package") == "ropls" ) ) |
506 stop( "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class ", as.character(x_class) ) | 861 stop( |
862 paste( | |
863 "cor_vs_cov: Expected ropls_x to be of class ropls::opls but instead it was of class " | |
864 , as.character(x_class) | |
865 ) | |
866 ) | |
507 } | 867 } |
508 result <- list() | 868 result <- list() |
869 result$projection <- projection <- if (predictor_projection_x) 1 else 2 | |
509 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS | 870 # suppLs$algoC - Character: algorithm used - "svd" for singular value decomposition; "nipals" for NIPALS |
510 if ( ropls_x@suppLs$algoC == "nipals") { | 871 if ( ropls_x@suppLs$algoC == "nipals") { |
511 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 | 872 # Equations (1) and (2) from *Supplement to* Wiklund 2008, doi:10.1021/ac0713510 |
512 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) | 873 mag <- function(one_dimensional) sqrt(sum(one_dimensional * one_dimensional)) |
513 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) | 874 mag_xi <- sapply(X = 1:ncol(matrix_x), FUN = function(x) mag(matrix_x[,x])) |
514 score_matrix <- ropls_x@scoreMN | 875 if (predictor_projection_x) |
876 score_matrix <- ropls_x@scoreMN | |
877 else | |
878 score_matrix <- ropls_x@orthoScoreMN | |
515 score_matrix_transposed <- t(score_matrix) | 879 score_matrix_transposed <- t(score_matrix) |
516 score_matrix_magnitude <- mag(score_matrix) | 880 score_matrix_magnitude <- mag(score_matrix) |
517 result$covariance <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) | 881 result$covariance <- |
518 result$correlation <- score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) | 882 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * score_matrix_magnitude ) |
883 result$correlation <- | |
884 score_matrix_transposed %*% matrix_x / ( score_matrix_magnitude * mag_xi ) | |
519 } else { | 885 } else { |
520 # WARNING - untested code - I don't have test data to exercise this branch | 886 # WARNING - untested code - I don't have test data to exercise this branch |
521 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 | 887 # Equations (1) and (2) from Wiklund 2008, doi:10.1021/ac0713510 |
522 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F | 888 # scoreMN - Numerical matrix of x scores (T; dimensions: nrow(x) x predI) X = TP' + E; Y = TC' + F |
523 score_matrix <- ropls_x@scoreMN | 889 if (predictor_projection_x) |
890 score_matrix <- ropls_x@scoreMN | |
891 else | |
892 score_matrix <- ropls_x@orthoScoreMN | |
524 score_matrix_transposed <- t(score_matrix) | 893 score_matrix_transposed <- t(score_matrix) |
525 cov_divisor <- nrow(matrix_x) - 1 | 894 cov_divisor <- nrow(matrix_x) - 1 |
526 result$covariance <- sapply( | 895 result$covariance <- sapply( |
527 X = 1:ncol(matrix_x) | 896 X = 1:ncol(matrix_x) |
528 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor | 897 , FUN = function(x) score_matrix_transposed %*% matrix_x[,x] / cov_divisor |
538 , FUN = function(x) { | 907 , FUN = function(x) { |
539 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) | 908 ( score_matrix_transposed / score_sd ) %*% ( matrix_x[,x] / (xSdVn[x] * cov_divisor) ) |
540 } | 909 } |
541 ) | 910 ) |
542 } | 911 } |
543 result$correlation <- result$correlation[1,,drop = TRUE] | 912 result$correlation <- result$correlation[ 1, , drop = TRUE ] |
544 result$covariance <- result$covariance[1,,drop = TRUE] | 913 result$covariance <- result$covariance [ 1, , drop = TRUE ] |
545 | 914 |
546 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 | 915 # Variant 4 of Variable Influence on Projection for OPLS from Galindo_Prieto_2014 |
547 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) | 916 # Length = number of features; labels = feature identifiers. (The same is true for $correlation and $covariance.) |
548 result$vip4p <- as.numeric(ropls_x@vipVn) | 917 result$vip4p <- as.numeric(ropls_x@vipVn) |
549 result$vip4o <- as.numeric(ropls_x@orthoVipVn) | 918 result$vip4o <- as.numeric(ropls_x@orthoVipVn) |
554 fctr_lvl_1 <- level_names[1] | 923 fctr_lvl_1 <- level_names[1] |
555 fctr_lvl_2 <- level_names[2] | 924 fctr_lvl_2 <- level_names[2] |
556 feature_count <- length(ropls_x@vipVn) | 925 feature_count <- length(ropls_x@vipVn) |
557 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) | 926 result$level1 <- rep.int(x = fctr_lvl_1, times = feature_count) |
558 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) | 927 result$level2 <- rep.int(x = fctr_lvl_2, times = feature_count) |
559 # print(sprintf("sd(covariance) = %f; sd(correlation) = %f", sd(result$covariance), sd(result$correlation))) | |
560 superresult <- list() | 928 superresult <- list() |
561 if (length(result$vip4o) == 0) result$vip4o <- NA | 929 if (length(result$vip4o) == 0) result$vip4o <- NA |
562 greaterLevel <- sapply( X = result$correlation, FUN = function(my_corr) if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 ) | 930 greaterLevel <- sapply( |
563 superresult$tsv1 <- data.frame( | 931 X = result$correlation |
564 featureID = names(ropls_x@vipVn) | 932 , FUN = function(my_corr) |
933 tryCatch({ | |
934 if ( is.nan( my_corr ) ) { | |
935 print("my_corr is NaN") | |
936 NA | |
937 } else { | |
938 if ( my_corr < 0 ) fctr_lvl_1 else fctr_lvl_2 | |
939 } | |
940 }, error = function(e) { | |
941 x_progress( | |
942 sprintf( | |
943 "cor_vs_cov -> sapply: error - substituting NA - %s" | |
944 , as.character(e) | |
945 ) | |
946 ) | |
947 NA | |
948 }) | |
949 ) | |
950 | |
951 # begin fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | |
952 featureID <- names(ropls_x@vipVn) | |
953 greaterLevel <- greaterLevel[featureID] | |
954 result$correlation <- result$correlation[featureID] | |
955 result$covariance <- result$covariance[featureID] | |
956 # end fixes for https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/issues/1 | |
957 | |
958 tsv1 <- data.frame( | |
959 featureID = featureID | |
565 , factorLevel1 = result$level1 | 960 , factorLevel1 = result$level1 |
566 , factorLevel2 = result$level2 | 961 , factorLevel2 = result$level2 |
567 , greaterLevel = greaterLevel | 962 , greaterLevel = greaterLevel |
963 , projection = result$projection | |
568 , correlation = result$correlation | 964 , correlation = result$correlation |
569 , covariance = result$covariance | 965 , covariance = result$covariance |
570 , vip4p = result$vip4p | 966 , vip4p = result$vip4p |
571 , vip4o = result$vip4o | 967 , vip4o = result$vip4o |
572 , loadp = result$loadp | 968 , loadp = result$loadp |
573 , loado = result$loado | 969 , loado = result$loado |
574 , row.names = NULL | 970 , row.names = NULL |
575 ) | 971 ) |
576 rownames(superresult$tsv1) <- superresult$tsv1$featureID | 972 tsv1 <- tsv1[!is.na(tsv1$correlation),] |
973 tsv1 <- tsv1[!is.na(tsv1$covariance),] | |
974 superresult$tsv1 <- tsv1 | |
975 rownames(superresult$tsv1) <- tsv1$featureID | |
976 superresult$projection <- result$projection | |
577 superresult$covariance <- result$covariance | 977 superresult$covariance <- result$covariance |
578 superresult$correlation <- result$correlation | 978 superresult$correlation <- result$correlation |
579 superresult$vip4p <- result$vip4p | 979 superresult$vip4p <- result$vip4p |
580 superresult$vip4o <- result$vip4o | 980 superresult$vip4o <- result$vip4o |
581 superresult$loadp <- result$loadp | 981 superresult$loadp <- result$loadp |
582 superresult$loado <- result$loado | 982 superresult$loado <- result$loado |
583 superresult$details <- result | 983 superresult$details <- result |
584 # #print(superresult$tsv1) | |
585 result$superresult <- superresult | 984 result$superresult <- superresult |
586 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways | 985 # Include thise in case future consumers of this routine want to use it in currently unanticipated ways |
587 result$oplsda <- ropls_x | 986 result$oplsda <- ropls_x |
588 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways | 987 result$predictor <- ropls_x@suppLs$y # in case future consumers of this routine want to use it in currently unanticipated ways |
589 return (superresult) | 988 return (superresult) |
590 } | 989 } |
591 | 990 |
592 | 991 # vim: sw=2 ts=2 et : |