Mercurial > repos > eschen42 > w4mcorcov
comparison w4mcorcov_calc.R @ 1:e25fd8a13665 draft
planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit bd26542b811de06c1a877337a2840a9f899c2b94
author | eschen42 |
---|---|
date | Mon, 16 Oct 2017 09:18:29 -0400 |
parents | 50a07adddfbd |
children | a06344808ffc |
comparison
equal
deleted
inserted
replaced
0:50a07adddfbd | 1:e25fd8a13665 |
---|---|
40 lim_x <- 1.2 | 40 lim_x <- 1.2 |
41 main_label <- sprintf("%s for levels %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) | 41 main_label <- sprintf("%s for levels %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) |
42 # print("main_label") | 42 # print("main_label") |
43 # print(main_label) | 43 # print(main_label) |
44 main_cex <- min(1.0, 46.0/nchar(main_label)) | 44 main_cex <- min(1.0, 46.0/nchar(main_label)) |
45 # " It is generally accepted that a variable should be selected if vj>1, [27–29], but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." (Mehmood 2012 doi:10.1016/j.chemolab.2004.12.011) | 45 # "It is generally accepted that a variable should be selected if vj>1, [27–29], |
46 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." | |
47 # (Mehmood 2012 doi:10.1186/1748-7188-6-27) | |
46 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) | 48 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) |
47 alpha <- 0.1 + 0.4 * vipco | 49 alpha <- 0.1 + 0.4 * vipco |
48 red <- as.numeric(correlation < 0) * vipco | 50 red <- as.numeric(correlation < 0) * vipco |
49 blue <- as.numeric(correlation > 0) * vipco | 51 blue <- as.numeric(correlation > 0) * vipco |
50 minus_cor <- -correlation | 52 minus_cor <- -correlation |
51 minus_cov <- -covariance | 53 minus_cov <- -covariance |
52 # x_progress("head(names(minus_cor)): ", head(names(minus_cor))) | 54 # cex <- salience_lookup(feature = names(minus_cor)) |
53 cex <- salience_lookup(feature = names(minus_cor)) | 55 # cex <- 0.25 + (1.25 * cex / max(cex)) |
54 # x_progress("head(cex.1): ", head(cex)) | 56 cex <- 0.75 |
55 # cex <- 0.25 + (0.75 * cex / max(cex)) | |
56 cex <- 0.25 + (1.25 * cex / max(cex)) | |
57 # x_progress("head(cex.2): ", head(cex)) | |
58 plot( | 57 plot( |
59 y = minus_cor | 58 y = minus_cor |
60 , x = minus_cov | 59 , x = minus_cov |
61 , type="p" | 60 , type="p" |
62 , xlim=c(-lim_x, lim_x + off(0.1)) | 61 , xlim=c(-lim_x, lim_x + off(0.1)) |
127 return (NULL) | 126 return (NULL) |
128 } | 127 } |
129 } | 128 } |
130 | 129 |
131 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 | 130 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 |
132 corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = NULL) { | 131 corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) { |
133 if ( ! is.environment(calc_env) ) { | 132 if ( ! is.environment(calc_env) ) { |
134 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") | 133 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") |
135 return ( FALSE ) | 134 return ( FALSE ) |
136 } | 135 } |
137 if ( is.null(corcov_tsv_action) || ! is.function(corcov_tsv_action) ) { | 136 if ( ! is.function(corcov_tsv_action) ) { |
138 failure_action("corcov_calc: fatal error - 'corcov_tsv_action' is not a function") | 137 failure_action("corcov_calc: fatal error - 'corcov_tsv_action' is not a function") |
138 return ( FALSE ) | |
139 } | |
140 if ( ! is.function(salience_tsv_action) ) { | |
141 failure_action("salience_calc: fatal error - 'salience_tsv_action' is not a function") | |
139 return ( FALSE ) | 142 return ( FALSE ) |
140 } | 143 } |
141 | 144 |
142 # extract parameters from the environment | 145 # extract parameters from the environment |
143 vrbl_metadata <- calc_env$vrbl_metadata | 146 vrbl_metadata <- calc_env$vrbl_metadata |
163 salience_df <- calc_env$salience_df <- w4msalience( | 166 salience_df <- calc_env$salience_df <- w4msalience( |
164 data_matrix = data_matrix | 167 data_matrix = data_matrix |
165 , sample_class = smpl_metadata[,facC] | 168 , sample_class = smpl_metadata[,facC] |
166 , failure_action = failure_action | 169 , failure_action = failure_action |
167 ) | 170 ) |
171 salience_tsv_action({ | |
172 my_df <- data.frame( | |
173 featureID = salience_df$feature | |
174 , salientLevel = salience_df$max_level | |
175 , salientRCV = salience_df$salient_rcv | |
176 , salience = salience_df$salience | |
177 ) | |
178 my_df[order(-my_df$salience),] | |
179 }) | |
168 salience <- salience_df$salience | 180 salience <- salience_df$salience |
169 names(salience) <- salience_df$feature | 181 names(salience) <- salience_df$feature |
170 salience_lookup <- calc_env$salience_lookup <- function(feature) unname(salience[feature]) | 182 salience_lookup <- calc_env$salience_lookup <- function(feature) unname(salience[feature]) |
171 salient_rcv <- salience_df$salient_rcv | 183 salient_rcv <- salience_df$salient_rcv |
172 names(salient_rcv) <- salience_df$feature | 184 names(salient_rcv) <- salience_df$feature |
229 | 241 |
230 did_plot <- FALSE | 242 did_plot <- FALSE |
231 if (tesC != "none") { | 243 if (tesC != "none") { |
232 # for each column name, extract the parts of the name matched by 'col_pattern', if any | 244 # for each column name, extract the parts of the name matched by 'col_pattern', if any |
233 the_colnames <- colnames(vrbl_metadata) | 245 the_colnames <- colnames(vrbl_metadata) |
234 if (!Reduce(f = "||", x = grepl(tesC, the_colnames))) { | 246 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) { |
235 failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) | 247 failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) |
236 return ( FALSE ) | 248 return ( FALSE ) |
237 } | 249 } |
238 col_matches <- lapply( | 250 col_matches <- lapply( |
239 X = the_colnames, | 251 X = the_colnames, |
240 FUN = function(x) { | 252 FUN = function(x) { |
241 regmatches( x, regexec(col_pattern, x) )[[1]] | 253 regmatches( x, regexec(col_pattern, x) )[[1]] |
242 } | 254 } |
243 ) | 255 ) |
256 ## first contrast each selected level with all other levels combined into one "super-level" ## | |
257 # process columns matching the pattern | |
258 level_union <- c() | |
259 for (i in 1:length(col_matches)) { | |
260 col_match <- col_matches[[i]] | |
261 if (length(col_match) > 0) { | |
262 # it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig | |
263 vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name | |
264 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1 | |
265 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2 | |
266 # only process this column if both factors are members of lvlCSV | |
267 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) | |
268 if (is_match) { | |
269 level_union <- c(level_union, col_match[2], col_match[3]) | |
270 } | |
271 } | |
272 } | |
273 level_union <- unique(sort(level_union)) | |
274 overall_significant <- 1 == vrbl_metadata[,intersample_sig_col] | |
275 if ( length(level_union) > 1 ) { | |
276 chosen_samples <- smpl_metadata_facC %in% level_union | |
277 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | |
278 col_selector <- vrbl_metadata_names[ overall_significant ] | |
279 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] | |
280 fctr_lvl_2 <- "other" | |
281 for ( fctr_lvl_1 in level_union[1:length(level_union)] ) { | |
282 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | |
283 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) | |
284 my_cor_cov <- do_detail_plot( | |
285 x_dataMatrix = my_matrix | |
286 , x_predictor = predictor | |
287 , x_is_match = is_match | |
288 , x_algorithm = algoC | |
289 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features" | |
290 , x_show_labels = labelFeatures | |
291 , x_progress = progress_action | |
292 , x_env = calc_env | |
293 ) | |
294 if ( is.null(my_cor_cov) ) { | |
295 progress_action("NOTHING TO PLOT.") | |
296 } else { | |
297 tsv <- my_cor_cov$tsv1 | |
298 # tsv$salientLevel <- salient_level_lookup(tsv$featureID) | |
299 # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) | |
300 # tsv$salience <- salience_lookup(tsv$featureID) | |
301 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] | |
302 corcov_tsv_action(tsv) | |
303 did_plot <- TRUE | |
304 } | |
305 } | |
306 } | |
307 | |
308 ## next, contrast each selected level with each of the other levels individually ## | |
244 # process columns matching the pattern | 309 # process columns matching the pattern |
245 for (i in 1:length(col_matches)) { | 310 for (i in 1:length(col_matches)) { |
246 # for each potential match of the pattern | 311 # for each potential match of the pattern |
247 col_match <- col_matches[[i]] | 312 col_match <- col_matches[[i]] |
248 if (length(col_match) > 0) { | 313 if (length(col_match) > 0) { |
258 # choose only samples with one of the two factors for this column | 323 # choose only samples with one of the two factors for this column |
259 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 324 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
260 predictor <- smpl_metadata_facC[chosen_samples] | 325 predictor <- smpl_metadata_facC[chosen_samples] |
261 # extract only the significantly-varying features and the chosen samples | 326 # extract only the significantly-varying features and the chosen samples |
262 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * vrbl_metadata[,intersample_sig_col] | 327 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * vrbl_metadata[,intersample_sig_col] |
263 overall_significant <- 1 == vrbl_metadata[,intersample_sig_col] | |
264 col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] | 328 col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] |
265 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] | 329 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] |
266 my_cor_cov <- do_detail_plot( | 330 my_cor_cov <- do_detail_plot( |
267 x_dataMatrix = my_matrix | 331 x_dataMatrix = my_matrix |
268 , x_predictor = predictor | 332 , x_predictor = predictor |
275 ) | 339 ) |
276 if ( is.null(my_cor_cov) ) { | 340 if ( is.null(my_cor_cov) ) { |
277 progress_action("NOTHING TO PLOT.") | 341 progress_action("NOTHING TO PLOT.") |
278 } else { | 342 } else { |
279 tsv <- my_cor_cov$tsv1 | 343 tsv <- my_cor_cov$tsv1 |
280 tsv$salientLevel <- salient_level_lookup(tsv$featureID) | 344 # tsv$salientLevel <- salient_level_lookup(tsv$featureID) |
281 tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) | 345 # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) |
282 tsv$salience <- salience_lookup(tsv$featureID) | 346 # tsv$salience <- salience_lookup(tsv$featureID) |
283 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] | 347 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] |
284 corcov_tsv_action(tsv) | 348 corcov_tsv_action(tsv) |
285 did_plot <- TRUE | 349 did_plot <- TRUE |
286 } | 350 } |
287 } | 351 } |
288 } | 352 } |
289 } else { # tesC == "none" | 353 } else { # tesC == "none" |
290 utils::combn( | 354 level_union <- unique(sort(smpl_metadata_facC)) |
291 x = unique(sort(smpl_metadata_facC)) | 355 if ( length(level_union) > 1 ) { |
292 , m = 2 | 356 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ## |
293 , FUN = function(x) { | 357 completed <- c() |
294 fctr_lvl_1 <- x[1] | 358 lapply( |
295 fctr_lvl_2 <- x[2] | 359 X = level_union |
296 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | 360 , FUN = function(x) { |
297 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | 361 fctr_lvl_1 <- x[1] |
298 if (length(unique(chosen_samples)) < 1) { | 362 fctr_lvl_2 <- { |
299 progress_action("NOTHING TO PLOT...") | 363 if ( fctr_lvl_1 %in% completed ) |
300 } else { | 364 return("DUMMY") |
301 predictor <- smpl_metadata_facC[chosen_samples] | 365 # strF(completed) |
302 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] | 366 completed <<- c(completed, fctr_lvl_1) |
303 # only process this column if both factors are members of lvlCSV | 367 setdiff(level_union, fctr_lvl_1) |
304 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) | 368 } |
305 my_cor_cov <- do_detail_plot( | 369 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) |
306 x_dataMatrix = my_matrix | 370 fctr_lvl_2 <- "other" |
307 , x_predictor = predictor | 371 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) |
308 , x_is_match = is_match | 372 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) |
309 , x_algorithm = algoC | 373 if (length(unique(chosen_samples)) < 1) { |
310 , x_prefix = "Features" | 374 progress_action("NOTHING TO PLOT...") |
311 , x_show_labels = labelFeatures | |
312 , x_progress = progress_action | |
313 , x_env = calc_env | |
314 ) | |
315 if ( is.null(my_cor_cov) ) { | |
316 progress_action("NOTHING TO PLOT") | |
317 } else { | 375 } else { |
318 tsv <- my_cor_cov$tsv1 | 376 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) |
319 tsv$salientLevel <- salient_level_lookup(tsv$featureID) | 377 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 ) |
320 tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) | 378 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] |
321 tsv$salience <- salience_lookup(tsv$featureID) | 379 # only process this column if both factors are members of lvlCSV |
322 corcov_tsv_action(tsv) | 380 is_match <- isLevelSelected(fctr_lvl_1) |
323 did_plot <<- TRUE | 381 my_cor_cov <- do_detail_plot( |
382 x_dataMatrix = my_matrix | |
383 , x_predictor = predictor | |
384 , x_is_match = is_match | |
385 , x_algorithm = algoC | |
386 , x_prefix = "Features" | |
387 , x_show_labels = labelFeatures | |
388 , x_progress = progress_action | |
389 , x_env = calc_env | |
390 ) | |
391 if ( is.null(my_cor_cov) ) { | |
392 progress_action("NOTHING TO PLOT") | |
393 } else { | |
394 tsv <- my_cor_cov$tsv1 | |
395 # tsv$salientLevel <- salient_level_lookup(tsv$featureID) | |
396 # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) | |
397 # tsv$salience <- salience_lookup(tsv$featureID) | |
398 corcov_tsv_action(tsv) | |
399 did_plot <<- TRUE | |
400 } | |
324 } | 401 } |
402 #print("baz") | |
403 "dummy" # need to return a value; otherwise combn fails with an error | |
325 } | 404 } |
326 #print("baz") | 405 ) |
327 "dummy" # need to return a value; otherwise combn fails with an error | 406 ## pass 2 - contrast each selected level with each of the other levels individually ## |
328 } | 407 completed <- c() |
329 ) | 408 utils::combn( |
409 x = level_union | |
410 , m = 2 | |
411 , FUN = function(x) { | |
412 fctr_lvl_1 <- x[1] | |
413 fctr_lvl_2 <- x[2] | |
414 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) | |
415 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) ) | |
416 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) | |
417 if (length(unique(chosen_samples)) < 1) { | |
418 progress_action("NOTHING TO PLOT...") | |
419 } else { | |
420 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples]) | |
421 predictor <- chosen_facC | |
422 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] | |
423 # only process this column if both factors are members of lvlCSV | |
424 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) | |
425 my_cor_cov <- do_detail_plot( | |
426 x_dataMatrix = my_matrix | |
427 , x_predictor = predictor | |
428 , x_is_match = is_match | |
429 , x_algorithm = algoC | |
430 , x_prefix = "Features" | |
431 , x_show_labels = labelFeatures | |
432 , x_progress = progress_action | |
433 , x_env = calc_env | |
434 ) | |
435 if ( is.null(my_cor_cov) ) { | |
436 progress_action("NOTHING TO PLOT") | |
437 } else { | |
438 tsv <- my_cor_cov$tsv1 | |
439 # tsv$salientLevel <- salient_level_lookup(tsv$featureID) | |
440 # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) | |
441 # tsv$salience <- salience_lookup(tsv$featureID) | |
442 corcov_tsv_action(tsv) | |
443 did_plot <<- TRUE | |
444 } | |
445 } | |
446 #print("baz") | |
447 "dummy" # need to return a value; otherwise combn fails with an error | |
448 } | |
449 ) | |
450 } else { | |
451 progress_action("NOTHING TO PLOT....") | |
452 } | |
330 } | 453 } |
331 if (!did_plot) { | 454 if (!did_plot) { |
332 failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) | 455 failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) |
333 return ( FALSE ) | 456 return ( FALSE ) |
334 } | 457 } |