comparison w4mcorcov_calc.R @ 1:e25fd8a13665 draft

planemo upload for repository https://github.com/HegemanLab/w4mcorcov_galaxy_wrapper/tree/master commit bd26542b811de06c1a877337a2840a9f899c2b94
author eschen42
date Mon, 16 Oct 2017 09:18:29 -0400
parents 50a07adddfbd
children a06344808ffc
comparison
equal deleted inserted replaced
0:50a07adddfbd 1:e25fd8a13665
40 lim_x <- 1.2 40 lim_x <- 1.2
41 main_label <- sprintf("%s for levels %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2) 41 main_label <- sprintf("%s for levels %s versus %s", x_prefix, fctr_lvl_1, fctr_lvl_2)
42 # print("main_label") 42 # print("main_label")
43 # print(main_label) 43 # print(main_label)
44 main_cex <- min(1.0, 46.0/nchar(main_label)) 44 main_cex <- min(1.0, 46.0/nchar(main_label))
45 # " It is generally accepted that a variable should be selected if vj>1, [27–29], but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]." (Mehmood 2012 doi:10.1016/j.chemolab.2004.12.011) 45 # "It is generally accepted that a variable should be selected if vj>1, [27–29],
46 # but a proper threshold between 0.83 and 1.21 can yield more relevant variables according to [28]."
47 # (Mehmood 2012 doi:10.1186/1748-7188-6-27)
46 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83))) 48 vipco <- pmax(0, pmin(1,(vip4p-0.83)/(1.21-0.83)))
47 alpha <- 0.1 + 0.4 * vipco 49 alpha <- 0.1 + 0.4 * vipco
48 red <- as.numeric(correlation < 0) * vipco 50 red <- as.numeric(correlation < 0) * vipco
49 blue <- as.numeric(correlation > 0) * vipco 51 blue <- as.numeric(correlation > 0) * vipco
50 minus_cor <- -correlation 52 minus_cor <- -correlation
51 minus_cov <- -covariance 53 minus_cov <- -covariance
52 # x_progress("head(names(minus_cor)): ", head(names(minus_cor))) 54 # cex <- salience_lookup(feature = names(minus_cor))
53 cex <- salience_lookup(feature = names(minus_cor)) 55 # cex <- 0.25 + (1.25 * cex / max(cex))
54 # x_progress("head(cex.1): ", head(cex)) 56 cex <- 0.75
55 # cex <- 0.25 + (0.75 * cex / max(cex))
56 cex <- 0.25 + (1.25 * cex / max(cex))
57 # x_progress("head(cex.2): ", head(cex))
58 plot( 57 plot(
59 y = minus_cor 58 y = minus_cor
60 , x = minus_cov 59 , x = minus_cov
61 , type="p" 60 , type="p"
62 , xlim=c(-lim_x, lim_x + off(0.1)) 61 , xlim=c(-lim_x, lim_x + off(0.1))
127 return (NULL) 126 return (NULL)
128 } 127 }
129 } 128 }
130 129
131 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510 130 # S-PLOT and OPLS reference: Wiklund_2008 doi:10.1021/ac0713510
132 corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = NULL) { 131 corcov_calc <- function(calc_env, failure_action = stop, progress_action = function(x){}, corcov_tsv_action = function(t){}, salience_tsv_action = function(t){}) {
133 if ( ! is.environment(calc_env) ) { 132 if ( ! is.environment(calc_env) ) {
134 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment") 133 failure_action("corcov_calc: fatal error - 'calc_env' is not an environment")
135 return ( FALSE ) 134 return ( FALSE )
136 } 135 }
137 if ( is.null(corcov_tsv_action) || ! is.function(corcov_tsv_action) ) { 136 if ( ! is.function(corcov_tsv_action) ) {
138 failure_action("corcov_calc: fatal error - 'corcov_tsv_action' is not a function") 137 failure_action("corcov_calc: fatal error - 'corcov_tsv_action' is not a function")
138 return ( FALSE )
139 }
140 if ( ! is.function(salience_tsv_action) ) {
141 failure_action("salience_calc: fatal error - 'salience_tsv_action' is not a function")
139 return ( FALSE ) 142 return ( FALSE )
140 } 143 }
141 144
142 # extract parameters from the environment 145 # extract parameters from the environment
143 vrbl_metadata <- calc_env$vrbl_metadata 146 vrbl_metadata <- calc_env$vrbl_metadata
163 salience_df <- calc_env$salience_df <- w4msalience( 166 salience_df <- calc_env$salience_df <- w4msalience(
164 data_matrix = data_matrix 167 data_matrix = data_matrix
165 , sample_class = smpl_metadata[,facC] 168 , sample_class = smpl_metadata[,facC]
166 , failure_action = failure_action 169 , failure_action = failure_action
167 ) 170 )
171 salience_tsv_action({
172 my_df <- data.frame(
173 featureID = salience_df$feature
174 , salientLevel = salience_df$max_level
175 , salientRCV = salience_df$salient_rcv
176 , salience = salience_df$salience
177 )
178 my_df[order(-my_df$salience),]
179 })
168 salience <- salience_df$salience 180 salience <- salience_df$salience
169 names(salience) <- salience_df$feature 181 names(salience) <- salience_df$feature
170 salience_lookup <- calc_env$salience_lookup <- function(feature) unname(salience[feature]) 182 salience_lookup <- calc_env$salience_lookup <- function(feature) unname(salience[feature])
171 salient_rcv <- salience_df$salient_rcv 183 salient_rcv <- salience_df$salient_rcv
172 names(salient_rcv) <- salience_df$feature 184 names(salient_rcv) <- salience_df$feature
229 241
230 did_plot <- FALSE 242 did_plot <- FALSE
231 if (tesC != "none") { 243 if (tesC != "none") {
232 # for each column name, extract the parts of the name matched by 'col_pattern', if any 244 # for each column name, extract the parts of the name matched by 'col_pattern', if any
233 the_colnames <- colnames(vrbl_metadata) 245 the_colnames <- colnames(vrbl_metadata)
234 if (!Reduce(f = "||", x = grepl(tesC, the_colnames))) { 246 if ( ! Reduce( f = "||", x = grepl(tesC, the_colnames) ) ) {
235 failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC)) 247 failure_action(sprintf("bad parameter! variableMetadata must contain results of W4M Univariate test '%s'.", tesC))
236 return ( FALSE ) 248 return ( FALSE )
237 } 249 }
238 col_matches <- lapply( 250 col_matches <- lapply(
239 X = the_colnames, 251 X = the_colnames,
240 FUN = function(x) { 252 FUN = function(x) {
241 regmatches( x, regexec(col_pattern, x) )[[1]] 253 regmatches( x, regexec(col_pattern, x) )[[1]]
242 } 254 }
243 ) 255 )
256 ## first contrast each selected level with all other levels combined into one "super-level" ##
257 # process columns matching the pattern
258 level_union <- c()
259 for (i in 1:length(col_matches)) {
260 col_match <- col_matches[[i]]
261 if (length(col_match) > 0) {
262 # it's an actual match; extract the pieces, e.g., k10_kruskal_k4.k3_sig
263 vrbl_metadata_col <- col_match[1] # ^^^^^^^^^^^^^^^^^^^^^ # Column name
264 fctr_lvl_1 <- col_match[2] # ^^ # Factor-level 1
265 fctr_lvl_2 <- col_match[3] # ^^ # Factor-level 2
266 # only process this column if both factors are members of lvlCSV
267 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
268 if (is_match) {
269 level_union <- c(level_union, col_match[2], col_match[3])
270 }
271 }
272 }
273 level_union <- unique(sort(level_union))
274 overall_significant <- 1 == vrbl_metadata[,intersample_sig_col]
275 if ( length(level_union) > 1 ) {
276 chosen_samples <- smpl_metadata_facC %in% level_union
277 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
278 col_selector <- vrbl_metadata_names[ overall_significant ]
279 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
280 fctr_lvl_2 <- "other"
281 for ( fctr_lvl_1 in level_union[1:length(level_union)] ) {
282 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
283 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
284 my_cor_cov <- do_detail_plot(
285 x_dataMatrix = my_matrix
286 , x_predictor = predictor
287 , x_is_match = is_match
288 , x_algorithm = algoC
289 , x_prefix = if (pairSigFeatOnly) "Significantly contrasting features" else "Significant features"
290 , x_show_labels = labelFeatures
291 , x_progress = progress_action
292 , x_env = calc_env
293 )
294 if ( is.null(my_cor_cov) ) {
295 progress_action("NOTHING TO PLOT.")
296 } else {
297 tsv <- my_cor_cov$tsv1
298 # tsv$salientLevel <- salient_level_lookup(tsv$featureID)
299 # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID)
300 # tsv$salience <- salience_lookup(tsv$featureID)
301 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
302 corcov_tsv_action(tsv)
303 did_plot <- TRUE
304 }
305 }
306 }
307
308 ## next, contrast each selected level with each of the other levels individually ##
244 # process columns matching the pattern 309 # process columns matching the pattern
245 for (i in 1:length(col_matches)) { 310 for (i in 1:length(col_matches)) {
246 # for each potential match of the pattern 311 # for each potential match of the pattern
247 col_match <- col_matches[[i]] 312 col_match <- col_matches[[i]]
248 if (length(col_match) > 0) { 313 if (length(col_match) > 0) {
258 # choose only samples with one of the two factors for this column 323 # choose only samples with one of the two factors for this column
259 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 324 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
260 predictor <- smpl_metadata_facC[chosen_samples] 325 predictor <- smpl_metadata_facC[chosen_samples]
261 # extract only the significantly-varying features and the chosen samples 326 # extract only the significantly-varying features and the chosen samples
262 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * vrbl_metadata[,intersample_sig_col] 327 fully_significant <- 1 == vrbl_metadata[,vrbl_metadata_col] * vrbl_metadata[,intersample_sig_col]
263 overall_significant <- 1 == vrbl_metadata[,intersample_sig_col]
264 col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ] 328 col_selector <- vrbl_metadata_names[ if ( pairSigFeatOnly ) fully_significant else overall_significant ]
265 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ] 329 my_matrix <- scdm[ chosen_samples, col_selector, drop = FALSE ]
266 my_cor_cov <- do_detail_plot( 330 my_cor_cov <- do_detail_plot(
267 x_dataMatrix = my_matrix 331 x_dataMatrix = my_matrix
268 , x_predictor = predictor 332 , x_predictor = predictor
275 ) 339 )
276 if ( is.null(my_cor_cov) ) { 340 if ( is.null(my_cor_cov) ) {
277 progress_action("NOTHING TO PLOT.") 341 progress_action("NOTHING TO PLOT.")
278 } else { 342 } else {
279 tsv <- my_cor_cov$tsv1 343 tsv <- my_cor_cov$tsv1
280 tsv$salientLevel <- salient_level_lookup(tsv$featureID) 344 # tsv$salientLevel <- salient_level_lookup(tsv$featureID)
281 tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) 345 # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID)
282 tsv$salience <- salience_lookup(tsv$featureID) 346 # tsv$salience <- salience_lookup(tsv$featureID)
283 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ] 347 tsv["level1Level2Sig"] <- vrbl_metadata[ match(tsv$featureID, vrbl_metadata_names), vrbl_metadata_col ]
284 corcov_tsv_action(tsv) 348 corcov_tsv_action(tsv)
285 did_plot <- TRUE 349 did_plot <- TRUE
286 } 350 }
287 } 351 }
288 } 352 }
289 } else { # tesC == "none" 353 } else { # tesC == "none"
290 utils::combn( 354 level_union <- unique(sort(smpl_metadata_facC))
291 x = unique(sort(smpl_metadata_facC)) 355 if ( length(level_union) > 1 ) {
292 , m = 2 356 ## pass 1 - contrast each selected level with all other levels combined into one "super-level" ##
293 , FUN = function(x) { 357 completed <- c()
294 fctr_lvl_1 <- x[1] 358 lapply(
295 fctr_lvl_2 <- x[2] 359 X = level_union
296 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2)) 360 , FUN = function(x) {
297 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2) 361 fctr_lvl_1 <- x[1]
298 if (length(unique(chosen_samples)) < 1) { 362 fctr_lvl_2 <- {
299 progress_action("NOTHING TO PLOT...") 363 if ( fctr_lvl_1 %in% completed )
300 } else { 364 return("DUMMY")
301 predictor <- smpl_metadata_facC[chosen_samples] 365 # strF(completed)
302 my_matrix <- scdm[ chosen_samples, , drop = FALSE ] 366 completed <<- c(completed, fctr_lvl_1)
303 # only process this column if both factors are members of lvlCSV 367 setdiff(level_union, fctr_lvl_1)
304 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2) 368 }
305 my_cor_cov <- do_detail_plot( 369 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
306 x_dataMatrix = my_matrix 370 fctr_lvl_2 <- "other"
307 , x_predictor = predictor 371 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
308 , x_is_match = is_match 372 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
309 , x_algorithm = algoC 373 if (length(unique(chosen_samples)) < 1) {
310 , x_prefix = "Features" 374 progress_action("NOTHING TO PLOT...")
311 , x_show_labels = labelFeatures
312 , x_progress = progress_action
313 , x_env = calc_env
314 )
315 if ( is.null(my_cor_cov) ) {
316 progress_action("NOTHING TO PLOT")
317 } else { 375 } else {
318 tsv <- my_cor_cov$tsv1 376 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
319 tsv$salientLevel <- salient_level_lookup(tsv$featureID) 377 predictor <- sapply( X = chosen_facC, FUN = function(fac) if ( fac == fctr_lvl_1 ) fctr_lvl_1 else fctr_lvl_2 )
320 tsv$salientRCV <- salient_rcv_lookup(tsv$featureID) 378 my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
321 tsv$salience <- salience_lookup(tsv$featureID) 379 # only process this column if both factors are members of lvlCSV
322 corcov_tsv_action(tsv) 380 is_match <- isLevelSelected(fctr_lvl_1)
323 did_plot <<- TRUE 381 my_cor_cov <- do_detail_plot(
382 x_dataMatrix = my_matrix
383 , x_predictor = predictor
384 , x_is_match = is_match
385 , x_algorithm = algoC
386 , x_prefix = "Features"
387 , x_show_labels = labelFeatures
388 , x_progress = progress_action
389 , x_env = calc_env
390 )
391 if ( is.null(my_cor_cov) ) {
392 progress_action("NOTHING TO PLOT")
393 } else {
394 tsv <- my_cor_cov$tsv1
395 # tsv$salientLevel <- salient_level_lookup(tsv$featureID)
396 # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID)
397 # tsv$salience <- salience_lookup(tsv$featureID)
398 corcov_tsv_action(tsv)
399 did_plot <<- TRUE
400 }
324 } 401 }
402 #print("baz")
403 "dummy" # need to return a value; otherwise combn fails with an error
325 } 404 }
326 #print("baz") 405 )
327 "dummy" # need to return a value; otherwise combn fails with an error 406 ## pass 2 - contrast each selected level with each of the other levels individually ##
328 } 407 completed <- c()
329 ) 408 utils::combn(
409 x = level_union
410 , m = 2
411 , FUN = function(x) {
412 fctr_lvl_1 <- x[1]
413 fctr_lvl_2 <- x[2]
414 chosen_samples <- smpl_metadata_facC %in% c(fctr_lvl_1, fctr_lvl_2)
415 # print( sprintf("sum(chosen_samples) %d, factor_level_2 %s", sum(chosen_samples), fctr_lvl_2) )
416 progress_action(sprintf("calculating/plotting contrast of %s vs. %s", fctr_lvl_1, fctr_lvl_2))
417 if (length(unique(chosen_samples)) < 1) {
418 progress_action("NOTHING TO PLOT...")
419 } else {
420 chosen_facC <- as.character(smpl_metadata_facC[chosen_samples])
421 predictor <- chosen_facC
422 my_matrix <- scdm[ chosen_samples, , drop = FALSE ]
423 # only process this column if both factors are members of lvlCSV
424 is_match <- isLevelSelected(fctr_lvl_1) && isLevelSelected(fctr_lvl_2)
425 my_cor_cov <- do_detail_plot(
426 x_dataMatrix = my_matrix
427 , x_predictor = predictor
428 , x_is_match = is_match
429 , x_algorithm = algoC
430 , x_prefix = "Features"
431 , x_show_labels = labelFeatures
432 , x_progress = progress_action
433 , x_env = calc_env
434 )
435 if ( is.null(my_cor_cov) ) {
436 progress_action("NOTHING TO PLOT")
437 } else {
438 tsv <- my_cor_cov$tsv1
439 # tsv$salientLevel <- salient_level_lookup(tsv$featureID)
440 # tsv$salientRCV <- salient_rcv_lookup(tsv$featureID)
441 # tsv$salience <- salience_lookup(tsv$featureID)
442 corcov_tsv_action(tsv)
443 did_plot <<- TRUE
444 }
445 }
446 #print("baz")
447 "dummy" # need to return a value; otherwise combn fails with an error
448 }
449 )
450 } else {
451 progress_action("NOTHING TO PLOT....")
452 }
330 } 453 }
331 if (!did_plot) { 454 if (!did_plot) {
332 failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV)) 455 failure_action(sprintf("bad parameter! sampleMetadata must have at least two levels of factor '%s' matching '%s'", facC, originalLevCSV))
333 return ( FALSE ) 456 return ( FALSE )
334 } 457 }