Mercurial > repos > eschen42 > mqppep_anova
comparison mqppep_anova_script.Rmd @ 15:2c5f1a2fe16a draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 96659062ea07ac43d139746b0d119f1ee020f9cd"
| author | eschen42 |
|---|---|
| date | Sat, 26 Mar 2022 02:27:12 +0000 |
| parents | 6679616d0c18 |
| children | 61adb8801b73 |
comparison
equal
deleted
inserted
replaced
| 14:6679616d0c18 | 15:2c5f1a2fe16a |
|---|---|
| 6 pdf_document: | 6 pdf_document: |
| 7 toc: true | 7 toc: true |
| 8 latex_document: | 8 latex_document: |
| 9 toc: true | 9 toc: true |
| 10 params: | 10 params: |
| 11 inputFile: "test-data/test_input_for_anova.tabular" | |
| 12 alphaFile: "test-data/alpha_levels.tabular" | 11 alphaFile: "test-data/alpha_levels.tabular" |
| 13 firstDataColumn: "Intensity" | 12 inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular" |
| 13 firstDataColumn: "^Intensity[^_]" | |
| 14 imputationMethod: !r c("group-median", "median", "mean", "random")[4] | 14 imputationMethod: !r c("group-median", "median", "mean", "random")[4] |
| 15 meanPercentile: 1 | 15 meanPercentile: 1 |
| 16 sdPercentile: 1.0 | 16 sdPercentile: 1.0 |
| 17 regexSampleNames: "\\.\\d+[A-Z]$" | 17 regexSampleNames: "\\.\\d+[A-Z]$" |
| 18 regexSampleGrouping: "\\d+" | 18 regexSampleGrouping: "\\d+" |
| 19 imputedDataFilename: "test-data/imputedDataFilename.txt" | 19 imputedDataFilename: "test-data/limbo/imputedDataFilename.txt" |
| 20 imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt" | 20 imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt" |
| 21 show_toc: true | 21 show_toc: true |
| 22 --- | 22 --- |
| 23 <!-- | 23 <!-- |
| 24 alphaFile: "test-data/alpha_levels.tabular" | |
| 25 inputFile: "test-data/test_input_for_anova.tabular" | |
| 26 inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular" | |
| 27 inputFile: "test-data/density_failure.preproc_tab.tabular" | |
| 24 latex_document: default | 28 latex_document: default |
| 25 inputFile: "test-data/test_input_for_anova.tabular" | |
| 26 inputFile: "test-data/density_failure.preproc_tab.tabular" | |
| 27 inputFile: "test-data/UT_Phospho_STY_Sites.preproc_tab" | |
| 28 date: "May 28, 2018; Mar 16, 2022" | |
| 29 --> | 29 --> |
| 30 ```{r setup, include = FALSE} | 30 ```{r setup, include = FALSE} |
| 31 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 | 31 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 |
| 32 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) | 32 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) |
| 33 | 33 |
| 42 const_stripchart_cex <- 0.5 | 42 const_stripchart_cex <- 0.5 |
| 43 const_stripsmall_cex <- | 43 const_stripsmall_cex <- |
| 44 sqrt(const_stripchart_cex * const_stripchart_cex / 2) | 44 sqrt(const_stripchart_cex * const_stripchart_cex / 2) |
| 45 const_stripchart_jitter <- 0.3 | 45 const_stripchart_jitter <- 0.3 |
| 46 const_write_debug_files <- FALSE | 46 const_write_debug_files <- FALSE |
| 47 const_table_anchor <- "tbp" | |
| 47 | 48 |
| 48 ### FUNCTIONS | 49 ### FUNCTIONS |
| 49 | 50 |
| 50 #ANOVA filter function | 51 #ANOVA filter function |
| 51 anova_func <- function(x, grouping_factor) { | 52 anova_func <- function(x, grouping_factor) { |
| 101 latex_collapsed_vector(" & ", v) | 102 latex_collapsed_vector(" & ", v) |
| 102 cat(" \\\\\n") | 103 cat(" \\\\\n") |
| 103 } | 104 } |
| 104 | 105 |
| 105 # Use this like print.data.frame, from which it is adapted: | 106 # Use this like print.data.frame, from which it is adapted: |
| 106 print_data_frame_latex <- | 107 data_frame_latex <- |
| 107 function( | 108 function( |
| 108 x, | 109 x, |
| 109 ..., | 110 ..., |
| 110 # digits to pass to format.data.frame | 111 # digits to pass to format.data.frame |
| 111 digits = NULL, | 112 digits = NULL, |
| 114 # maximumn number of rows to print | 115 # maximumn number of rows to print |
| 115 max = NULL, | 116 max = NULL, |
| 116 # string with justification of each column | 117 # string with justification of each column |
| 117 justification = NULL, | 118 justification = NULL, |
| 118 # TRUE to center on page | 119 # TRUE to center on page |
| 119 centered = FALSE, | 120 centered = TRUE, |
| 120 # optional capttion | 121 # optional capttion |
| 121 caption = NULL, | 122 caption = NULL, |
| 122 # h(inline); b(bottom); t (top) or p (separate page) | 123 # h(inline); b(bottom); t (top) or p (separate page) |
| 123 anchor = "h" | 124 anchor = "h" |
| 124 ) { | 125 ) { |
| 198 invisible(x) | 199 invisible(x) |
| 199 } | 200 } |
| 200 | 201 |
| 201 ``` | 202 ``` |
| 202 | 203 |
| 203 ## Purpose: | 204 ## Purpose |
| 204 | 205 |
| 205 Perform imputation of missing values, quantile normalization, and ANOVA. | 206 Perform imputation of missing values, quantile normalization, and ANOVA. |
| 206 | 207 |
| 207 <!-- | |
| 208 ## Variables to change for each input file | |
| 209 --> | |
| 210 ```{r include = FALSE} | 208 ```{r include = FALSE} |
| 211 # Input Filename | 209 # Input Filename |
| 212 input_file <- params$inputFile | 210 input_file <- params$inputFile |
| 213 | 211 |
| 214 # First data column - ideally, this could be detected via regexSampleNames, | 212 # First data column - ideally, this could be detected via regexSampleNames, |
| 215 # but for now leave it as is. | 213 # but for now leave it as is. |
| 216 first_data_column <- params$firstDataColumn | 214 first_data_column <- params$firstDataColumn |
| 217 fdc_is_integer <- TRUE | 215 fdc_is_integer <- is.integer(first_data_column) |
| 218 first_data_column <- withCallingHandlers( | 216 if (fdc_is_integer) { |
| 219 as.integer(first_data_column) | 217 first_data_column <- as.integer(params$firstDataColumn) |
| 220 , warning = function(w) fdc_is_integer <<- FALSE | |
| 221 ) | |
| 222 if (FALSE == fdc_is_integer) { | |
| 223 first_data_column <- params$firstDataColumn | |
| 224 } | 218 } |
| 225 | 219 |
| 226 # False discovery rate adjustment for ANOVA | 220 # False discovery rate adjustment for ANOVA |
| 227 # Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05 | 221 # Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05 |
| 228 val_fdr <- | 222 val_fdr <- |
| 229 read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")[, 1] | 223 read.table(file = params$alphaFile, sep = "\t", header = F, quote = "") |
| 224 | |
| 225 if ( | |
| 226 ncol(val_fdr) != 1 || | |
| 227 sum(!is.numeric(val_fdr[, 1])) || | |
| 228 sum(val_fdr[, 1] < 0) || | |
| 229 sum(val_fdr[, 1] > 1) | |
| 230 ) { | |
| 231 stop("alphaFile should be one column of numbers within the range [0.0,1.0]") | |
| 232 } | |
| 233 val_fdr <- val_fdr[, 1] | |
| 230 | 234 |
| 231 #Imputed Data filename | 235 #Imputed Data filename |
| 232 imputed_data_filename <- params$imputedDataFilename | 236 imputed_data_filename <- params$imputedDataFilename |
| 233 imp_qn_lt_data_filenm <- params$imputedQNLTDataFile | 237 imp_qn_lt_data_filenm <- params$imputedQNLTDataFile |
| 234 | 238 |
| 272 quote = "", | 276 quote = "", |
| 273 check.names = FALSE | 277 check.names = FALSE |
| 274 ) | 278 ) |
| 275 ``` | 279 ``` |
| 276 | 280 |
| 277 ### Parse column names, sample names, and factor levels from input file | 281 ## Extract Sample Names and Factor Levels |
| 282 | |
| 283 Column names parsed from input file are shown in Table 1; sample names and factor levels, in Table 2. | |
| 278 | 284 |
| 279 ```{r echo = FALSE, results = 'asis'} | 285 ```{r echo = FALSE, results = 'asis'} |
| 280 # Write column naames as an enumerated list. | |
| 281 column_name_df <- data.frame( | |
| 282 column = seq_len(length(colnames(full_data))), | |
| 283 name = colnames(full_data) | |
| 284 ) | |
| 285 print_data_frame_latex( | |
| 286 x = column_name_df, | |
| 287 justification = "l l", | |
| 288 centered = TRUE, | |
| 289 caption = "Input data column name", | |
| 290 anchor = "h" | |
| 291 ) | |
| 292 | 286 |
| 293 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) | 287 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) |
| 294 cat( | 288 |
| 295 sprintf( | 289 if (!fdc_is_integer) { |
| 296 "\n\nData columns: [%d,%d]\n\n", | |
| 297 min(data_column_indices), | |
| 298 max(data_column_indices) | |
| 299 ) | |
| 300 ) | |
| 301 | |
| 302 if (FALSE == fdc_is_integer) { | |
| 303 if (length(data_column_indices) > 0) { | 290 if (length(data_column_indices) > 0) { |
| 304 first_data_column <- data_column_indices[1] | 291 first_data_column <- data_column_indices[1] |
| 305 } else { | 292 } else { |
| 306 stop(paste("failed to convert firstDataColumn:", first_data_column)) | 293 stop(paste("failed to convert firstDataColumn:", first_data_column)) |
| 307 } | 294 } |
| 308 } | 295 } |
| 296 | |
| 297 cat( | |
| 298 sprintf( | |
| 299 paste( | |
| 300 "\n\nPeptide-intensity data for each sample is", | |
| 301 "in one of columns %d through %d.\n\n" | |
| 302 ), | |
| 303 min(data_column_indices), | |
| 304 max(data_column_indices) | |
| 305 ) | |
| 306 ) | |
| 307 | |
| 308 # Write column names as a LaTeX enumerated list. | |
| 309 column_name_df <- data.frame( | |
| 310 column = seq_len(length(colnames(full_data))), | |
| 311 name = colnames(full_data) | |
| 312 ) | |
| 313 data_frame_latex( | |
| 314 x = column_name_df, | |
| 315 justification = "l l", | |
| 316 centered = TRUE, | |
| 317 caption = "Input data column name", | |
| 318 anchor = const_table_anchor | |
| 319 ) | |
| 309 | 320 |
| 310 ``` | 321 ``` |
| 311 | 322 |
| 312 ```{r echo = FALSE, results = 'asis'} | 323 ```{r echo = FALSE, results = 'asis'} |
| 313 #```{r echo = FALSE, results = 'asis'} | 324 #```{r echo = FALSE, results = 'asis'} |
| 334 number_of_samples <- length(sample_name_matches) | 345 number_of_samples <- length(sample_name_matches) |
| 335 sample_factor_df <- data.frame( | 346 sample_factor_df <- data.frame( |
| 336 sample = sample_name_matches, | 347 sample = sample_name_matches, |
| 337 level = sample_factor_levels | 348 level = sample_factor_levels |
| 338 ) | 349 ) |
| 339 print_data_frame_latex( | 350 data_frame_latex( |
| 340 x = sample_factor_df, | 351 x = sample_factor_df, |
| 341 justification = "c c", | 352 justification = "c c", |
| 342 centered = TRUE, | 353 centered = TRUE, |
| 343 caption = "Factor level", | 354 caption = "Factor level", |
| 344 anchor = "h" | 355 anchor = const_table_anchor |
| 345 ) | 356 ) |
| 346 ``` | 357 ``` |
| 347 ```{r echo = FALSE, results = 'asis'} | 358 ```{r echo = FALSE, results = 'asis'} |
| 348 cat("\\newpage\n") | 359 cat("\\newpage\n") |
| 349 ``` | 360 ``` |
| 368 # Vertical plot | 379 # Vertical plot |
| 369 boxplot( | 380 boxplot( |
| 370 quant_data_log | 381 quant_data_log |
| 371 , las = 1 | 382 , las = 1 |
| 372 , col = const_boxplot_fill | 383 , col = const_boxplot_fill |
| 384 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | |
| 385 , xlab = "Sample" | |
| 373 ) | 386 ) |
| 374 # Points | |
| 375 stripchart( | |
| 376 quant_data_log, # Data | |
| 377 method = "jitter", # Random noise | |
| 378 jitter = const_stripchart_jitter, | |
| 379 pch = 19, # Pch symbols | |
| 380 cex = const_stripchart_cex, # Size of symbols reduced | |
| 381 col = "goldenrod", # Color of the symbol | |
| 382 vertical = TRUE, # Vertical mode | |
| 383 add = TRUE # Add it over | |
| 384 ) | |
| 385 par(old_par) | 387 par(old_par) |
| 386 | 388 |
| 387 | 389 |
| 388 | 390 |
| 389 cat("\n\n\n") | 391 cat("\n\n\n") |
| 396 if (nrow(quant_data_log) > 1) { | 398 if (nrow(quant_data_log) > 1) { |
| 397 quant_data_log_stack <- stack(quant_data_log) | 399 quant_data_log_stack <- stack(quant_data_log) |
| 398 ggplot( | 400 ggplot( |
| 399 quant_data_log_stack, | 401 quant_data_log_stack, |
| 400 aes(x = values) | 402 aes(x = values) |
| 401 ) + | 403 ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) + |
| 404 ylab("Probability density") + | |
| 402 geom_density( | 405 geom_density( |
| 403 aes(group = ind, colour = ind), | 406 aes(group = ind, colour = ind), |
| 404 na.rm = TRUE | 407 na.rm = TRUE |
| 405 ) | 408 ) |
| 406 } else { | 409 } else { |
| 409 ``` | 412 ``` |
| 410 | 413 |
| 411 ### Globally, are peptide intensities are approximately unimodal? | 414 ### Globally, are peptide intensities are approximately unimodal? |
| 412 | 415 |
| 413 <!-- | 416 <!-- |
| 414 # ref for bquote below particularly and plotting math expressions generally: | 417 # bquote could be used as an alternative to latex2exp::TeX below particularly |
| 418 # and when plotting math expressions generally, at the expense of mastering | |
| 419 # another syntax, which hardly seems worthwhile when I need to use TeX | |
| 420 # elsewhere; here's an introduction to bquote: | |
| 415 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ | 421 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ |
| 416 --> | 422 --> |
| 417 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'} | 423 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'} |
| 418 | 424 |
| 419 # identify the location of missing values | 425 # identify the location of missing values |
| 420 fin <- is.finite(as.numeric(as.matrix(quant_data_log))) | 426 fin <- is.finite(as.numeric(as.matrix(quant_data_log))) |
| 421 | 427 |
| 422 logvalues <- as.numeric(as.matrix(quant_data_log))[fin] | 428 logvalues <- as.numeric(as.matrix(quant_data_log))[fin] |
| 429 logvalues_density <- density(logvalues) | |
| 423 plot( | 430 plot( |
| 424 density(logvalues), | 431 x = logvalues_density, |
| 425 main = bquote( | 432 main = latex2exp::TeX( |
| 426 "Smoothed estimated probability density vs." ~ log[10](intensity)), | 433 "Smoothed estimated probability density vs. $log_{10}$(peptide intensity)" |
| 427 xlab = bquote(log[10](intensity)) | 434 ), |
| 435 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"), | |
| 436 ylab = "Probability density" | |
| 428 ) | 437 ) |
| 429 hist( | 438 hist( |
| 430 x = as.numeric(as.matrix(quant_data_log)) | 439 x = as.numeric(as.matrix(quant_data_log)), |
| 431 , breaks = 100 | 440 xlim = c(min(logvalues_density$x), max(logvalues_density$x)), |
| 432 , main = bquote("Frequency vs." ~ log[10](intensity)) | 441 breaks = 100, |
| 433 , xlab = bquote(log[10](intensity)) | 442 main = latex2exp::TeX("Frequency vs. $log_{10}$(peptide intensity)"), |
| 443 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)") | |
| 434 ) | 444 ) |
| 435 ``` | 445 ``` |
| 436 | 446 |
| 437 ### Distribution of standard deviations of $log_{10}(\text{intensity})$, ignoring missing values: | 447 ### Distribution of standard deviations of $log_{10}(\text{intensity})$, ignoring missing values: |
| 438 | 448 |
| 450 if (sum(!is.na(sds)) > 2) { | 460 if (sum(!is.na(sds)) > 2) { |
| 451 plot( | 461 plot( |
| 452 density(sds, na.rm = T) | 462 density(sds, na.rm = T) |
| 453 , main = "Smoothed estimated probability density vs. std. deviation" | 463 , main = "Smoothed estimated probability density vs. std. deviation" |
| 454 , sub = "(probability estimation made with Gaussian smoothing)" | 464 , sub = "(probability estimation made with Gaussian smoothing)" |
| 465 , ylab = "Probability density" | |
| 455 ) | 466 ) |
| 456 } else { | 467 } else { |
| 457 cat( | 468 cat( |
| 458 "At least two non-missing values are required to plot", | 469 "At least two non-missing values are required to plot", |
| 459 "probability density.\n\n" | 470 "probability density.\n\n" |
| 508 switch( | 519 switch( |
| 509 imputation_method | 520 imputation_method |
| 510 , "group-median" = { | 521 , "group-median" = { |
| 511 imputation_method_description <- | 522 imputation_method_description <- |
| 512 paste("Substitute missing value with", | 523 paste("Substitute missing value with", |
| 513 "median peptide intensity for sample group\n" | 524 "median peptide intensity for sample group.\n" |
| 514 ) | 525 ) |
| 515 sample_level_integers <- as.integer(sample_factor_levels) | 526 sample_level_integers <- as.integer(sample_factor_levels) |
| 516 for (i in seq_len(length(levels(sample_factor_levels)))) { | 527 for (i in seq_len(length(levels(sample_factor_levels)))) { |
| 517 level_cols <- i == sample_level_integers | 528 level_cols <- i == sample_level_integers |
| 518 ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE) | 529 ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE) |
| 522 good_rows <- !is.na(rowMeans(quant_data_imp)) | 533 good_rows <- !is.na(rowMeans(quant_data_imp)) |
| 523 } | 534 } |
| 524 , "median" = { | 535 , "median" = { |
| 525 imputation_method_description <- | 536 imputation_method_description <- |
| 526 paste("Substitute missing value with", | 537 paste("Substitute missing value with", |
| 527 "median peptide intensity across all sample classes\n" | 538 "median peptide intensity across all sample classes.\n" |
| 528 ) | 539 ) |
| 529 quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]] | 540 quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]] |
| 530 good_rows <- !is.na(rowMeans(quant_data_imp)) | 541 good_rows <- !is.na(rowMeans(quant_data_imp)) |
| 531 } | 542 } |
| 532 , "mean" = { | 543 , "mean" = { |
| 533 imputation_method_description <- | 544 imputation_method_description <- |
| 534 paste("Substitute missing value with", | 545 paste("Substitute missing value with", |
| 535 "mean peptide intensity across all sample classes\n" | 546 "mean peptide intensity across all sample classes.\n" |
| 536 ) | 547 ) |
| 537 quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]] | 548 quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]] |
| 538 good_rows <- !is.na(rowMeans(quant_data_imp)) | 549 good_rows <- !is.na(rowMeans(quant_data_imp)) |
| 539 } | 550 } |
| 540 , "random" = { | 551 , "random" = { |
| 542 # If you want results to be reproducible, you will want to call | 553 # If you want results to be reproducible, you will want to call |
| 543 # base::set.seed before calling stats::rnorm | 554 # base::set.seed before calling stats::rnorm |
| 544 imputation_method_description <- | 555 imputation_method_description <- |
| 545 paste("Substitute each missing value with random intensity", | 556 paste("Substitute each missing value with random intensity", |
| 546 sprintf( | 557 sprintf( |
| 547 "random intensity $N \\sim (%0.2f, %0.2f)$\n", | 558 "random intensity $N \\sim (%0.2f, %0.2f)$.\n", |
| 548 q1, m1 | 559 q1, m1 |
| 549 ) | 560 ) |
| 550 ) | 561 ) |
| 551 cat(sprintf("mean_percentile (from input parameter) is %2.0f\n\n", | 562 cat(sprintf("mean_percentile (from input parameter) is %2.0f\n\n", |
| 552 100 * mean_percentile)) | 563 100 * mean_percentile)) |
| 553 cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n", | 564 cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n", |
| 554 sd_percentile)) | 565 sd_percentile)) |
| 555 #ACE cat(sprintf("sd for rnorm is %0.4f\n\n", m1)) | |
| 556 quant_data_imp[ind] <- | 566 quant_data_imp[ind] <- |
| 557 10 ^ rnorm(number_to_impute, mean = q1, sd = m1) | 567 10 ^ rnorm(number_to_impute, mean = q1, sd = m1) |
| 558 good_rows <- !is.na(rowMeans(quant_data_imp)) | 568 good_rows <- !is.na(rowMeans(quant_data_imp)) |
| 559 } | 569 } |
| 560 ) | 570 ) |
| 682 log10(quant_data_imp) | 692 log10(quant_data_imp) |
| 683 | 693 |
| 684 write_debug_file(quant_data_imp_log10) | 694 write_debug_file(quant_data_imp_log10) |
| 685 | 695 |
| 686 red_dots <- quant_data_imp_log10 * x | 696 red_dots <- quant_data_imp_log10 * x |
| 687 ylim <- c( | 697 count_red <- sum(!is.na(red_dots)) |
| 698 count_blue <- sum(!is.na(blue_dots)) | |
| 699 ylim_save <- ylim <- c( | |
| 688 min(red_dots, blue_dots, na.rm = TRUE), | 700 min(red_dots, blue_dots, na.rm = TRUE), |
| 689 max(red_dots, blue_dots, na.rm = TRUE) | 701 max(red_dots, blue_dots, na.rm = TRUE) |
| 690 ) | 702 ) |
| 691 # ref: https://r-charts.com/distribution/add-points-boxplot/ | 703 show_stripchart <- |
| 704 50 > (count_red + count_blue) / length(sample_name_matches) | |
| 705 if (show_stripchart) { | |
| 706 boxplot_sub <- "Light blue = data before imputation; Red = imputed data" | |
| 707 } else { | |
| 708 boxplot_sub <- "" | |
| 709 } | |
| 710 | |
| 692 # Vertical plot | 711 # Vertical plot |
| 693 colnames(blue_dots) <- sample_name_matches | 712 colnames(blue_dots) <- sample_name_matches |
| 694 boxplot( | 713 boxplot( |
| 695 blue_dots | 714 blue_dots |
| 696 , las = 1 # "always horizontal" | 715 , las = 1 # "always horizontal" |
| 697 , col = const_boxplot_fill | 716 , col = const_boxplot_fill |
| 698 , ylim = ylim | 717 , ylim = ylim |
| 699 , main = "Peptide intensities before and after imputation" | 718 , main = "Peptide intensities before and after imputation" |
| 700 , sub = "Light blue = data before imputation; Red = imputed data" | 719 , sub = boxplot_sub |
| 701 , xlab = "Sample" | 720 , xlab = "Sample" |
| 702 , ylab = "log10(peptide intensity)" | 721 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
| 703 ) | 722 ) |
| 704 # Points | 723 |
| 705 # NA values are not plotted | 724 if (show_stripchart) { |
| 706 stripchart( | 725 # Points |
| 707 blue_dots, # Data | 726 # ref: https://r-charts.com/distribution/add-points-boxplot/ |
| 708 method = "jitter", # Random noise | 727 # NA values are not plotted |
| 709 jitter = const_stripchart_jitter, | 728 stripchart( |
| 710 pch = 19, # Pch symbols | 729 blue_dots, # Data |
| 711 cex = const_stripsmall_cex, # Size of symbols reduced | 730 method = "jitter", # Random noise |
| 712 col = "lightblue", # Color of the symbol | 731 jitter = const_stripchart_jitter, |
| 713 vertical = TRUE, # Vertical mode | 732 pch = 19, # Pch symbols |
| 714 add = TRUE # Add it over | 733 cex = const_stripsmall_cex, # Size of symbols reduced |
| 715 ) | 734 col = "lightblue", # Color of the symbol |
| 716 stripchart( | 735 vertical = TRUE, # Vertical mode |
| 717 red_dots, # Data | 736 add = TRUE # Add it over |
| 718 method = "jitter", # Random noise | 737 ) |
| 719 jitter = const_stripchart_jitter, | 738 stripchart( |
| 720 pch = 19, # Pch symbols | 739 red_dots, # Data |
| 721 cex = const_stripsmall_cex, # Size of symbols reduced | 740 method = "jitter", # Random noise |
| 722 col = "red", # Color of the symbol | 741 jitter = const_stripchart_jitter, |
| 723 vertical = TRUE, # Vertical mode | 742 pch = 19, # Pch symbols |
| 724 add = TRUE # Add it over | 743 cex = const_stripsmall_cex, # Size of symbols reduced |
| 725 ) | 744 col = "red", # Color of the symbol |
| 745 vertical = TRUE, # Vertical mode | |
| 746 add = TRUE # Add it over | |
| 747 ) | |
| 748 | |
| 749 } else { | |
| 750 # violin plot | |
| 751 cat("\\leavevmode\n\\quad\n\n\\quad\n\n") | |
| 752 vioplot::vioplot( | |
| 753 x = lapply(blue_dots, function(x) x[!is.na(x)]), | |
| 754 col = "lightblue1", | |
| 755 side = "left", | |
| 756 plotCentre = "line", | |
| 757 ylim = ylim_save, | |
| 758 main = "Distributions of observed and imputed data", | |
| 759 sub = "Light blue = observed data; Pink = imputed data", | |
| 760 xlab = "Sample", | |
| 761 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | |
| 762 ) | |
| 763 vioplot::vioplot( | |
| 764 x = lapply(red_dots, function(x) x[!is.na(x)]), | |
| 765 col = "lightpink1", | |
| 766 side = "right", | |
| 767 plotCentre = "line", | |
| 768 add = T | |
| 769 ) | |
| 770 } | |
| 771 | |
| 726 par(old_par) | 772 par(old_par) |
| 727 | 773 |
| 728 # density plot | 774 # density plot |
| 729 cat("\\leavevmode\n\n\n\n\n\n\n") | 775 cat("\\leavevmode\n\n\n\n\n\n\n") |
| 730 if (can_plot_before_after_imp) { | 776 if (can_plot_before_after_imp) { |
| 736 paste( | 782 paste( |
| 737 "Blue = data before imputation; Red = imputed data;", | 783 "Blue = data before imputation; Red = imputed data;", |
| 738 "Black = combined" | 784 "Black = combined" |
| 739 ), | 785 ), |
| 740 main = "Density of peptide intensity before and after imputation", | 786 main = "Density of peptide intensity before and after imputation", |
| 741 xlab = "log10(peptide intensity)", | 787 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"), |
| 742 ylab = "Probability density" | 788 ylab = "Probability density" |
| 743 ) | 789 ) |
| 744 lines(d_original, col = "blue") | 790 lines(d_original, col = "blue") |
| 745 lines(d_imputed, col = "red") | 791 lines(d_imputed, col = "red") |
| 746 } else { | 792 } else { |
| 907 colnames(quant_data_log) <- sample_name_matches | 953 colnames(quant_data_log) <- sample_name_matches |
| 908 boxplot( | 954 boxplot( |
| 909 quant_data_log | 955 quant_data_log |
| 910 , las = 1 | 956 , las = 1 |
| 911 , col = const_boxplot_fill | 957 , col = const_boxplot_fill |
| 912 ) | 958 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
| 913 # Points | 959 , xlab = "Sample" |
| 914 stripchart( | 960 ) |
| 915 quant_data_log, # Data | |
| 916 method = "jitter", # Random noise | |
| 917 jitter = const_stripchart_jitter, | |
| 918 pch = 19, # Pch symbols | |
| 919 cex = const_stripchart_cex, # Size of symbols reduced | |
| 920 col = "goldenrod", # Color of the symbol | |
| 921 vertical = TRUE, # Vertical mode | |
| 922 add = TRUE # Add it over | |
| 923 ) | |
| 924 par(old_par) | 961 par(old_par) |
| 925 } else { | 962 } else { |
| 926 cat("There are no peptides to plot\n") | 963 cat("There are no peptides to plot\n") |
| 927 } | 964 } |
| 928 | 965 |
| 934 if (nrow(quant_data_log) > 1) { | 971 if (nrow(quant_data_log) > 1) { |
| 935 quant_data_log_stack <- stack(quant_data_log) | 972 quant_data_log_stack <- stack(quant_data_log) |
| 936 ggplot( | 973 ggplot( |
| 937 quant_data_log_stack, | 974 quant_data_log_stack, |
| 938 aes(x = values) | 975 aes(x = values) |
| 939 ) + | 976 ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) + |
| 977 ylab("Probability density") + | |
| 940 geom_density( | 978 geom_density( |
| 941 aes(group = ind, colour = ind), | 979 aes(group = ind, colour = ind), |
| 942 na.rm = TRUE | 980 na.rm = TRUE |
| 943 ) | 981 ) |
| 944 } else { | 982 } else { |
| 947 ``` | 985 ``` |
| 948 ```{r echo = FALSE, results = 'asis'} | 986 ```{r echo = FALSE, results = 'asis'} |
| 949 cat("\\leavevmode\\newpage\n") | 987 cat("\\leavevmode\\newpage\n") |
| 950 ``` | 988 ``` |
| 951 | 989 |
| 952 ## Perform ANOVA filters | 990 ## Perform ANOVA Filters |
| 953 | 991 |
| 954 ```{r, echo = FALSE} | 992 ```{r, echo = FALSE} |
| 955 # Make new data frame containing only Phosphopeptides | 993 # Make new data frame containing only Phosphopeptides |
| 956 # to connect preANOVA to ANOVA (connect_df) | 994 # to connect preANOVA to ANOVA (connect_df) |
| 957 connect_df <- data.frame( | 995 connect_df <- data.frame( |
| 1123 boxplot( | 1161 boxplot( |
| 1124 filtered_data_filtered, | 1162 filtered_data_filtered, |
| 1125 main = "Imputed, normalized intensities", # no line plot | 1163 main = "Imputed, normalized intensities", # no line plot |
| 1126 las = 1, | 1164 las = 1, |
| 1127 col = const_boxplot_fill, | 1165 col = const_boxplot_fill, |
| 1128 ylab = expression(log[10](intensity)) | 1166 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
| 1129 ) | 1167 ) |
| 1130 # Points | |
| 1131 stripchart( | |
| 1132 filtered_data_filtered, # Data | |
| 1133 method = "jitter", # Random noise | |
| 1134 jitter = const_stripchart_jitter, | |
| 1135 pch = 19, # Pch symbols | |
| 1136 cex = const_stripchart_cex, # Size of symbols reduced | |
| 1137 col = "goldenrod", # Color of the symbol | |
| 1138 vertical = TRUE, # Vertical mode | |
| 1139 add = TRUE # Add it over | |
| 1140 ) | |
| 1141 par(old_par) | 1168 par(old_par) |
| 1142 } else { | 1169 } else { |
| 1143 cat(sprintf( | 1170 cat(sprintf( |
| 1144 "%s < %0.2f\n\n\n\n\n", | 1171 "%s < %0.2f\n\n\n\n\n", |
| 1145 "No peptides were found to have cutoff adjusted p-value <", | 1172 "No peptides were found to have cutoff adjusted p-value", |
| 1146 cutoff | 1173 cutoff |
| 1147 )) | 1174 )) |
| 1148 } | 1175 } |
| 1149 | 1176 |
| 1150 if (nrow(filtered_data_filtered) > 0) { | 1177 if (nrow(filtered_data_filtered) > 0) { |
| 1227 "whose adjusted p-value < %0.2f\n", | 1254 "whose adjusted p-value < %0.2f\n", |
| 1228 cutoff) | 1255 cutoff) |
| 1229 ) | 1256 ) |
| 1230 } else { | 1257 } else { |
| 1231 if (nrow(m) == 1) { | 1258 if (nrow(m) == 1) { |
| 1259 next | |
| 1260 } else { | |
| 1232 cat( | 1261 cat( |
| 1233 sprintf("Heatmap for %d usable peptides whose", nrow(m)), | 1262 sprintf("Heatmap for %d usable peptides whose", nrow(m)), |
| 1234 sprintf("adjusted p-value < %0.2f\n", cutoff) | 1263 sprintf("adjusted p-value < %0.2f\n", cutoff) |
| 1235 ) | 1264 ) |
| 1236 next | |
| 1237 } | 1265 } |
| 1238 } | 1266 } |
| 1239 cat("\n\n\n") | 1267 cat("\n\n\n") |
| 1240 cat("\n\n\n") | 1268 cat("\n\n\n") |
| 1241 try( | 1269 try( |
| 1261 } | 1289 } |
| 1262 } | 1290 } |
| 1263 } | 1291 } |
| 1264 cat("\\leavevmode\n\n\n") | 1292 cat("\\leavevmode\n\n\n") |
| 1265 ``` | 1293 ``` |
| 1266 | |
| 1267 <!-- | |
| 1268 ## Peptide IDs, etc. | |
| 1269 | |
| 1270 See output files. | |
| 1271 --> |
