Mercurial > repos > eschen42 > mqppep_anova
comparison mqppep_anova_script.Rmd @ 15:2c5f1a2fe16a draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 96659062ea07ac43d139746b0d119f1ee020f9cd"
author | eschen42 |
---|---|
date | Sat, 26 Mar 2022 02:27:12 +0000 |
parents | 6679616d0c18 |
children | 61adb8801b73 |
comparison
equal
deleted
inserted
replaced
14:6679616d0c18 | 15:2c5f1a2fe16a |
---|---|
6 pdf_document: | 6 pdf_document: |
7 toc: true | 7 toc: true |
8 latex_document: | 8 latex_document: |
9 toc: true | 9 toc: true |
10 params: | 10 params: |
11 inputFile: "test-data/test_input_for_anova.tabular" | |
12 alphaFile: "test-data/alpha_levels.tabular" | 11 alphaFile: "test-data/alpha_levels.tabular" |
13 firstDataColumn: "Intensity" | 12 inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular" |
13 firstDataColumn: "^Intensity[^_]" | |
14 imputationMethod: !r c("group-median", "median", "mean", "random")[4] | 14 imputationMethod: !r c("group-median", "median", "mean", "random")[4] |
15 meanPercentile: 1 | 15 meanPercentile: 1 |
16 sdPercentile: 1.0 | 16 sdPercentile: 1.0 |
17 regexSampleNames: "\\.\\d+[A-Z]$" | 17 regexSampleNames: "\\.\\d+[A-Z]$" |
18 regexSampleGrouping: "\\d+" | 18 regexSampleGrouping: "\\d+" |
19 imputedDataFilename: "test-data/imputedDataFilename.txt" | 19 imputedDataFilename: "test-data/limbo/imputedDataFilename.txt" |
20 imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt" | 20 imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt" |
21 show_toc: true | 21 show_toc: true |
22 --- | 22 --- |
23 <!-- | 23 <!-- |
24 alphaFile: "test-data/alpha_levels.tabular" | |
25 inputFile: "test-data/test_input_for_anova.tabular" | |
26 inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular" | |
27 inputFile: "test-data/density_failure.preproc_tab.tabular" | |
24 latex_document: default | 28 latex_document: default |
25 inputFile: "test-data/test_input_for_anova.tabular" | |
26 inputFile: "test-data/density_failure.preproc_tab.tabular" | |
27 inputFile: "test-data/UT_Phospho_STY_Sites.preproc_tab" | |
28 date: "May 28, 2018; Mar 16, 2022" | |
29 --> | 29 --> |
30 ```{r setup, include = FALSE} | 30 ```{r setup, include = FALSE} |
31 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 | 31 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 |
32 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) | 32 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) |
33 | 33 |
42 const_stripchart_cex <- 0.5 | 42 const_stripchart_cex <- 0.5 |
43 const_stripsmall_cex <- | 43 const_stripsmall_cex <- |
44 sqrt(const_stripchart_cex * const_stripchart_cex / 2) | 44 sqrt(const_stripchart_cex * const_stripchart_cex / 2) |
45 const_stripchart_jitter <- 0.3 | 45 const_stripchart_jitter <- 0.3 |
46 const_write_debug_files <- FALSE | 46 const_write_debug_files <- FALSE |
47 const_table_anchor <- "tbp" | |
47 | 48 |
48 ### FUNCTIONS | 49 ### FUNCTIONS |
49 | 50 |
50 #ANOVA filter function | 51 #ANOVA filter function |
51 anova_func <- function(x, grouping_factor) { | 52 anova_func <- function(x, grouping_factor) { |
101 latex_collapsed_vector(" & ", v) | 102 latex_collapsed_vector(" & ", v) |
102 cat(" \\\\\n") | 103 cat(" \\\\\n") |
103 } | 104 } |
104 | 105 |
105 # Use this like print.data.frame, from which it is adapted: | 106 # Use this like print.data.frame, from which it is adapted: |
106 print_data_frame_latex <- | 107 data_frame_latex <- |
107 function( | 108 function( |
108 x, | 109 x, |
109 ..., | 110 ..., |
110 # digits to pass to format.data.frame | 111 # digits to pass to format.data.frame |
111 digits = NULL, | 112 digits = NULL, |
114 # maximumn number of rows to print | 115 # maximumn number of rows to print |
115 max = NULL, | 116 max = NULL, |
116 # string with justification of each column | 117 # string with justification of each column |
117 justification = NULL, | 118 justification = NULL, |
118 # TRUE to center on page | 119 # TRUE to center on page |
119 centered = FALSE, | 120 centered = TRUE, |
120 # optional capttion | 121 # optional capttion |
121 caption = NULL, | 122 caption = NULL, |
122 # h(inline); b(bottom); t (top) or p (separate page) | 123 # h(inline); b(bottom); t (top) or p (separate page) |
123 anchor = "h" | 124 anchor = "h" |
124 ) { | 125 ) { |
198 invisible(x) | 199 invisible(x) |
199 } | 200 } |
200 | 201 |
201 ``` | 202 ``` |
202 | 203 |
203 ## Purpose: | 204 ## Purpose |
204 | 205 |
205 Perform imputation of missing values, quantile normalization, and ANOVA. | 206 Perform imputation of missing values, quantile normalization, and ANOVA. |
206 | 207 |
207 <!-- | |
208 ## Variables to change for each input file | |
209 --> | |
210 ```{r include = FALSE} | 208 ```{r include = FALSE} |
211 # Input Filename | 209 # Input Filename |
212 input_file <- params$inputFile | 210 input_file <- params$inputFile |
213 | 211 |
214 # First data column - ideally, this could be detected via regexSampleNames, | 212 # First data column - ideally, this could be detected via regexSampleNames, |
215 # but for now leave it as is. | 213 # but for now leave it as is. |
216 first_data_column <- params$firstDataColumn | 214 first_data_column <- params$firstDataColumn |
217 fdc_is_integer <- TRUE | 215 fdc_is_integer <- is.integer(first_data_column) |
218 first_data_column <- withCallingHandlers( | 216 if (fdc_is_integer) { |
219 as.integer(first_data_column) | 217 first_data_column <- as.integer(params$firstDataColumn) |
220 , warning = function(w) fdc_is_integer <<- FALSE | |
221 ) | |
222 if (FALSE == fdc_is_integer) { | |
223 first_data_column <- params$firstDataColumn | |
224 } | 218 } |
225 | 219 |
226 # False discovery rate adjustment for ANOVA | 220 # False discovery rate adjustment for ANOVA |
227 # Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05 | 221 # Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05 |
228 val_fdr <- | 222 val_fdr <- |
229 read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")[, 1] | 223 read.table(file = params$alphaFile, sep = "\t", header = F, quote = "") |
224 | |
225 if ( | |
226 ncol(val_fdr) != 1 || | |
227 sum(!is.numeric(val_fdr[, 1])) || | |
228 sum(val_fdr[, 1] < 0) || | |
229 sum(val_fdr[, 1] > 1) | |
230 ) { | |
231 stop("alphaFile should be one column of numbers within the range [0.0,1.0]") | |
232 } | |
233 val_fdr <- val_fdr[, 1] | |
230 | 234 |
231 #Imputed Data filename | 235 #Imputed Data filename |
232 imputed_data_filename <- params$imputedDataFilename | 236 imputed_data_filename <- params$imputedDataFilename |
233 imp_qn_lt_data_filenm <- params$imputedQNLTDataFile | 237 imp_qn_lt_data_filenm <- params$imputedQNLTDataFile |
234 | 238 |
272 quote = "", | 276 quote = "", |
273 check.names = FALSE | 277 check.names = FALSE |
274 ) | 278 ) |
275 ``` | 279 ``` |
276 | 280 |
277 ### Parse column names, sample names, and factor levels from input file | 281 ## Extract Sample Names and Factor Levels |
282 | |
283 Column names parsed from input file are shown in Table 1; sample names and factor levels, in Table 2. | |
278 | 284 |
279 ```{r echo = FALSE, results = 'asis'} | 285 ```{r echo = FALSE, results = 'asis'} |
280 # Write column naames as an enumerated list. | |
281 column_name_df <- data.frame( | |
282 column = seq_len(length(colnames(full_data))), | |
283 name = colnames(full_data) | |
284 ) | |
285 print_data_frame_latex( | |
286 x = column_name_df, | |
287 justification = "l l", | |
288 centered = TRUE, | |
289 caption = "Input data column name", | |
290 anchor = "h" | |
291 ) | |
292 | 286 |
293 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) | 287 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) |
294 cat( | 288 |
295 sprintf( | 289 if (!fdc_is_integer) { |
296 "\n\nData columns: [%d,%d]\n\n", | |
297 min(data_column_indices), | |
298 max(data_column_indices) | |
299 ) | |
300 ) | |
301 | |
302 if (FALSE == fdc_is_integer) { | |
303 if (length(data_column_indices) > 0) { | 290 if (length(data_column_indices) > 0) { |
304 first_data_column <- data_column_indices[1] | 291 first_data_column <- data_column_indices[1] |
305 } else { | 292 } else { |
306 stop(paste("failed to convert firstDataColumn:", first_data_column)) | 293 stop(paste("failed to convert firstDataColumn:", first_data_column)) |
307 } | 294 } |
308 } | 295 } |
296 | |
297 cat( | |
298 sprintf( | |
299 paste( | |
300 "\n\nPeptide-intensity data for each sample is", | |
301 "in one of columns %d through %d.\n\n" | |
302 ), | |
303 min(data_column_indices), | |
304 max(data_column_indices) | |
305 ) | |
306 ) | |
307 | |
308 # Write column names as a LaTeX enumerated list. | |
309 column_name_df <- data.frame( | |
310 column = seq_len(length(colnames(full_data))), | |
311 name = colnames(full_data) | |
312 ) | |
313 data_frame_latex( | |
314 x = column_name_df, | |
315 justification = "l l", | |
316 centered = TRUE, | |
317 caption = "Input data column name", | |
318 anchor = const_table_anchor | |
319 ) | |
309 | 320 |
310 ``` | 321 ``` |
311 | 322 |
312 ```{r echo = FALSE, results = 'asis'} | 323 ```{r echo = FALSE, results = 'asis'} |
313 #```{r echo = FALSE, results = 'asis'} | 324 #```{r echo = FALSE, results = 'asis'} |
334 number_of_samples <- length(sample_name_matches) | 345 number_of_samples <- length(sample_name_matches) |
335 sample_factor_df <- data.frame( | 346 sample_factor_df <- data.frame( |
336 sample = sample_name_matches, | 347 sample = sample_name_matches, |
337 level = sample_factor_levels | 348 level = sample_factor_levels |
338 ) | 349 ) |
339 print_data_frame_latex( | 350 data_frame_latex( |
340 x = sample_factor_df, | 351 x = sample_factor_df, |
341 justification = "c c", | 352 justification = "c c", |
342 centered = TRUE, | 353 centered = TRUE, |
343 caption = "Factor level", | 354 caption = "Factor level", |
344 anchor = "h" | 355 anchor = const_table_anchor |
345 ) | 356 ) |
346 ``` | 357 ``` |
347 ```{r echo = FALSE, results = 'asis'} | 358 ```{r echo = FALSE, results = 'asis'} |
348 cat("\\newpage\n") | 359 cat("\\newpage\n") |
349 ``` | 360 ``` |
368 # Vertical plot | 379 # Vertical plot |
369 boxplot( | 380 boxplot( |
370 quant_data_log | 381 quant_data_log |
371 , las = 1 | 382 , las = 1 |
372 , col = const_boxplot_fill | 383 , col = const_boxplot_fill |
384 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | |
385 , xlab = "Sample" | |
373 ) | 386 ) |
374 # Points | |
375 stripchart( | |
376 quant_data_log, # Data | |
377 method = "jitter", # Random noise | |
378 jitter = const_stripchart_jitter, | |
379 pch = 19, # Pch symbols | |
380 cex = const_stripchart_cex, # Size of symbols reduced | |
381 col = "goldenrod", # Color of the symbol | |
382 vertical = TRUE, # Vertical mode | |
383 add = TRUE # Add it over | |
384 ) | |
385 par(old_par) | 387 par(old_par) |
386 | 388 |
387 | 389 |
388 | 390 |
389 cat("\n\n\n") | 391 cat("\n\n\n") |
396 if (nrow(quant_data_log) > 1) { | 398 if (nrow(quant_data_log) > 1) { |
397 quant_data_log_stack <- stack(quant_data_log) | 399 quant_data_log_stack <- stack(quant_data_log) |
398 ggplot( | 400 ggplot( |
399 quant_data_log_stack, | 401 quant_data_log_stack, |
400 aes(x = values) | 402 aes(x = values) |
401 ) + | 403 ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) + |
404 ylab("Probability density") + | |
402 geom_density( | 405 geom_density( |
403 aes(group = ind, colour = ind), | 406 aes(group = ind, colour = ind), |
404 na.rm = TRUE | 407 na.rm = TRUE |
405 ) | 408 ) |
406 } else { | 409 } else { |
409 ``` | 412 ``` |
410 | 413 |
411 ### Globally, are peptide intensities are approximately unimodal? | 414 ### Globally, are peptide intensities are approximately unimodal? |
412 | 415 |
413 <!-- | 416 <!-- |
414 # ref for bquote below particularly and plotting math expressions generally: | 417 # bquote could be used as an alternative to latex2exp::TeX below particularly |
418 # and when plotting math expressions generally, at the expense of mastering | |
419 # another syntax, which hardly seems worthwhile when I need to use TeX | |
420 # elsewhere; here's an introduction to bquote: | |
415 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ | 421 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ |
416 --> | 422 --> |
417 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'} | 423 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'} |
418 | 424 |
419 # identify the location of missing values | 425 # identify the location of missing values |
420 fin <- is.finite(as.numeric(as.matrix(quant_data_log))) | 426 fin <- is.finite(as.numeric(as.matrix(quant_data_log))) |
421 | 427 |
422 logvalues <- as.numeric(as.matrix(quant_data_log))[fin] | 428 logvalues <- as.numeric(as.matrix(quant_data_log))[fin] |
429 logvalues_density <- density(logvalues) | |
423 plot( | 430 plot( |
424 density(logvalues), | 431 x = logvalues_density, |
425 main = bquote( | 432 main = latex2exp::TeX( |
426 "Smoothed estimated probability density vs." ~ log[10](intensity)), | 433 "Smoothed estimated probability density vs. $log_{10}$(peptide intensity)" |
427 xlab = bquote(log[10](intensity)) | 434 ), |
435 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"), | |
436 ylab = "Probability density" | |
428 ) | 437 ) |
429 hist( | 438 hist( |
430 x = as.numeric(as.matrix(quant_data_log)) | 439 x = as.numeric(as.matrix(quant_data_log)), |
431 , breaks = 100 | 440 xlim = c(min(logvalues_density$x), max(logvalues_density$x)), |
432 , main = bquote("Frequency vs." ~ log[10](intensity)) | 441 breaks = 100, |
433 , xlab = bquote(log[10](intensity)) | 442 main = latex2exp::TeX("Frequency vs. $log_{10}$(peptide intensity)"), |
443 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)") | |
434 ) | 444 ) |
435 ``` | 445 ``` |
436 | 446 |
437 ### Distribution of standard deviations of $log_{10}(\text{intensity})$, ignoring missing values: | 447 ### Distribution of standard deviations of $log_{10}(\text{intensity})$, ignoring missing values: |
438 | 448 |
450 if (sum(!is.na(sds)) > 2) { | 460 if (sum(!is.na(sds)) > 2) { |
451 plot( | 461 plot( |
452 density(sds, na.rm = T) | 462 density(sds, na.rm = T) |
453 , main = "Smoothed estimated probability density vs. std. deviation" | 463 , main = "Smoothed estimated probability density vs. std. deviation" |
454 , sub = "(probability estimation made with Gaussian smoothing)" | 464 , sub = "(probability estimation made with Gaussian smoothing)" |
465 , ylab = "Probability density" | |
455 ) | 466 ) |
456 } else { | 467 } else { |
457 cat( | 468 cat( |
458 "At least two non-missing values are required to plot", | 469 "At least two non-missing values are required to plot", |
459 "probability density.\n\n" | 470 "probability density.\n\n" |
508 switch( | 519 switch( |
509 imputation_method | 520 imputation_method |
510 , "group-median" = { | 521 , "group-median" = { |
511 imputation_method_description <- | 522 imputation_method_description <- |
512 paste("Substitute missing value with", | 523 paste("Substitute missing value with", |
513 "median peptide intensity for sample group\n" | 524 "median peptide intensity for sample group.\n" |
514 ) | 525 ) |
515 sample_level_integers <- as.integer(sample_factor_levels) | 526 sample_level_integers <- as.integer(sample_factor_levels) |
516 for (i in seq_len(length(levels(sample_factor_levels)))) { | 527 for (i in seq_len(length(levels(sample_factor_levels)))) { |
517 level_cols <- i == sample_level_integers | 528 level_cols <- i == sample_level_integers |
518 ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE) | 529 ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE) |
522 good_rows <- !is.na(rowMeans(quant_data_imp)) | 533 good_rows <- !is.na(rowMeans(quant_data_imp)) |
523 } | 534 } |
524 , "median" = { | 535 , "median" = { |
525 imputation_method_description <- | 536 imputation_method_description <- |
526 paste("Substitute missing value with", | 537 paste("Substitute missing value with", |
527 "median peptide intensity across all sample classes\n" | 538 "median peptide intensity across all sample classes.\n" |
528 ) | 539 ) |
529 quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]] | 540 quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]] |
530 good_rows <- !is.na(rowMeans(quant_data_imp)) | 541 good_rows <- !is.na(rowMeans(quant_data_imp)) |
531 } | 542 } |
532 , "mean" = { | 543 , "mean" = { |
533 imputation_method_description <- | 544 imputation_method_description <- |
534 paste("Substitute missing value with", | 545 paste("Substitute missing value with", |
535 "mean peptide intensity across all sample classes\n" | 546 "mean peptide intensity across all sample classes.\n" |
536 ) | 547 ) |
537 quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]] | 548 quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]] |
538 good_rows <- !is.na(rowMeans(quant_data_imp)) | 549 good_rows <- !is.na(rowMeans(quant_data_imp)) |
539 } | 550 } |
540 , "random" = { | 551 , "random" = { |
542 # If you want results to be reproducible, you will want to call | 553 # If you want results to be reproducible, you will want to call |
543 # base::set.seed before calling stats::rnorm | 554 # base::set.seed before calling stats::rnorm |
544 imputation_method_description <- | 555 imputation_method_description <- |
545 paste("Substitute each missing value with random intensity", | 556 paste("Substitute each missing value with random intensity", |
546 sprintf( | 557 sprintf( |
547 "random intensity $N \\sim (%0.2f, %0.2f)$\n", | 558 "random intensity $N \\sim (%0.2f, %0.2f)$.\n", |
548 q1, m1 | 559 q1, m1 |
549 ) | 560 ) |
550 ) | 561 ) |
551 cat(sprintf("mean_percentile (from input parameter) is %2.0f\n\n", | 562 cat(sprintf("mean_percentile (from input parameter) is %2.0f\n\n", |
552 100 * mean_percentile)) | 563 100 * mean_percentile)) |
553 cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n", | 564 cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n", |
554 sd_percentile)) | 565 sd_percentile)) |
555 #ACE cat(sprintf("sd for rnorm is %0.4f\n\n", m1)) | |
556 quant_data_imp[ind] <- | 566 quant_data_imp[ind] <- |
557 10 ^ rnorm(number_to_impute, mean = q1, sd = m1) | 567 10 ^ rnorm(number_to_impute, mean = q1, sd = m1) |
558 good_rows <- !is.na(rowMeans(quant_data_imp)) | 568 good_rows <- !is.na(rowMeans(quant_data_imp)) |
559 } | 569 } |
560 ) | 570 ) |
682 log10(quant_data_imp) | 692 log10(quant_data_imp) |
683 | 693 |
684 write_debug_file(quant_data_imp_log10) | 694 write_debug_file(quant_data_imp_log10) |
685 | 695 |
686 red_dots <- quant_data_imp_log10 * x | 696 red_dots <- quant_data_imp_log10 * x |
687 ylim <- c( | 697 count_red <- sum(!is.na(red_dots)) |
698 count_blue <- sum(!is.na(blue_dots)) | |
699 ylim_save <- ylim <- c( | |
688 min(red_dots, blue_dots, na.rm = TRUE), | 700 min(red_dots, blue_dots, na.rm = TRUE), |
689 max(red_dots, blue_dots, na.rm = TRUE) | 701 max(red_dots, blue_dots, na.rm = TRUE) |
690 ) | 702 ) |
691 # ref: https://r-charts.com/distribution/add-points-boxplot/ | 703 show_stripchart <- |
704 50 > (count_red + count_blue) / length(sample_name_matches) | |
705 if (show_stripchart) { | |
706 boxplot_sub <- "Light blue = data before imputation; Red = imputed data" | |
707 } else { | |
708 boxplot_sub <- "" | |
709 } | |
710 | |
692 # Vertical plot | 711 # Vertical plot |
693 colnames(blue_dots) <- sample_name_matches | 712 colnames(blue_dots) <- sample_name_matches |
694 boxplot( | 713 boxplot( |
695 blue_dots | 714 blue_dots |
696 , las = 1 # "always horizontal" | 715 , las = 1 # "always horizontal" |
697 , col = const_boxplot_fill | 716 , col = const_boxplot_fill |
698 , ylim = ylim | 717 , ylim = ylim |
699 , main = "Peptide intensities before and after imputation" | 718 , main = "Peptide intensities before and after imputation" |
700 , sub = "Light blue = data before imputation; Red = imputed data" | 719 , sub = boxplot_sub |
701 , xlab = "Sample" | 720 , xlab = "Sample" |
702 , ylab = "log10(peptide intensity)" | 721 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
703 ) | 722 ) |
704 # Points | 723 |
705 # NA values are not plotted | 724 if (show_stripchart) { |
706 stripchart( | 725 # Points |
707 blue_dots, # Data | 726 # ref: https://r-charts.com/distribution/add-points-boxplot/ |
708 method = "jitter", # Random noise | 727 # NA values are not plotted |
709 jitter = const_stripchart_jitter, | 728 stripchart( |
710 pch = 19, # Pch symbols | 729 blue_dots, # Data |
711 cex = const_stripsmall_cex, # Size of symbols reduced | 730 method = "jitter", # Random noise |
712 col = "lightblue", # Color of the symbol | 731 jitter = const_stripchart_jitter, |
713 vertical = TRUE, # Vertical mode | 732 pch = 19, # Pch symbols |
714 add = TRUE # Add it over | 733 cex = const_stripsmall_cex, # Size of symbols reduced |
715 ) | 734 col = "lightblue", # Color of the symbol |
716 stripchart( | 735 vertical = TRUE, # Vertical mode |
717 red_dots, # Data | 736 add = TRUE # Add it over |
718 method = "jitter", # Random noise | 737 ) |
719 jitter = const_stripchart_jitter, | 738 stripchart( |
720 pch = 19, # Pch symbols | 739 red_dots, # Data |
721 cex = const_stripsmall_cex, # Size of symbols reduced | 740 method = "jitter", # Random noise |
722 col = "red", # Color of the symbol | 741 jitter = const_stripchart_jitter, |
723 vertical = TRUE, # Vertical mode | 742 pch = 19, # Pch symbols |
724 add = TRUE # Add it over | 743 cex = const_stripsmall_cex, # Size of symbols reduced |
725 ) | 744 col = "red", # Color of the symbol |
745 vertical = TRUE, # Vertical mode | |
746 add = TRUE # Add it over | |
747 ) | |
748 | |
749 } else { | |
750 # violin plot | |
751 cat("\\leavevmode\n\\quad\n\n\\quad\n\n") | |
752 vioplot::vioplot( | |
753 x = lapply(blue_dots, function(x) x[!is.na(x)]), | |
754 col = "lightblue1", | |
755 side = "left", | |
756 plotCentre = "line", | |
757 ylim = ylim_save, | |
758 main = "Distributions of observed and imputed data", | |
759 sub = "Light blue = observed data; Pink = imputed data", | |
760 xlab = "Sample", | |
761 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | |
762 ) | |
763 vioplot::vioplot( | |
764 x = lapply(red_dots, function(x) x[!is.na(x)]), | |
765 col = "lightpink1", | |
766 side = "right", | |
767 plotCentre = "line", | |
768 add = T | |
769 ) | |
770 } | |
771 | |
726 par(old_par) | 772 par(old_par) |
727 | 773 |
728 # density plot | 774 # density plot |
729 cat("\\leavevmode\n\n\n\n\n\n\n") | 775 cat("\\leavevmode\n\n\n\n\n\n\n") |
730 if (can_plot_before_after_imp) { | 776 if (can_plot_before_after_imp) { |
736 paste( | 782 paste( |
737 "Blue = data before imputation; Red = imputed data;", | 783 "Blue = data before imputation; Red = imputed data;", |
738 "Black = combined" | 784 "Black = combined" |
739 ), | 785 ), |
740 main = "Density of peptide intensity before and after imputation", | 786 main = "Density of peptide intensity before and after imputation", |
741 xlab = "log10(peptide intensity)", | 787 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"), |
742 ylab = "Probability density" | 788 ylab = "Probability density" |
743 ) | 789 ) |
744 lines(d_original, col = "blue") | 790 lines(d_original, col = "blue") |
745 lines(d_imputed, col = "red") | 791 lines(d_imputed, col = "red") |
746 } else { | 792 } else { |
907 colnames(quant_data_log) <- sample_name_matches | 953 colnames(quant_data_log) <- sample_name_matches |
908 boxplot( | 954 boxplot( |
909 quant_data_log | 955 quant_data_log |
910 , las = 1 | 956 , las = 1 |
911 , col = const_boxplot_fill | 957 , col = const_boxplot_fill |
912 ) | 958 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
913 # Points | 959 , xlab = "Sample" |
914 stripchart( | 960 ) |
915 quant_data_log, # Data | |
916 method = "jitter", # Random noise | |
917 jitter = const_stripchart_jitter, | |
918 pch = 19, # Pch symbols | |
919 cex = const_stripchart_cex, # Size of symbols reduced | |
920 col = "goldenrod", # Color of the symbol | |
921 vertical = TRUE, # Vertical mode | |
922 add = TRUE # Add it over | |
923 ) | |
924 par(old_par) | 961 par(old_par) |
925 } else { | 962 } else { |
926 cat("There are no peptides to plot\n") | 963 cat("There are no peptides to plot\n") |
927 } | 964 } |
928 | 965 |
934 if (nrow(quant_data_log) > 1) { | 971 if (nrow(quant_data_log) > 1) { |
935 quant_data_log_stack <- stack(quant_data_log) | 972 quant_data_log_stack <- stack(quant_data_log) |
936 ggplot( | 973 ggplot( |
937 quant_data_log_stack, | 974 quant_data_log_stack, |
938 aes(x = values) | 975 aes(x = values) |
939 ) + | 976 ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) + |
977 ylab("Probability density") + | |
940 geom_density( | 978 geom_density( |
941 aes(group = ind, colour = ind), | 979 aes(group = ind, colour = ind), |
942 na.rm = TRUE | 980 na.rm = TRUE |
943 ) | 981 ) |
944 } else { | 982 } else { |
947 ``` | 985 ``` |
948 ```{r echo = FALSE, results = 'asis'} | 986 ```{r echo = FALSE, results = 'asis'} |
949 cat("\\leavevmode\\newpage\n") | 987 cat("\\leavevmode\\newpage\n") |
950 ``` | 988 ``` |
951 | 989 |
952 ## Perform ANOVA filters | 990 ## Perform ANOVA Filters |
953 | 991 |
954 ```{r, echo = FALSE} | 992 ```{r, echo = FALSE} |
955 # Make new data frame containing only Phosphopeptides | 993 # Make new data frame containing only Phosphopeptides |
956 # to connect preANOVA to ANOVA (connect_df) | 994 # to connect preANOVA to ANOVA (connect_df) |
957 connect_df <- data.frame( | 995 connect_df <- data.frame( |
1123 boxplot( | 1161 boxplot( |
1124 filtered_data_filtered, | 1162 filtered_data_filtered, |
1125 main = "Imputed, normalized intensities", # no line plot | 1163 main = "Imputed, normalized intensities", # no line plot |
1126 las = 1, | 1164 las = 1, |
1127 col = const_boxplot_fill, | 1165 col = const_boxplot_fill, |
1128 ylab = expression(log[10](intensity)) | 1166 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
1129 ) | 1167 ) |
1130 # Points | |
1131 stripchart( | |
1132 filtered_data_filtered, # Data | |
1133 method = "jitter", # Random noise | |
1134 jitter = const_stripchart_jitter, | |
1135 pch = 19, # Pch symbols | |
1136 cex = const_stripchart_cex, # Size of symbols reduced | |
1137 col = "goldenrod", # Color of the symbol | |
1138 vertical = TRUE, # Vertical mode | |
1139 add = TRUE # Add it over | |
1140 ) | |
1141 par(old_par) | 1168 par(old_par) |
1142 } else { | 1169 } else { |
1143 cat(sprintf( | 1170 cat(sprintf( |
1144 "%s < %0.2f\n\n\n\n\n", | 1171 "%s < %0.2f\n\n\n\n\n", |
1145 "No peptides were found to have cutoff adjusted p-value <", | 1172 "No peptides were found to have cutoff adjusted p-value", |
1146 cutoff | 1173 cutoff |
1147 )) | 1174 )) |
1148 } | 1175 } |
1149 | 1176 |
1150 if (nrow(filtered_data_filtered) > 0) { | 1177 if (nrow(filtered_data_filtered) > 0) { |
1227 "whose adjusted p-value < %0.2f\n", | 1254 "whose adjusted p-value < %0.2f\n", |
1228 cutoff) | 1255 cutoff) |
1229 ) | 1256 ) |
1230 } else { | 1257 } else { |
1231 if (nrow(m) == 1) { | 1258 if (nrow(m) == 1) { |
1259 next | |
1260 } else { | |
1232 cat( | 1261 cat( |
1233 sprintf("Heatmap for %d usable peptides whose", nrow(m)), | 1262 sprintf("Heatmap for %d usable peptides whose", nrow(m)), |
1234 sprintf("adjusted p-value < %0.2f\n", cutoff) | 1263 sprintf("adjusted p-value < %0.2f\n", cutoff) |
1235 ) | 1264 ) |
1236 next | |
1237 } | 1265 } |
1238 } | 1266 } |
1239 cat("\n\n\n") | 1267 cat("\n\n\n") |
1240 cat("\n\n\n") | 1268 cat("\n\n\n") |
1241 try( | 1269 try( |
1261 } | 1289 } |
1262 } | 1290 } |
1263 } | 1291 } |
1264 cat("\\leavevmode\n\n\n") | 1292 cat("\\leavevmode\n\n\n") |
1265 ``` | 1293 ``` |
1266 | |
1267 <!-- | |
1268 ## Peptide IDs, etc. | |
1269 | |
1270 See output files. | |
1271 --> |