comparison mqppep_anova_script.Rmd @ 15:2c5f1a2fe16a draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 96659062ea07ac43d139746b0d119f1ee020f9cd"
author eschen42
date Sat, 26 Mar 2022 02:27:12 +0000
parents 6679616d0c18
children 61adb8801b73
comparison
equal deleted inserted replaced
14:6679616d0c18 15:2c5f1a2fe16a
6 pdf_document: 6 pdf_document:
7 toc: true 7 toc: true
8 latex_document: 8 latex_document:
9 toc: true 9 toc: true
10 params: 10 params:
11 inputFile: "test-data/test_input_for_anova.tabular"
12 alphaFile: "test-data/alpha_levels.tabular" 11 alphaFile: "test-data/alpha_levels.tabular"
13 firstDataColumn: "Intensity" 12 inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular"
13 firstDataColumn: "^Intensity[^_]"
14 imputationMethod: !r c("group-median", "median", "mean", "random")[4] 14 imputationMethod: !r c("group-median", "median", "mean", "random")[4]
15 meanPercentile: 1 15 meanPercentile: 1
16 sdPercentile: 1.0 16 sdPercentile: 1.0
17 regexSampleNames: "\\.\\d+[A-Z]$" 17 regexSampleNames: "\\.\\d+[A-Z]$"
18 regexSampleGrouping: "\\d+" 18 regexSampleGrouping: "\\d+"
19 imputedDataFilename: "test-data/imputedDataFilename.txt" 19 imputedDataFilename: "test-data/limbo/imputedDataFilename.txt"
20 imputedQNLTDataFile: "test-data/imputedQNLTDataFile.txt" 20 imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt"
21 show_toc: true 21 show_toc: true
22 --- 22 ---
23 <!-- 23 <!--
24 alphaFile: "test-data/alpha_levels.tabular"
25 inputFile: "test-data/test_input_for_anova.tabular"
26 inputFile: "test-data/UT_Phospho_ST_Sites.preproc.tabular"
27 inputFile: "test-data/density_failure.preproc_tab.tabular"
24 latex_document: default 28 latex_document: default
25 inputFile: "test-data/test_input_for_anova.tabular"
26 inputFile: "test-data/density_failure.preproc_tab.tabular"
27 inputFile: "test-data/UT_Phospho_STY_Sites.preproc_tab"
28 date: "May 28, 2018; Mar 16, 2022"
29 --> 29 -->
30 ```{r setup, include = FALSE} 30 ```{r setup, include = FALSE}
31 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 31 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
32 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) 32 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10))
33 33
42 const_stripchart_cex <- 0.5 42 const_stripchart_cex <- 0.5
43 const_stripsmall_cex <- 43 const_stripsmall_cex <-
44 sqrt(const_stripchart_cex * const_stripchart_cex / 2) 44 sqrt(const_stripchart_cex * const_stripchart_cex / 2)
45 const_stripchart_jitter <- 0.3 45 const_stripchart_jitter <- 0.3
46 const_write_debug_files <- FALSE 46 const_write_debug_files <- FALSE
47 const_table_anchor <- "tbp"
47 48
48 ### FUNCTIONS 49 ### FUNCTIONS
49 50
50 #ANOVA filter function 51 #ANOVA filter function
51 anova_func <- function(x, grouping_factor) { 52 anova_func <- function(x, grouping_factor) {
101 latex_collapsed_vector(" & ", v) 102 latex_collapsed_vector(" & ", v)
102 cat(" \\\\\n") 103 cat(" \\\\\n")
103 } 104 }
104 105
105 # Use this like print.data.frame, from which it is adapted: 106 # Use this like print.data.frame, from which it is adapted:
106 print_data_frame_latex <- 107 data_frame_latex <-
107 function( 108 function(
108 x, 109 x,
109 ..., 110 ...,
110 # digits to pass to format.data.frame 111 # digits to pass to format.data.frame
111 digits = NULL, 112 digits = NULL,
114 # maximumn number of rows to print 115 # maximumn number of rows to print
115 max = NULL, 116 max = NULL,
116 # string with justification of each column 117 # string with justification of each column
117 justification = NULL, 118 justification = NULL,
118 # TRUE to center on page 119 # TRUE to center on page
119 centered = FALSE, 120 centered = TRUE,
120 # optional capttion 121 # optional capttion
121 caption = NULL, 122 caption = NULL,
122 # h(inline); b(bottom); t (top) or p (separate page) 123 # h(inline); b(bottom); t (top) or p (separate page)
123 anchor = "h" 124 anchor = "h"
124 ) { 125 ) {
198 invisible(x) 199 invisible(x)
199 } 200 }
200 201
201 ``` 202 ```
202 203
203 ## Purpose: 204 ## Purpose
204 205
205 Perform imputation of missing values, quantile normalization, and ANOVA. 206 Perform imputation of missing values, quantile normalization, and ANOVA.
206 207
207 <!--
208 ## Variables to change for each input file
209 -->
210 ```{r include = FALSE} 208 ```{r include = FALSE}
211 # Input Filename 209 # Input Filename
212 input_file <- params$inputFile 210 input_file <- params$inputFile
213 211
214 # First data column - ideally, this could be detected via regexSampleNames, 212 # First data column - ideally, this could be detected via regexSampleNames,
215 # but for now leave it as is. 213 # but for now leave it as is.
216 first_data_column <- params$firstDataColumn 214 first_data_column <- params$firstDataColumn
217 fdc_is_integer <- TRUE 215 fdc_is_integer <- is.integer(first_data_column)
218 first_data_column <- withCallingHandlers( 216 if (fdc_is_integer) {
219 as.integer(first_data_column) 217 first_data_column <- as.integer(params$firstDataColumn)
220 , warning = function(w) fdc_is_integer <<- FALSE
221 )
222 if (FALSE == fdc_is_integer) {
223 first_data_column <- params$firstDataColumn
224 } 218 }
225 219
226 # False discovery rate adjustment for ANOVA 220 # False discovery rate adjustment for ANOVA
227 # Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05 221 # Since pY abundance is low, set to 0.10 and 0.20 in addition to 0.05
228 val_fdr <- 222 val_fdr <-
229 read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")[, 1] 223 read.table(file = params$alphaFile, sep = "\t", header = F, quote = "")
224
225 if (
226 ncol(val_fdr) != 1 ||
227 sum(!is.numeric(val_fdr[, 1])) ||
228 sum(val_fdr[, 1] < 0) ||
229 sum(val_fdr[, 1] > 1)
230 ) {
231 stop("alphaFile should be one column of numbers within the range [0.0,1.0]")
232 }
233 val_fdr <- val_fdr[, 1]
230 234
231 #Imputed Data filename 235 #Imputed Data filename
232 imputed_data_filename <- params$imputedDataFilename 236 imputed_data_filename <- params$imputedDataFilename
233 imp_qn_lt_data_filenm <- params$imputedQNLTDataFile 237 imp_qn_lt_data_filenm <- params$imputedQNLTDataFile
234 238
272 quote = "", 276 quote = "",
273 check.names = FALSE 277 check.names = FALSE
274 ) 278 )
275 ``` 279 ```
276 280
277 ### Parse column names, sample names, and factor levels from input file 281 ## Extract Sample Names and Factor Levels
282
283 Column names parsed from input file are shown in Table 1; sample names and factor levels, in Table 2.
278 284
279 ```{r echo = FALSE, results = 'asis'} 285 ```{r echo = FALSE, results = 'asis'}
280 # Write column naames as an enumerated list.
281 column_name_df <- data.frame(
282 column = seq_len(length(colnames(full_data))),
283 name = colnames(full_data)
284 )
285 print_data_frame_latex(
286 x = column_name_df,
287 justification = "l l",
288 centered = TRUE,
289 caption = "Input data column name",
290 anchor = "h"
291 )
292 286
293 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) 287 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE)
294 cat( 288
295 sprintf( 289 if (!fdc_is_integer) {
296 "\n\nData columns: [%d,%d]\n\n",
297 min(data_column_indices),
298 max(data_column_indices)
299 )
300 )
301
302 if (FALSE == fdc_is_integer) {
303 if (length(data_column_indices) > 0) { 290 if (length(data_column_indices) > 0) {
304 first_data_column <- data_column_indices[1] 291 first_data_column <- data_column_indices[1]
305 } else { 292 } else {
306 stop(paste("failed to convert firstDataColumn:", first_data_column)) 293 stop(paste("failed to convert firstDataColumn:", first_data_column))
307 } 294 }
308 } 295 }
296
297 cat(
298 sprintf(
299 paste(
300 "\n\nPeptide-intensity data for each sample is",
301 "in one of columns %d through %d.\n\n"
302 ),
303 min(data_column_indices),
304 max(data_column_indices)
305 )
306 )
307
308 # Write column names as a LaTeX enumerated list.
309 column_name_df <- data.frame(
310 column = seq_len(length(colnames(full_data))),
311 name = colnames(full_data)
312 )
313 data_frame_latex(
314 x = column_name_df,
315 justification = "l l",
316 centered = TRUE,
317 caption = "Input data column name",
318 anchor = const_table_anchor
319 )
309 320
310 ``` 321 ```
311 322
312 ```{r echo = FALSE, results = 'asis'} 323 ```{r echo = FALSE, results = 'asis'}
313 #```{r echo = FALSE, results = 'asis'} 324 #```{r echo = FALSE, results = 'asis'}
334 number_of_samples <- length(sample_name_matches) 345 number_of_samples <- length(sample_name_matches)
335 sample_factor_df <- data.frame( 346 sample_factor_df <- data.frame(
336 sample = sample_name_matches, 347 sample = sample_name_matches,
337 level = sample_factor_levels 348 level = sample_factor_levels
338 ) 349 )
339 print_data_frame_latex( 350 data_frame_latex(
340 x = sample_factor_df, 351 x = sample_factor_df,
341 justification = "c c", 352 justification = "c c",
342 centered = TRUE, 353 centered = TRUE,
343 caption = "Factor level", 354 caption = "Factor level",
344 anchor = "h" 355 anchor = const_table_anchor
345 ) 356 )
346 ``` 357 ```
347 ```{r echo = FALSE, results = 'asis'} 358 ```{r echo = FALSE, results = 'asis'}
348 cat("\\newpage\n") 359 cat("\\newpage\n")
349 ``` 360 ```
368 # Vertical plot 379 # Vertical plot
369 boxplot( 380 boxplot(
370 quant_data_log 381 quant_data_log
371 , las = 1 382 , las = 1
372 , col = const_boxplot_fill 383 , col = const_boxplot_fill
384 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
385 , xlab = "Sample"
373 ) 386 )
374 # Points
375 stripchart(
376 quant_data_log, # Data
377 method = "jitter", # Random noise
378 jitter = const_stripchart_jitter,
379 pch = 19, # Pch symbols
380 cex = const_stripchart_cex, # Size of symbols reduced
381 col = "goldenrod", # Color of the symbol
382 vertical = TRUE, # Vertical mode
383 add = TRUE # Add it over
384 )
385 par(old_par) 387 par(old_par)
386 388
387 389
388 390
389 cat("\n\n\n") 391 cat("\n\n\n")
396 if (nrow(quant_data_log) > 1) { 398 if (nrow(quant_data_log) > 1) {
397 quant_data_log_stack <- stack(quant_data_log) 399 quant_data_log_stack <- stack(quant_data_log)
398 ggplot( 400 ggplot(
399 quant_data_log_stack, 401 quant_data_log_stack,
400 aes(x = values) 402 aes(x = values)
401 ) + 403 ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) +
404 ylab("Probability density") +
402 geom_density( 405 geom_density(
403 aes(group = ind, colour = ind), 406 aes(group = ind, colour = ind),
404 na.rm = TRUE 407 na.rm = TRUE
405 ) 408 )
406 } else { 409 } else {
409 ``` 412 ```
410 413
411 ### Globally, are peptide intensities are approximately unimodal? 414 ### Globally, are peptide intensities are approximately unimodal?
412 415
413 <!-- 416 <!--
414 # ref for bquote below particularly and plotting math expressions generally: 417 # bquote could be used as an alternative to latex2exp::TeX below particularly
418 # and when plotting math expressions generally, at the expense of mastering
419 # another syntax, which hardly seems worthwhile when I need to use TeX
420 # elsewhere; here's an introduction to bquote:
415 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/ 421 # https://www.r-bloggers.com/2018/03/math-notation-for-r-plot-titles-expression-and-bquote/
416 --> 422 -->
417 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'} 423 ```{r echo = FALSE, fig.align = "left", fig.dim = c(9, 5), results = 'asis'}
418 424
419 # identify the location of missing values 425 # identify the location of missing values
420 fin <- is.finite(as.numeric(as.matrix(quant_data_log))) 426 fin <- is.finite(as.numeric(as.matrix(quant_data_log)))
421 427
422 logvalues <- as.numeric(as.matrix(quant_data_log))[fin] 428 logvalues <- as.numeric(as.matrix(quant_data_log))[fin]
429 logvalues_density <- density(logvalues)
423 plot( 430 plot(
424 density(logvalues), 431 x = logvalues_density,
425 main = bquote( 432 main = latex2exp::TeX(
426 "Smoothed estimated probability density vs." ~ log[10](intensity)), 433 "Smoothed estimated probability density vs. $log_{10}$(peptide intensity)"
427 xlab = bquote(log[10](intensity)) 434 ),
435 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"),
436 ylab = "Probability density"
428 ) 437 )
429 hist( 438 hist(
430 x = as.numeric(as.matrix(quant_data_log)) 439 x = as.numeric(as.matrix(quant_data_log)),
431 , breaks = 100 440 xlim = c(min(logvalues_density$x), max(logvalues_density$x)),
432 , main = bquote("Frequency vs." ~ log[10](intensity)) 441 breaks = 100,
433 , xlab = bquote(log[10](intensity)) 442 main = latex2exp::TeX("Frequency vs. $log_{10}$(peptide intensity)"),
443 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)")
434 ) 444 )
435 ``` 445 ```
436 446
437 ### Distribution of standard deviations of $log_{10}(\text{intensity})$, ignoring missing values: 447 ### Distribution of standard deviations of $log_{10}(\text{intensity})$, ignoring missing values:
438 448
450 if (sum(!is.na(sds)) > 2) { 460 if (sum(!is.na(sds)) > 2) {
451 plot( 461 plot(
452 density(sds, na.rm = T) 462 density(sds, na.rm = T)
453 , main = "Smoothed estimated probability density vs. std. deviation" 463 , main = "Smoothed estimated probability density vs. std. deviation"
454 , sub = "(probability estimation made with Gaussian smoothing)" 464 , sub = "(probability estimation made with Gaussian smoothing)"
465 , ylab = "Probability density"
455 ) 466 )
456 } else { 467 } else {
457 cat( 468 cat(
458 "At least two non-missing values are required to plot", 469 "At least two non-missing values are required to plot",
459 "probability density.\n\n" 470 "probability density.\n\n"
508 switch( 519 switch(
509 imputation_method 520 imputation_method
510 , "group-median" = { 521 , "group-median" = {
511 imputation_method_description <- 522 imputation_method_description <-
512 paste("Substitute missing value with", 523 paste("Substitute missing value with",
513 "median peptide intensity for sample group\n" 524 "median peptide intensity for sample group.\n"
514 ) 525 )
515 sample_level_integers <- as.integer(sample_factor_levels) 526 sample_level_integers <- as.integer(sample_factor_levels)
516 for (i in seq_len(length(levels(sample_factor_levels)))) { 527 for (i in seq_len(length(levels(sample_factor_levels)))) {
517 level_cols <- i == sample_level_integers 528 level_cols <- i == sample_level_integers
518 ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE) 529 ind <- which(is.na(quant_data_imp[, level_cols]), arr.ind = TRUE)
522 good_rows <- !is.na(rowMeans(quant_data_imp)) 533 good_rows <- !is.na(rowMeans(quant_data_imp))
523 } 534 }
524 , "median" = { 535 , "median" = {
525 imputation_method_description <- 536 imputation_method_description <-
526 paste("Substitute missing value with", 537 paste("Substitute missing value with",
527 "median peptide intensity across all sample classes\n" 538 "median peptide intensity across all sample classes.\n"
528 ) 539 )
529 quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]] 540 quant_data_imp[ind] <- apply(quant_data_imp, 1, median, na.rm = T)[ind[, 1]]
530 good_rows <- !is.na(rowMeans(quant_data_imp)) 541 good_rows <- !is.na(rowMeans(quant_data_imp))
531 } 542 }
532 , "mean" = { 543 , "mean" = {
533 imputation_method_description <- 544 imputation_method_description <-
534 paste("Substitute missing value with", 545 paste("Substitute missing value with",
535 "mean peptide intensity across all sample classes\n" 546 "mean peptide intensity across all sample classes.\n"
536 ) 547 )
537 quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]] 548 quant_data_imp[ind] <- apply(quant_data_imp, 1, mean, na.rm = T)[ind[, 1]]
538 good_rows <- !is.na(rowMeans(quant_data_imp)) 549 good_rows <- !is.na(rowMeans(quant_data_imp))
539 } 550 }
540 , "random" = { 551 , "random" = {
542 # If you want results to be reproducible, you will want to call 553 # If you want results to be reproducible, you will want to call
543 # base::set.seed before calling stats::rnorm 554 # base::set.seed before calling stats::rnorm
544 imputation_method_description <- 555 imputation_method_description <-
545 paste("Substitute each missing value with random intensity", 556 paste("Substitute each missing value with random intensity",
546 sprintf( 557 sprintf(
547 "random intensity $N \\sim (%0.2f, %0.2f)$\n", 558 "random intensity $N \\sim (%0.2f, %0.2f)$.\n",
548 q1, m1 559 q1, m1
549 ) 560 )
550 ) 561 )
551 cat(sprintf("mean_percentile (from input parameter) is %2.0f\n\n", 562 cat(sprintf("mean_percentile (from input parameter) is %2.0f\n\n",
552 100 * mean_percentile)) 563 100 * mean_percentile))
553 cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n", 564 cat(sprintf("sd_percentile (from input parameter) is %0.2f\n\n",
554 sd_percentile)) 565 sd_percentile))
555 #ACE cat(sprintf("sd for rnorm is %0.4f\n\n", m1))
556 quant_data_imp[ind] <- 566 quant_data_imp[ind] <-
557 10 ^ rnorm(number_to_impute, mean = q1, sd = m1) 567 10 ^ rnorm(number_to_impute, mean = q1, sd = m1)
558 good_rows <- !is.na(rowMeans(quant_data_imp)) 568 good_rows <- !is.na(rowMeans(quant_data_imp))
559 } 569 }
560 ) 570 )
682 log10(quant_data_imp) 692 log10(quant_data_imp)
683 693
684 write_debug_file(quant_data_imp_log10) 694 write_debug_file(quant_data_imp_log10)
685 695
686 red_dots <- quant_data_imp_log10 * x 696 red_dots <- quant_data_imp_log10 * x
687 ylim <- c( 697 count_red <- sum(!is.na(red_dots))
698 count_blue <- sum(!is.na(blue_dots))
699 ylim_save <- ylim <- c(
688 min(red_dots, blue_dots, na.rm = TRUE), 700 min(red_dots, blue_dots, na.rm = TRUE),
689 max(red_dots, blue_dots, na.rm = TRUE) 701 max(red_dots, blue_dots, na.rm = TRUE)
690 ) 702 )
691 # ref: https://r-charts.com/distribution/add-points-boxplot/ 703 show_stripchart <-
704 50 > (count_red + count_blue) / length(sample_name_matches)
705 if (show_stripchart) {
706 boxplot_sub <- "Light blue = data before imputation; Red = imputed data"
707 } else {
708 boxplot_sub <- ""
709 }
710
692 # Vertical plot 711 # Vertical plot
693 colnames(blue_dots) <- sample_name_matches 712 colnames(blue_dots) <- sample_name_matches
694 boxplot( 713 boxplot(
695 blue_dots 714 blue_dots
696 , las = 1 # "always horizontal" 715 , las = 1 # "always horizontal"
697 , col = const_boxplot_fill 716 , col = const_boxplot_fill
698 , ylim = ylim 717 , ylim = ylim
699 , main = "Peptide intensities before and after imputation" 718 , main = "Peptide intensities before and after imputation"
700 , sub = "Light blue = data before imputation; Red = imputed data" 719 , sub = boxplot_sub
701 , xlab = "Sample" 720 , xlab = "Sample"
702 , ylab = "log10(peptide intensity)" 721 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
703 ) 722 )
704 # Points 723
705 # NA values are not plotted 724 if (show_stripchart) {
706 stripchart( 725 # Points
707 blue_dots, # Data 726 # ref: https://r-charts.com/distribution/add-points-boxplot/
708 method = "jitter", # Random noise 727 # NA values are not plotted
709 jitter = const_stripchart_jitter, 728 stripchart(
710 pch = 19, # Pch symbols 729 blue_dots, # Data
711 cex = const_stripsmall_cex, # Size of symbols reduced 730 method = "jitter", # Random noise
712 col = "lightblue", # Color of the symbol 731 jitter = const_stripchart_jitter,
713 vertical = TRUE, # Vertical mode 732 pch = 19, # Pch symbols
714 add = TRUE # Add it over 733 cex = const_stripsmall_cex, # Size of symbols reduced
715 ) 734 col = "lightblue", # Color of the symbol
716 stripchart( 735 vertical = TRUE, # Vertical mode
717 red_dots, # Data 736 add = TRUE # Add it over
718 method = "jitter", # Random noise 737 )
719 jitter = const_stripchart_jitter, 738 stripchart(
720 pch = 19, # Pch symbols 739 red_dots, # Data
721 cex = const_stripsmall_cex, # Size of symbols reduced 740 method = "jitter", # Random noise
722 col = "red", # Color of the symbol 741 jitter = const_stripchart_jitter,
723 vertical = TRUE, # Vertical mode 742 pch = 19, # Pch symbols
724 add = TRUE # Add it over 743 cex = const_stripsmall_cex, # Size of symbols reduced
725 ) 744 col = "red", # Color of the symbol
745 vertical = TRUE, # Vertical mode
746 add = TRUE # Add it over
747 )
748
749 } else {
750 # violin plot
751 cat("\\leavevmode\n\\quad\n\n\\quad\n\n")
752 vioplot::vioplot(
753 x = lapply(blue_dots, function(x) x[!is.na(x)]),
754 col = "lightblue1",
755 side = "left",
756 plotCentre = "line",
757 ylim = ylim_save,
758 main = "Distributions of observed and imputed data",
759 sub = "Light blue = observed data; Pink = imputed data",
760 xlab = "Sample",
761 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
762 )
763 vioplot::vioplot(
764 x = lapply(red_dots, function(x) x[!is.na(x)]),
765 col = "lightpink1",
766 side = "right",
767 plotCentre = "line",
768 add = T
769 )
770 }
771
726 par(old_par) 772 par(old_par)
727 773
728 # density plot 774 # density plot
729 cat("\\leavevmode\n\n\n\n\n\n\n") 775 cat("\\leavevmode\n\n\n\n\n\n\n")
730 if (can_plot_before_after_imp) { 776 if (can_plot_before_after_imp) {
736 paste( 782 paste(
737 "Blue = data before imputation; Red = imputed data;", 783 "Blue = data before imputation; Red = imputed data;",
738 "Black = combined" 784 "Black = combined"
739 ), 785 ),
740 main = "Density of peptide intensity before and after imputation", 786 main = "Density of peptide intensity before and after imputation",
741 xlab = "log10(peptide intensity)", 787 xlab = latex2exp::TeX("$log_{10}$(peptide intensity)"),
742 ylab = "Probability density" 788 ylab = "Probability density"
743 ) 789 )
744 lines(d_original, col = "blue") 790 lines(d_original, col = "blue")
745 lines(d_imputed, col = "red") 791 lines(d_imputed, col = "red")
746 } else { 792 } else {
907 colnames(quant_data_log) <- sample_name_matches 953 colnames(quant_data_log) <- sample_name_matches
908 boxplot( 954 boxplot(
909 quant_data_log 955 quant_data_log
910 , las = 1 956 , las = 1
911 , col = const_boxplot_fill 957 , col = const_boxplot_fill
912 ) 958 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
913 # Points 959 , xlab = "Sample"
914 stripchart( 960 )
915 quant_data_log, # Data
916 method = "jitter", # Random noise
917 jitter = const_stripchart_jitter,
918 pch = 19, # Pch symbols
919 cex = const_stripchart_cex, # Size of symbols reduced
920 col = "goldenrod", # Color of the symbol
921 vertical = TRUE, # Vertical mode
922 add = TRUE # Add it over
923 )
924 par(old_par) 961 par(old_par)
925 } else { 962 } else {
926 cat("There are no peptides to plot\n") 963 cat("There are no peptides to plot\n")
927 } 964 }
928 965
934 if (nrow(quant_data_log) > 1) { 971 if (nrow(quant_data_log) > 1) {
935 quant_data_log_stack <- stack(quant_data_log) 972 quant_data_log_stack <- stack(quant_data_log)
936 ggplot( 973 ggplot(
937 quant_data_log_stack, 974 quant_data_log_stack,
938 aes(x = values) 975 aes(x = values)
939 ) + 976 ) + xlab(latex2exp::TeX("$log_{10}$(peptide intensity)")) +
977 ylab("Probability density") +
940 geom_density( 978 geom_density(
941 aes(group = ind, colour = ind), 979 aes(group = ind, colour = ind),
942 na.rm = TRUE 980 na.rm = TRUE
943 ) 981 )
944 } else { 982 } else {
947 ``` 985 ```
948 ```{r echo = FALSE, results = 'asis'} 986 ```{r echo = FALSE, results = 'asis'}
949 cat("\\leavevmode\\newpage\n") 987 cat("\\leavevmode\\newpage\n")
950 ``` 988 ```
951 989
952 ## Perform ANOVA filters 990 ## Perform ANOVA Filters
953 991
954 ```{r, echo = FALSE} 992 ```{r, echo = FALSE}
955 # Make new data frame containing only Phosphopeptides 993 # Make new data frame containing only Phosphopeptides
956 # to connect preANOVA to ANOVA (connect_df) 994 # to connect preANOVA to ANOVA (connect_df)
957 connect_df <- data.frame( 995 connect_df <- data.frame(
1123 boxplot( 1161 boxplot(
1124 filtered_data_filtered, 1162 filtered_data_filtered,
1125 main = "Imputed, normalized intensities", # no line plot 1163 main = "Imputed, normalized intensities", # no line plot
1126 las = 1, 1164 las = 1,
1127 col = const_boxplot_fill, 1165 col = const_boxplot_fill,
1128 ylab = expression(log[10](intensity)) 1166 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)")
1129 ) 1167 )
1130 # Points
1131 stripchart(
1132 filtered_data_filtered, # Data
1133 method = "jitter", # Random noise
1134 jitter = const_stripchart_jitter,
1135 pch = 19, # Pch symbols
1136 cex = const_stripchart_cex, # Size of symbols reduced
1137 col = "goldenrod", # Color of the symbol
1138 vertical = TRUE, # Vertical mode
1139 add = TRUE # Add it over
1140 )
1141 par(old_par) 1168 par(old_par)
1142 } else { 1169 } else {
1143 cat(sprintf( 1170 cat(sprintf(
1144 "%s < %0.2f\n\n\n\n\n", 1171 "%s < %0.2f\n\n\n\n\n",
1145 "No peptides were found to have cutoff adjusted p-value <", 1172 "No peptides were found to have cutoff adjusted p-value",
1146 cutoff 1173 cutoff
1147 )) 1174 ))
1148 } 1175 }
1149 1176
1150 if (nrow(filtered_data_filtered) > 0) { 1177 if (nrow(filtered_data_filtered) > 0) {
1227 "whose adjusted p-value < %0.2f\n", 1254 "whose adjusted p-value < %0.2f\n",
1228 cutoff) 1255 cutoff)
1229 ) 1256 )
1230 } else { 1257 } else {
1231 if (nrow(m) == 1) { 1258 if (nrow(m) == 1) {
1259 next
1260 } else {
1232 cat( 1261 cat(
1233 sprintf("Heatmap for %d usable peptides whose", nrow(m)), 1262 sprintf("Heatmap for %d usable peptides whose", nrow(m)),
1234 sprintf("adjusted p-value < %0.2f\n", cutoff) 1263 sprintf("adjusted p-value < %0.2f\n", cutoff)
1235 ) 1264 )
1236 next
1237 } 1265 }
1238 } 1266 }
1239 cat("\n\n\n") 1267 cat("\n\n\n")
1240 cat("\n\n\n") 1268 cat("\n\n\n")
1241 try( 1269 try(
1261 } 1289 }
1262 } 1290 }
1263 } 1291 }
1264 cat("\\leavevmode\n\n\n") 1292 cat("\\leavevmode\n\n\n")
1265 ``` 1293 ```
1266
1267 <!--
1268 ## Peptide IDs, etc.
1269
1270 See output files.
1271 -->