Mercurial > repos > eschen42 > mqppep_anova
comparison mqppep_anova_script.Rmd @ 12:4deacfee76ef draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e87d28ea433cc26db7fe44768685d08c06f7a0d0"
| author | eschen42 |
|---|---|
| date | Tue, 15 Mar 2022 18:17:55 +0000 |
| parents | d728198f1ba5 |
| children | b41a077af3aa |
comparison
equal
deleted
inserted
replaced
| 11:254ab97c6a2c | 12:4deacfee76ef |
|---|---|
| 9 alphaFile: "test-data/alpha_levels.tabular" | 9 alphaFile: "test-data/alpha_levels.tabular" |
| 10 firstDataColumn: "Intensity" | 10 firstDataColumn: "Intensity" |
| 11 imputationMethod: !r c("group-median", "median", "mean", "random")[1] | 11 imputationMethod: !r c("group-median", "median", "mean", "random")[1] |
| 12 meanPercentile: 1 | 12 meanPercentile: 1 |
| 13 sdPercentile: 0.2 | 13 sdPercentile: 0.2 |
| 14 regexSampleNames: "\\.(\\d+)[A-Z]$" | 14 regexSampleNames: "\\.\\d+[A-Z]$" |
| 15 regexSampleGrouping: "(\\d+)" | 15 regexSampleGrouping: "\\d+" |
| 16 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" | 16 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" |
| 17 --- | 17 --- |
| 18 ```{r setup, include = FALSE} | 18 ```{r setup, include = FALSE} |
| 19 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 | 19 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 |
| 20 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) | 20 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) |
| 568 m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) | 568 m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) |
| 569 | 569 |
| 570 | 570 |
| 571 sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) | 571 sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) |
| 572 | 572 |
| 573 | |
| 574 if (length(levels(sample_factor_levels)) < 2) { | 573 if (length(levels(sample_factor_levels)) < 2) { |
| 574 nuke_control_sequences <- | |
| 575 function(s) { | |
| 576 s <- gsub("[\\]", "xyzzy_plugh", s) | |
| 577 s <- gsub("[$]", "\\\\$", s) | |
| 578 s <- gsub("xyzzy_plugh", "$\\\\backslash$", s) | |
| 579 return(s) | |
| 580 } | |
| 575 cat( | 581 cat( |
| 576 "ERROR!!!! Cannot perform ANOVA analysis", | 582 "ERROR!!!! Cannot perform ANOVA analysis", |
| 577 "because it requires two or more factor levels\n" | 583 "(see next page)\\newpage\n" |
| 578 ) | 584 ) |
| 579 cat("Unparsed sample names are:\n") | 585 cat( |
| 580 print(names(quant_data_imp_qn_log)) | 586 "ERROR: ANOVA analysis", |
| 581 cat(sprintf("Parsing rule for SampleNames is '%s'\n", regex_sample_names)) | 587 "requires two or more factor levels!\\newline\n" |
| 582 cat("Parsed names are:\n") | 588 ) |
| 583 print(temp_matches) | 589 |
| 584 cat(sprintf( | 590 cat("\\newline\\newline\n") |
| 585 "Parsing rule for SampleGrouping is '%s'\n", | 591 cat("Unparsed sample names are:\\newline\n", |
| 586 regex_sample_grouping | 592 "\\begin{quote}\n", |
| 587 )) | 593 paste(names(quant_data_imp_qn_log), collapse = "\\newline\n"), |
| 588 cat("Sample group assignments are:\n") | 594 "\n\\end{quote}\n\n") |
| 589 print(regmatches(temp_matches, m2)) | 595 |
| 596 regex_sample_names <- nuke_control_sequences(regex_sample_names) | |
| 597 | |
| 598 cat("\\leavevmode\\newline\n") | |
| 599 cat("Parsing rule for SampleNames is", | |
| 600 "\\newline\n", | |
| 601 "\\text{'", | |
| 602 regex_sample_names, | |
| 603 "'}\\newline\n", | |
| 604 sep = "" | |
| 605 ) | |
| 606 | |
| 607 cat("\nParsed sample names are:\n", | |
| 608 "\\begin{quote}\n", | |
| 609 paste(temp_matches, collapse = "\\newline\n"), | |
| 610 "\n\\end{quote}\n\n") | |
| 611 | |
| 612 regex_sample_grouping <- nuke_control_sequences(regex_sample_grouping) | |
| 613 | |
| 614 cat("\\leavevmode\\newline\n") | |
| 615 cat("Parsing rule for SampleGrouping is", | |
| 616 "\\newline\n", | |
| 617 "\\text{'", | |
| 618 regex_sample_grouping, | |
| 619 "'}\\newline\n", | |
| 620 sep = "" | |
| 621 ) | |
| 622 | |
| 623 cat("\\newline\n") | |
| 624 cat("Sample group assignments are:\n", | |
| 625 "\\begin{quote}\n", | |
| 626 paste(regmatches(temp_matches, m2), collapse = "\\newline\n"), | |
| 627 "\n\\end{quote}\n\n") | |
| 628 | |
| 590 } else { | 629 } else { |
| 591 p_value_data_anova_ps <- | 630 p_value_data_anova_ps <- |
| 592 apply( | 631 apply( |
| 593 quant_data_imp_qn_log, | 632 quant_data_imp_qn_log, |
| 594 1, | 633 1, |
| 705 else | 744 else |
| 706 paste0("(%0.4e) %s") | 745 paste0("(%0.4e) %s") |
| 707 } | 746 } |
| 708 ) | 747 ) |
| 709 | 748 |
| 710 | |
| 711 | |
| 712 anova_filtered <- data.table( | 749 anova_filtered <- data.table( |
| 713 anova_filtered_merge$Phosphopeptide | 750 anova_filtered_merge$Phosphopeptide |
| 714 , | 751 , |
| 715 anova_filtered_merge$Intensity | 752 anova_filtered_merge$Intensity |
| 716 , | 753 , |
| 717 anova_filtered_merge[, 2:number_of_samples + 1] | 754 anova_filtered_merge[, 2:number_of_samples + 1] |
| 718 ) | 755 ) |
| 719 colnames(anova_filtered) <- | 756 colnames(anova_filtered) <- |
| 720 c("Phosphopeptide", colnames(filtered_data_filtered)) | 757 c("Phosphopeptide", colnames(filtered_data_filtered)) |
| 721 | 758 |
| 722 # merge qualitative columns into the ANOVA data | 759 # Merge qualitative columns into the ANOVA data |
| 723 output_table <- data.frame(anova_filtered$Phosphopeptide) | 760 output_table <- data.frame(anova_filtered$Phosphopeptide) |
| 724 output_table <- merge( | 761 output_table <- merge( |
| 725 x = output_table | 762 x = output_table |
| 726 , | 763 , |
| 727 y = data_table_imp_qn_lt | 764 y = data_table_imp_qn_lt |
| 729 by.x = "anova_filtered.Phosphopeptide" | 766 by.x = "anova_filtered.Phosphopeptide" |
| 730 , | 767 , |
| 731 by.y = "Phosphopeptide" | 768 by.y = "Phosphopeptide" |
| 732 ) | 769 ) |
| 733 | 770 |
| 734 #Produce heatmap to visualize significance and the effect of imputation | 771 # Produce heatmap to visualize significance and the effect of imputation |
| 735 m <- | 772 m <- |
| 736 as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ]) | 773 as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ]) |
| 774 m_nan_rows <- rowSums( | |
| 775 matrix( | |
| 776 as.integer(is.na(m)), | |
| 777 nrow = nrow(m) | |
| 778 ) | |
| 779 ) | |
| 780 m <- m[!m_nan_rows, ] | |
| 737 if (nrow(m) > 0) { | 781 if (nrow(m) > 0) { |
| 738 rownames_m <- rownames(m) | 782 rownames_m <- rownames(m) |
| 739 rownames(m) <- sapply( | 783 rownames(m) <- sapply( |
| 740 X = seq_len(nrow(m)) | 784 X = seq_len(nrow(m)) |
| 741 , | 785 , |
| 742 FUN = function(i) { | 786 FUN = function(i) { |
| 743 sprintf( | 787 sprintf( |
| 744 anova_filtered_merge_format[i] | 788 anova_filtered_merge_format[i], |
| 745 , | 789 filtered_p$fdr_adjusted_anova_p[i], |
| 746 filtered_p$fdr_adjusted_anova_p[i] | |
| 747 , | |
| 748 rownames_m[i] | 790 rownames_m[i] |
| 749 ) | 791 ) |
| 750 } | 792 } |
| 751 ) | 793 ) |
| 752 margins <- c(max(nchar(colnames(m))) * 10 / 16 # col | 794 margins <- |
| 753 , max(nchar(rownames(m))) * 5 / 16 # row | 795 c(max(nchar(colnames(m))) * 10 / 16 # col |
| 754 ) | 796 , max(nchar(rownames(m))) * 5 / 16 # row |
| 755 how_many_peptides <- min(50, nrow(m)) | 797 ) |
| 756 | 798 how_many_peptides <- min(50, nrow(m)) |
| 757 cat("\\newpage\n") | 799 |
| 758 if (nrow(m) > 50) { | 800 cat("\\newpage\n") |
| 759 cat("Heatmap for the 50 most-significant peptides", | 801 if (nrow(m) > 50) { |
| 760 sprintf( | 802 cat("Heatmap for the 50 most-significant peptides", |
| 761 "whose adjusted p-value < %0.2f\n", | 803 sprintf( |
| 762 cutoff) | 804 "whose adjusted p-value < %0.2f\n", |
| 763 ) | 805 cutoff) |
| 764 } else { | 806 ) |
| 765 cat("Heatmap for peptides whose", | 807 } else { |
| 766 sprintf("adjusted p-value < %0.2f\n", | 808 cat("Heatmap for peptides whose", |
| 767 cutoff) | 809 sprintf("adjusted p-value < %0.2f\n", |
| 768 ) | 810 cutoff) |
| 769 } | 811 ) |
| 770 cat("\\newline\n") | 812 } |
| 771 cat("\\newline\n") | 813 cat("\\newline\n") |
| 772 op <- par("cex.main") | 814 cat("\\newline\n") |
| 773 try( | 815 op <- par("cex.main") |
| 774 if (nrow(m) > 1) { | 816 try( |
| 775 par(cex.main = 0.6) | 817 if (nrow(m) > 1) { |
| 776 heatmap( | 818 par(cex.main = 0.6) |
| 777 m[how_many_peptides:1, ], | 819 heatmap( |
| 778 Rowv = NA, | 820 m[how_many_peptides:1, ], |
| 779 Colv = NA, | 821 Rowv = NA, |
| 780 cexRow = 0.7, | 822 Colv = NA, |
| 781 cexCol = 0.8, | 823 cexRow = 0.7, |
| 782 scale = "row", | 824 cexCol = 0.8, |
| 783 margins = margins, | 825 scale = "row", |
| 784 main = | 826 #ACE scale = "none", |
| 785 "Heatmap of unimputed, unnormalized intensities", | 827 margins = margins, |
| 786 xlab = "" | 828 main = |
| 787 ) | 829 "Heatmap of unimputed, unnormalized intensities", |
| 788 } | 830 xlab = "" |
| 789 ) | 831 ) |
| 790 par(op) | 832 } |
| 833 ) | |
| 834 par(op) | |
| 791 } | 835 } |
| 792 } | 836 } |
| 793 } | 837 } |
| 794 } | 838 } |
| 795 ``` | 839 ``` |
