Mercurial > repos > eschen42 > mqppep_anova
comparison mqppep_anova_script.Rmd @ 12:4deacfee76ef draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e87d28ea433cc26db7fe44768685d08c06f7a0d0"
author | eschen42 |
---|---|
date | Tue, 15 Mar 2022 18:17:55 +0000 |
parents | d728198f1ba5 |
children | b41a077af3aa |
comparison
equal
deleted
inserted
replaced
11:254ab97c6a2c | 12:4deacfee76ef |
---|---|
9 alphaFile: "test-data/alpha_levels.tabular" | 9 alphaFile: "test-data/alpha_levels.tabular" |
10 firstDataColumn: "Intensity" | 10 firstDataColumn: "Intensity" |
11 imputationMethod: !r c("group-median", "median", "mean", "random")[1] | 11 imputationMethod: !r c("group-median", "median", "mean", "random")[1] |
12 meanPercentile: 1 | 12 meanPercentile: 1 |
13 sdPercentile: 0.2 | 13 sdPercentile: 0.2 |
14 regexSampleNames: "\\.(\\d+)[A-Z]$" | 14 regexSampleNames: "\\.\\d+[A-Z]$" |
15 regexSampleGrouping: "(\\d+)" | 15 regexSampleGrouping: "\\d+" |
16 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" | 16 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" |
17 --- | 17 --- |
18 ```{r setup, include = FALSE} | 18 ```{r setup, include = FALSE} |
19 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 | 19 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 |
20 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) | 20 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) |
568 m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) | 568 m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) |
569 | 569 |
570 | 570 |
571 sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) | 571 sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) |
572 | 572 |
573 | |
574 if (length(levels(sample_factor_levels)) < 2) { | 573 if (length(levels(sample_factor_levels)) < 2) { |
574 nuke_control_sequences <- | |
575 function(s) { | |
576 s <- gsub("[\\]", "xyzzy_plugh", s) | |
577 s <- gsub("[$]", "\\\\$", s) | |
578 s <- gsub("xyzzy_plugh", "$\\\\backslash$", s) | |
579 return(s) | |
580 } | |
575 cat( | 581 cat( |
576 "ERROR!!!! Cannot perform ANOVA analysis", | 582 "ERROR!!!! Cannot perform ANOVA analysis", |
577 "because it requires two or more factor levels\n" | 583 "(see next page)\\newpage\n" |
578 ) | 584 ) |
579 cat("Unparsed sample names are:\n") | 585 cat( |
580 print(names(quant_data_imp_qn_log)) | 586 "ERROR: ANOVA analysis", |
581 cat(sprintf("Parsing rule for SampleNames is '%s'\n", regex_sample_names)) | 587 "requires two or more factor levels!\\newline\n" |
582 cat("Parsed names are:\n") | 588 ) |
583 print(temp_matches) | 589 |
584 cat(sprintf( | 590 cat("\\newline\\newline\n") |
585 "Parsing rule for SampleGrouping is '%s'\n", | 591 cat("Unparsed sample names are:\\newline\n", |
586 regex_sample_grouping | 592 "\\begin{quote}\n", |
587 )) | 593 paste(names(quant_data_imp_qn_log), collapse = "\\newline\n"), |
588 cat("Sample group assignments are:\n") | 594 "\n\\end{quote}\n\n") |
589 print(regmatches(temp_matches, m2)) | 595 |
596 regex_sample_names <- nuke_control_sequences(regex_sample_names) | |
597 | |
598 cat("\\leavevmode\\newline\n") | |
599 cat("Parsing rule for SampleNames is", | |
600 "\\newline\n", | |
601 "\\text{'", | |
602 regex_sample_names, | |
603 "'}\\newline\n", | |
604 sep = "" | |
605 ) | |
606 | |
607 cat("\nParsed sample names are:\n", | |
608 "\\begin{quote}\n", | |
609 paste(temp_matches, collapse = "\\newline\n"), | |
610 "\n\\end{quote}\n\n") | |
611 | |
612 regex_sample_grouping <- nuke_control_sequences(regex_sample_grouping) | |
613 | |
614 cat("\\leavevmode\\newline\n") | |
615 cat("Parsing rule for SampleGrouping is", | |
616 "\\newline\n", | |
617 "\\text{'", | |
618 regex_sample_grouping, | |
619 "'}\\newline\n", | |
620 sep = "" | |
621 ) | |
622 | |
623 cat("\\newline\n") | |
624 cat("Sample group assignments are:\n", | |
625 "\\begin{quote}\n", | |
626 paste(regmatches(temp_matches, m2), collapse = "\\newline\n"), | |
627 "\n\\end{quote}\n\n") | |
628 | |
590 } else { | 629 } else { |
591 p_value_data_anova_ps <- | 630 p_value_data_anova_ps <- |
592 apply( | 631 apply( |
593 quant_data_imp_qn_log, | 632 quant_data_imp_qn_log, |
594 1, | 633 1, |
705 else | 744 else |
706 paste0("(%0.4e) %s") | 745 paste0("(%0.4e) %s") |
707 } | 746 } |
708 ) | 747 ) |
709 | 748 |
710 | |
711 | |
712 anova_filtered <- data.table( | 749 anova_filtered <- data.table( |
713 anova_filtered_merge$Phosphopeptide | 750 anova_filtered_merge$Phosphopeptide |
714 , | 751 , |
715 anova_filtered_merge$Intensity | 752 anova_filtered_merge$Intensity |
716 , | 753 , |
717 anova_filtered_merge[, 2:number_of_samples + 1] | 754 anova_filtered_merge[, 2:number_of_samples + 1] |
718 ) | 755 ) |
719 colnames(anova_filtered) <- | 756 colnames(anova_filtered) <- |
720 c("Phosphopeptide", colnames(filtered_data_filtered)) | 757 c("Phosphopeptide", colnames(filtered_data_filtered)) |
721 | 758 |
722 # merge qualitative columns into the ANOVA data | 759 # Merge qualitative columns into the ANOVA data |
723 output_table <- data.frame(anova_filtered$Phosphopeptide) | 760 output_table <- data.frame(anova_filtered$Phosphopeptide) |
724 output_table <- merge( | 761 output_table <- merge( |
725 x = output_table | 762 x = output_table |
726 , | 763 , |
727 y = data_table_imp_qn_lt | 764 y = data_table_imp_qn_lt |
729 by.x = "anova_filtered.Phosphopeptide" | 766 by.x = "anova_filtered.Phosphopeptide" |
730 , | 767 , |
731 by.y = "Phosphopeptide" | 768 by.y = "Phosphopeptide" |
732 ) | 769 ) |
733 | 770 |
734 #Produce heatmap to visualize significance and the effect of imputation | 771 # Produce heatmap to visualize significance and the effect of imputation |
735 m <- | 772 m <- |
736 as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ]) | 773 as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ]) |
774 m_nan_rows <- rowSums( | |
775 matrix( | |
776 as.integer(is.na(m)), | |
777 nrow = nrow(m) | |
778 ) | |
779 ) | |
780 m <- m[!m_nan_rows, ] | |
737 if (nrow(m) > 0) { | 781 if (nrow(m) > 0) { |
738 rownames_m <- rownames(m) | 782 rownames_m <- rownames(m) |
739 rownames(m) <- sapply( | 783 rownames(m) <- sapply( |
740 X = seq_len(nrow(m)) | 784 X = seq_len(nrow(m)) |
741 , | 785 , |
742 FUN = function(i) { | 786 FUN = function(i) { |
743 sprintf( | 787 sprintf( |
744 anova_filtered_merge_format[i] | 788 anova_filtered_merge_format[i], |
745 , | 789 filtered_p$fdr_adjusted_anova_p[i], |
746 filtered_p$fdr_adjusted_anova_p[i] | |
747 , | |
748 rownames_m[i] | 790 rownames_m[i] |
749 ) | 791 ) |
750 } | 792 } |
751 ) | 793 ) |
752 margins <- c(max(nchar(colnames(m))) * 10 / 16 # col | 794 margins <- |
753 , max(nchar(rownames(m))) * 5 / 16 # row | 795 c(max(nchar(colnames(m))) * 10 / 16 # col |
754 ) | 796 , max(nchar(rownames(m))) * 5 / 16 # row |
755 how_many_peptides <- min(50, nrow(m)) | 797 ) |
756 | 798 how_many_peptides <- min(50, nrow(m)) |
757 cat("\\newpage\n") | 799 |
758 if (nrow(m) > 50) { | 800 cat("\\newpage\n") |
759 cat("Heatmap for the 50 most-significant peptides", | 801 if (nrow(m) > 50) { |
760 sprintf( | 802 cat("Heatmap for the 50 most-significant peptides", |
761 "whose adjusted p-value < %0.2f\n", | 803 sprintf( |
762 cutoff) | 804 "whose adjusted p-value < %0.2f\n", |
763 ) | 805 cutoff) |
764 } else { | 806 ) |
765 cat("Heatmap for peptides whose", | 807 } else { |
766 sprintf("adjusted p-value < %0.2f\n", | 808 cat("Heatmap for peptides whose", |
767 cutoff) | 809 sprintf("adjusted p-value < %0.2f\n", |
768 ) | 810 cutoff) |
769 } | 811 ) |
770 cat("\\newline\n") | 812 } |
771 cat("\\newline\n") | 813 cat("\\newline\n") |
772 op <- par("cex.main") | 814 cat("\\newline\n") |
773 try( | 815 op <- par("cex.main") |
774 if (nrow(m) > 1) { | 816 try( |
775 par(cex.main = 0.6) | 817 if (nrow(m) > 1) { |
776 heatmap( | 818 par(cex.main = 0.6) |
777 m[how_many_peptides:1, ], | 819 heatmap( |
778 Rowv = NA, | 820 m[how_many_peptides:1, ], |
779 Colv = NA, | 821 Rowv = NA, |
780 cexRow = 0.7, | 822 Colv = NA, |
781 cexCol = 0.8, | 823 cexRow = 0.7, |
782 scale = "row", | 824 cexCol = 0.8, |
783 margins = margins, | 825 scale = "row", |
784 main = | 826 #ACE scale = "none", |
785 "Heatmap of unimputed, unnormalized intensities", | 827 margins = margins, |
786 xlab = "" | 828 main = |
787 ) | 829 "Heatmap of unimputed, unnormalized intensities", |
788 } | 830 xlab = "" |
789 ) | 831 ) |
790 par(op) | 832 } |
833 ) | |
834 par(op) | |
791 } | 835 } |
792 } | 836 } |
793 } | 837 } |
794 } | 838 } |
795 ``` | 839 ``` |