comparison mqppep_anova_script.Rmd @ 12:4deacfee76ef draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e87d28ea433cc26db7fe44768685d08c06f7a0d0"
author eschen42
date Tue, 15 Mar 2022 18:17:55 +0000
parents d728198f1ba5
children b41a077af3aa
comparison
equal deleted inserted replaced
11:254ab97c6a2c 12:4deacfee76ef
9 alphaFile: "test-data/alpha_levels.tabular" 9 alphaFile: "test-data/alpha_levels.tabular"
10 firstDataColumn: "Intensity" 10 firstDataColumn: "Intensity"
11 imputationMethod: !r c("group-median", "median", "mean", "random")[1] 11 imputationMethod: !r c("group-median", "median", "mean", "random")[1]
12 meanPercentile: 1 12 meanPercentile: 1
13 sdPercentile: 0.2 13 sdPercentile: 0.2
14 regexSampleNames: "\\.(\\d+)[A-Z]$" 14 regexSampleNames: "\\.\\d+[A-Z]$"
15 regexSampleGrouping: "(\\d+)" 15 regexSampleGrouping: "\\d+"
16 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt" 16 imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt"
17 --- 17 ---
18 ```{r setup, include = FALSE} 18 ```{r setup, include = FALSE}
19 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 19 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
20 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10)) 20 knitr::opts_chunk$set(echo = FALSE, fig.dim = c(9, 10))
568 m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE) 568 m2 <- regexpr(regex_sample_grouping, temp_matches, perl = TRUE)
569 569
570 570
571 sample_factor_levels <- as.factor(regmatches(temp_matches, m2)) 571 sample_factor_levels <- as.factor(regmatches(temp_matches, m2))
572 572
573
574 if (length(levels(sample_factor_levels)) < 2) { 573 if (length(levels(sample_factor_levels)) < 2) {
574 nuke_control_sequences <-
575 function(s) {
576 s <- gsub("[\\]", "xyzzy_plugh", s)
577 s <- gsub("[$]", "\\\\$", s)
578 s <- gsub("xyzzy_plugh", "$\\\\backslash$", s)
579 return(s)
580 }
575 cat( 581 cat(
576 "ERROR!!!! Cannot perform ANOVA analysis", 582 "ERROR!!!! Cannot perform ANOVA analysis",
577 "because it requires two or more factor levels\n" 583 "(see next page)\\newpage\n"
578 ) 584 )
579 cat("Unparsed sample names are:\n") 585 cat(
580 print(names(quant_data_imp_qn_log)) 586 "ERROR: ANOVA analysis",
581 cat(sprintf("Parsing rule for SampleNames is '%s'\n", regex_sample_names)) 587 "requires two or more factor levels!\\newline\n"
582 cat("Parsed names are:\n") 588 )
583 print(temp_matches) 589
584 cat(sprintf( 590 cat("\\newline\\newline\n")
585 "Parsing rule for SampleGrouping is '%s'\n", 591 cat("Unparsed sample names are:\\newline\n",
586 regex_sample_grouping 592 "\\begin{quote}\n",
587 )) 593 paste(names(quant_data_imp_qn_log), collapse = "\\newline\n"),
588 cat("Sample group assignments are:\n") 594 "\n\\end{quote}\n\n")
589 print(regmatches(temp_matches, m2)) 595
596 regex_sample_names <- nuke_control_sequences(regex_sample_names)
597
598 cat("\\leavevmode\\newline\n")
599 cat("Parsing rule for SampleNames is",
600 "\\newline\n",
601 "\\text{'",
602 regex_sample_names,
603 "'}\\newline\n",
604 sep = ""
605 )
606
607 cat("\nParsed sample names are:\n",
608 "\\begin{quote}\n",
609 paste(temp_matches, collapse = "\\newline\n"),
610 "\n\\end{quote}\n\n")
611
612 regex_sample_grouping <- nuke_control_sequences(regex_sample_grouping)
613
614 cat("\\leavevmode\\newline\n")
615 cat("Parsing rule for SampleGrouping is",
616 "\\newline\n",
617 "\\text{'",
618 regex_sample_grouping,
619 "'}\\newline\n",
620 sep = ""
621 )
622
623 cat("\\newline\n")
624 cat("Sample group assignments are:\n",
625 "\\begin{quote}\n",
626 paste(regmatches(temp_matches, m2), collapse = "\\newline\n"),
627 "\n\\end{quote}\n\n")
628
590 } else { 629 } else {
591 p_value_data_anova_ps <- 630 p_value_data_anova_ps <-
592 apply( 631 apply(
593 quant_data_imp_qn_log, 632 quant_data_imp_qn_log,
594 1, 633 1,
705 else 744 else
706 paste0("(%0.4e) %s") 745 paste0("(%0.4e) %s")
707 } 746 }
708 ) 747 )
709 748
710
711
712 anova_filtered <- data.table( 749 anova_filtered <- data.table(
713 anova_filtered_merge$Phosphopeptide 750 anova_filtered_merge$Phosphopeptide
714 , 751 ,
715 anova_filtered_merge$Intensity 752 anova_filtered_merge$Intensity
716 , 753 ,
717 anova_filtered_merge[, 2:number_of_samples + 1] 754 anova_filtered_merge[, 2:number_of_samples + 1]
718 ) 755 )
719 colnames(anova_filtered) <- 756 colnames(anova_filtered) <-
720 c("Phosphopeptide", colnames(filtered_data_filtered)) 757 c("Phosphopeptide", colnames(filtered_data_filtered))
721 758
722 # merge qualitative columns into the ANOVA data 759 # Merge qualitative columns into the ANOVA data
723 output_table <- data.frame(anova_filtered$Phosphopeptide) 760 output_table <- data.frame(anova_filtered$Phosphopeptide)
724 output_table <- merge( 761 output_table <- merge(
725 x = output_table 762 x = output_table
726 , 763 ,
727 y = data_table_imp_qn_lt 764 y = data_table_imp_qn_lt
729 by.x = "anova_filtered.Phosphopeptide" 766 by.x = "anova_filtered.Phosphopeptide"
730 , 767 ,
731 by.y = "Phosphopeptide" 768 by.y = "Phosphopeptide"
732 ) 769 )
733 770
734 #Produce heatmap to visualize significance and the effect of imputation 771 # Produce heatmap to visualize significance and the effect of imputation
735 m <- 772 m <-
736 as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ]) 773 as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ])
774 m_nan_rows <- rowSums(
775 matrix(
776 as.integer(is.na(m)),
777 nrow = nrow(m)
778 )
779 )
780 m <- m[!m_nan_rows, ]
737 if (nrow(m) > 0) { 781 if (nrow(m) > 0) {
738 rownames_m <- rownames(m) 782 rownames_m <- rownames(m)
739 rownames(m) <- sapply( 783 rownames(m) <- sapply(
740 X = seq_len(nrow(m)) 784 X = seq_len(nrow(m))
741 , 785 ,
742 FUN = function(i) { 786 FUN = function(i) {
743 sprintf( 787 sprintf(
744 anova_filtered_merge_format[i] 788 anova_filtered_merge_format[i],
745 , 789 filtered_p$fdr_adjusted_anova_p[i],
746 filtered_p$fdr_adjusted_anova_p[i]
747 ,
748 rownames_m[i] 790 rownames_m[i]
749 ) 791 )
750 } 792 }
751 ) 793 )
752 margins <- c(max(nchar(colnames(m))) * 10 / 16 # col 794 margins <-
753 , max(nchar(rownames(m))) * 5 / 16 # row 795 c(max(nchar(colnames(m))) * 10 / 16 # col
754 ) 796 , max(nchar(rownames(m))) * 5 / 16 # row
755 how_many_peptides <- min(50, nrow(m)) 797 )
756 798 how_many_peptides <- min(50, nrow(m))
757 cat("\\newpage\n") 799
758 if (nrow(m) > 50) { 800 cat("\\newpage\n")
759 cat("Heatmap for the 50 most-significant peptides", 801 if (nrow(m) > 50) {
760 sprintf( 802 cat("Heatmap for the 50 most-significant peptides",
761 "whose adjusted p-value < %0.2f\n", 803 sprintf(
762 cutoff) 804 "whose adjusted p-value < %0.2f\n",
763 ) 805 cutoff)
764 } else { 806 )
765 cat("Heatmap for peptides whose", 807 } else {
766 sprintf("adjusted p-value < %0.2f\n", 808 cat("Heatmap for peptides whose",
767 cutoff) 809 sprintf("adjusted p-value < %0.2f\n",
768 ) 810 cutoff)
769 } 811 )
770 cat("\\newline\n") 812 }
771 cat("\\newline\n") 813 cat("\\newline\n")
772 op <- par("cex.main") 814 cat("\\newline\n")
773 try( 815 op <- par("cex.main")
774 if (nrow(m) > 1) { 816 try(
775 par(cex.main = 0.6) 817 if (nrow(m) > 1) {
776 heatmap( 818 par(cex.main = 0.6)
777 m[how_many_peptides:1, ], 819 heatmap(
778 Rowv = NA, 820 m[how_many_peptides:1, ],
779 Colv = NA, 821 Rowv = NA,
780 cexRow = 0.7, 822 Colv = NA,
781 cexCol = 0.8, 823 cexRow = 0.7,
782 scale = "row", 824 cexCol = 0.8,
783 margins = margins, 825 scale = "row",
784 main = 826 #ACE scale = "none",
785 "Heatmap of unimputed, unnormalized intensities", 827 margins = margins,
786 xlab = "" 828 main =
787 ) 829 "Heatmap of unimputed, unnormalized intensities",
788 } 830 xlab = ""
789 ) 831 )
790 par(op) 832 }
833 )
834 par(op)
791 } 835 }
792 } 836 }
793 } 837 }
794 } 838 }
795 ``` 839 ```