Mercurial > repos > eschen42 > mqppep_anova
comparison mqppep_anova_script.Rmd @ 24:8582a9797c18 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit c9e47049958ea3b12e30b9bd8884d48147c45edd
author | eschen42 |
---|---|
date | Thu, 14 Jul 2022 02:12:33 +0000 |
parents | 61adb8801b73 |
children | f9cd87ac8006 |
comparison
equal
deleted
inserted
replaced
23:3911581e639a | 24:8582a9797c18 |
---|---|
19 params: | 19 params: |
20 alphaFile: "test-data/alpha_levels.tabular" | 20 alphaFile: "test-data/alpha_levels.tabular" |
21 inputFile: "test-data/test_input_for_anova.tabular" | 21 inputFile: "test-data/test_input_for_anova.tabular" |
22 preprocDb: "test-data/test_input_for_anova.sqlite" | 22 preprocDb: "test-data/test_input_for_anova.sqlite" |
23 kseaAppPrepDb: !r c(":memory:", "test-data/mqppep.sqlite")[2] | 23 kseaAppPrepDb: !r c(":memory:", "test-data/mqppep.sqlite")[2] |
24 regexSampleNames: "\\.\\d+[A-Z]$" | |
25 regexSampleGrouping: "\\d+" | |
24 show_toc: true | 26 show_toc: true |
25 firstDataColumn: "^Intensity[^_]" | 27 firstDataColumn: "^Intensity[^_]" |
26 imputationMethod: !r c("group-median", "median", "mean", "random")[1] | 28 imputationMethod: !r c("group-median", "median", "mean", "random")[4] |
27 meanPercentile: 1 | 29 meanPercentile: 1 |
28 sdPercentile: 1.0 | 30 sdPercentile: 1.0 |
29 regexSampleNames: "\\.\\d+[A-Z]$" | |
30 regexSampleGrouping: "\\d+" | |
31 imputedDataFilename: "test-data/limbo/imputedDataFilename.txt" | 31 imputedDataFilename: "test-data/limbo/imputedDataFilename.txt" |
32 imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt" | 32 imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt" |
33 anovaKseaMetadata: "test-data/limbo/anovaKseaMetadata.txt" | 33 anovaKseaMetadata: "test-data/limbo/anovaKseaMetadata.txt" |
34 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1] | 34 oneWayManyCategories: !r c("aov", "kruskal.test", "oneway.test")[1] |
35 oneWayTwoCategories: !r c("aov", "kruskal.test", "oneway.test")[3] | 35 oneWayTwoCategories: !r c("aov", "kruskal.test", "oneway.test")[3] |
37 kseaCutoffThreshold: !r c( 0.1, 0.05)[2] | 37 kseaCutoffThreshold: !r c( 0.1, 0.05)[2] |
38 kseaMinKinaseCount: 1 | 38 kseaMinKinaseCount: 1 |
39 intensityHeatmapRows: 75 | 39 intensityHeatmapRows: 75 |
40 --- | 40 --- |
41 <!-- | 41 <!-- |
42 kseaCutoffStatistic: !r c("p.value", "FDR")[2] | |
43 kseaCutoffThreshold: !r c(0.05, 0.1)[1] | |
44 | |
45 alphaFile: "test-data/alpha_levels.tabular" | 42 alphaFile: "test-data/alpha_levels.tabular" |
46 inputFile: "test-data/test_input_for_anova.tabular" | 43 inputFile: "test-data/test_input_for_anova.tabular" |
47 preprocDb: "test-data/test_input_for_anova.sqlite" | 44 preprocDb: "test-data/test_input_for_anova.sqlite" |
48 kseaAppPrepDb: !r c(":memory:", "test-data/mqppep.sqlite")[2] | 45 kseaAppPrepDb: !r c(":memory:", "test-data/mqppep.sqlite")[2] |
46 regexSampleNames: "\\.\\d+[A-Z]$" | |
47 regexSampleGrouping: "\\d+" | |
48 | |
49 alphaFile: "test-data/alpha_levels.tabular" | |
50 inputFile: "test-data/PDX_pST_by_trt.ppep_intensities.ppep_map.preproc_tab.tabular" | |
51 preprocDb: "test-data/PDX_pST_by_trt.ppep_intensities.ppep_map.preproc_sqlite.sqlite" | |
52 kseaAppPrepDb: !r c(":memory:", "test-data/mqppep.sqlite")[2] | |
53 regexSampleNames: "\\.\\w+\\.\\d+[A-Z]$" | |
54 regexSampleGrouping: "\\w+" | |
55 | |
56 kseaCutoffStatistic: !r c("p.value", "FDR")[2] | |
57 kseaCutoffThreshold: !r c(0.05, 0.1)[1] | |
49 | 58 |
50 alphaFile: "test-data/alpha_levels.tabular" | 59 alphaFile: "test-data/alpha_levels.tabular" |
51 inputFile: "test-data/UT_phospho_ST_sites.preproc.tabular" | 60 inputFile: "test-data/UT_phospho_ST_sites.preproc.tabular" |
52 preprocDb: "test-data/UT_phospho_ST_sites.preproc.sqlite" | 61 preprocDb: "test-data/UT_phospho_ST_sites.preproc.sqlite" |
53 kseaAppPrepDb: !r c(":memory:", "test-data/UT_phospho_ST_sites.ksea.sqlite")[2] | 62 kseaAppPrepDb: !r c(":memory:", "test-data/UT_phospho_ST_sites.ksea.sqlite")[2] |
63 regexSampleNames: "\\.\\d+[A-Z]$" | |
64 regexSampleGrouping: "\\d+" | |
54 | 65 |
55 alphaFile: "test-data/alpha_levels.tabular" | 66 alphaFile: "test-data/alpha_levels.tabular" |
56 inputFile: "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.tabular" | 67 inputFile: "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.tabular" |
57 preprocDb: "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.sqlite" | 68 preprocDb: "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.sqlite" |
58 kseaAppPrepDb: !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2] | 69 kseaAppPrepDb: !r c(":memory:", "test-data/pY_Sites_NancyDu.ksea.sqlite")[2] |
70 regexSampleNames: "\\.\\d+[A-Z]$" | |
71 regexSampleGrouping: "\\d+" | |
59 | 72 |
60 alphaFile: "test-data/alpha_levels.tabular" | 73 alphaFile: "test-data/alpha_levels.tabular" |
61 inputFile: "test-data/pST_Sites_NancyDu.txt.preproc.tabular" | 74 inputFile: "test-data/pST_Sites_NancyDu.txt.preproc.tabular" |
62 preprocDb: "test-data/pST_Sites_NancyDu.txt.preproc.sqlite" | 75 preprocDb: "test-data/pST_Sites_NancyDu.txt.preproc.sqlite" |
63 kseaAppPrepDb: !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2] | 76 kseaAppPrepDb: !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2] |
666 ) | 679 ) |
667 ) | 680 ) |
668 | 681 |
669 k <- k[selector < ksea_cutoff_threshold, ] | 682 k <- k[selector < ksea_cutoff_threshold, ] |
670 | 683 |
671 if (nrow(k) > 1) { | 684 if (nrow(k) > 0) { |
672 op <- par(mai = c(1, 1.5, 0.4, 0.4)) | 685 op <- par(mai = c(1, 1.5, 0.4, 0.4)) |
673 numeric_z_score <- as.numeric(k$z_score) | 686 numeric_z_score <- as.numeric(k$z_score) |
674 z_score_order <- order(numeric_z_score) | 687 z_score_order <- order(numeric_z_score) |
675 kinase_name <- k$kinase_gene | 688 kinase_name <- k$kinase_gene |
676 long_caption <- | 689 long_caption <- |
685 barplot( | 698 barplot( |
686 height = numeric_z_score[z_score_order], | 699 height = numeric_z_score[z_score_order], |
687 border = NA, | 700 border = NA, |
688 xpd = FALSE, | 701 xpd = FALSE, |
689 cex.names = 1.0, | 702 cex.names = 1.0, |
690 cex.axis = 1.0, | |
691 main = long_caption, | 703 main = long_caption, |
692 cex.main = my_cex_caption, | 704 cex.main = my_cex_caption, |
693 names.arg = kinase_name[z_score_order], | 705 names.arg = kinase_name[z_score_order], |
694 horiz = TRUE, | 706 horiz = TRUE, |
695 srt = 45, | 707 srt = 45, |
696 las = 1) | 708 las = 1, |
709 cex.axis = 0.9 | |
710 ) | |
697 par(op) | 711 par(op) |
698 } | 712 } |
699 } | 713 } |
700 } | 714 } |
701 | 715 |
850 } | 864 } |
851 } | 865 } |
852 | 866 |
853 # create_breaks is a helper for ksea_heatmap | 867 # create_breaks is a helper for ksea_heatmap |
854 create_breaks <- function(merged_scores) { | 868 create_breaks <- function(merged_scores) { |
869 if (sum(!is.na(merged_scores)) < 2) | |
870 return(NULL) | |
855 if (min(merged_scores, na.rm = TRUE) < -1.6) { | 871 if (min(merged_scores, na.rm = TRUE) < -1.6) { |
856 breaks_neg <- seq(-1.6, 0, length.out = 30) | 872 breaks_neg <- seq(-1.6, 0, length.out = 30) |
857 breaks_neg <- | 873 breaks_neg <- |
858 append( | 874 append( |
859 seq(min(merged_scores, na.rm = TRUE), -1.6, length.out = 10), | 875 seq(min(merged_scores, na.rm = TRUE), -1.6, length.out = 10), |
907 typeof(x), | 923 typeof(x), |
908 "' rather than 'matrix'.\n\n" | 924 "' rather than 'matrix'.\n\n" |
909 ) | 925 ) |
910 ) | 926 ) |
911 } else if (nrow(x) < 2) { | 927 } else if (nrow(x) < 2) { |
912 cat("No plot because matrix x has ", nrow(x), " rows.\n\n") | 928 cat("No plot because matrix has ", nrow(x), " rows.\n\n") |
913 cat("\\begin{verbatim}\n") | 929 return(FALSE) |
914 str(x) | |
915 cat("\\end{verbatim}\n") | |
916 } else if (ncol(x) < 2) { | 930 } else if (ncol(x) < 2) { |
917 cat("No plot because matrix x has ", ncol(x), " columns.\n\n") | 931 cat("No plot because matrix x has ", ncol(x), " columns.\n\n") |
918 cat("\\begin{verbatim}\n") | 932 return(FALSE) |
919 str(x) | |
920 cat("\\end{verbatim}\n") | |
921 } else { | 933 } else { |
934 my_limit <- 25 | |
935 my_cex_col <- my_limit / (my_limit + ncol(x)) | |
936 my_cex_row <- my_limit / (my_limit + nrow(x)) | |
937 my_scale <- 12.0 | |
938 if (ncol(x) < 10 && nrow(x) < 10) | |
939 my_scale <- my_scale * 10 / (10 - nrow(x)) * 10 / (10 - ncol(x)) | |
922 gplots::heatmap.2( | 940 gplots::heatmap.2( |
923 x = merged_scores, | 941 x = merged_scores, |
924 Colv = sample_cluster, | 942 Colv = sample_cluster, |
943 breaks = color_breaks[[1]], | |
944 cellnote = merged_asterisk, | |
945 cexCol = 0.9 * my_cex_col, | |
946 cexRow = 2 * my_cex_row, | |
947 col = color_breaks[[2]], | |
948 density.info = "none", | |
949 key = FALSE, | |
950 lhei = c(0.4, 8.0, 1.1), | |
951 lmat = rbind(c(0, 3), c(2, 1), c(0, 4)), | |
952 lwid = c(0.5, 3), | |
953 margins = margins, | |
954 notecex = my_scale * my_cex_row * my_cex_col, | |
955 notecol = "white", | |
925 scale = "none", | 956 scale = "none", |
926 cellnote = merged_asterisk, | |
927 notecol = "white", | |
928 cexCol = 0.9, | |
929 # Heuristically assign size of row labels | |
930 cexRow = min(1.0, ((3 * my_cex_row) ^ 1.7) / 2.25), | |
931 srtCol = 45, | 957 srtCol = 45, |
932 srtRow = 45, | 958 srtRow = 45, |
933 notecex = 3 * my_cex_row, | |
934 col = color_breaks[[2]], | |
935 density.info = "none", | |
936 trace = "none", | 959 trace = "none", |
937 breaks = color_breaks[[1]], | |
938 lmat = rbind(c(0, 3), c(2, 1), c(0, 4)), | |
939 lhei = c(0.4, 8.0, 1.1), | |
940 lwid = c(0.5, 3), | |
941 key = FALSE, | |
942 margins = margins, | |
943 ... | 960 ... |
944 ) | 961 ) |
962 return(TRUE) | |
945 } | 963 } |
946 } | 964 } |
947 | 965 |
948 # Adapted from KSEAapp::KSEA.Heatmap | 966 # Adapted from KSEAapp::KSEA.Heatmap |
949 ksea_heatmap <- function( | 967 ksea_heatmap <- function( |
991 paste(names, i, sep = ".") | 1009 paste(names, i, sep = ".") |
992 } | 1010 } |
993 master <- | 1011 master <- |
994 Reduce( | 1012 Reduce( |
995 f = function(...) { | 1013 f = function(...) { |
996 base::merge(..., by = "Kinase.Gene", all = FALSE) | 1014 base::merge(..., by = "Kinase.Gene", all = TRUE) |
997 }, | 1015 }, |
998 x = score_list_m | 1016 x = score_list_m |
999 ) | 1017 ) |
1000 | 1018 |
1001 row.names(master) <- master$Kinase.Gene | 1019 row.names(master) <- master$Kinase.Gene |
1023 asterisk_rows <- rowSums(merged_asterisk == "*") > 0 | 1041 asterisk_rows <- rowSums(merged_asterisk == "*") > 0 |
1024 all_rows <- rownames(merged_stats) | 1042 all_rows <- rownames(merged_stats) |
1025 names(asterisk_rows) <- all_rows | 1043 names(asterisk_rows) <- all_rows |
1026 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE]) | 1044 non_asterisk_rows <- names(asterisk_rows[asterisk_rows == FALSE]) |
1027 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE]) | 1045 asterisk_rows <- names(asterisk_rows[asterisk_rows == TRUE]) |
1028 merged_scores_asterisk <- merged_scores[names(asterisk_rows), ] | 1046 merged_scores_asterisk <- merged_scores[names(asterisk_rows), , drop = FALSE] |
1029 merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), ] | 1047 merged_scores_non_asterisk <- merged_scores[names(non_asterisk_rows), , drop = FALSE] |
1030 # end hack to print only significant rows | 1048 # end hack to print only significant rows |
1031 | 1049 |
1032 row_list <- list() | 1050 row_list <- list() |
1033 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows | 1051 row_list[[const_ksea_astrsk_kinases]] <- asterisk_rows |
1034 row_list[[const_ksea_all_kinases]] <- all_rows | 1052 row_list[[const_ksea_all_kinases]] <- all_rows |
1035 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows | 1053 row_list[[const_ksea_nonastrsk_kinases]] <- non_asterisk_rows |
1036 | 1054 |
1037 i <- which_kinases | 1055 i <- which_kinases |
1038 my_row_names <- row_list[[i]] | 1056 my_row_names <- row_list[[i]] |
1039 scrs <- merged_scores[my_row_names, ] | 1057 scrs <- merged_scores[my_row_names, , drop = FALSE] |
1040 stts <- merged_stats[my_row_names, ] | 1058 stts <- merged_stats[my_row_names, , drop = FALSE] |
1041 merged_asterisk <- as.matrix(asterisk(stts, p_cutoff)) | 1059 merged_asterisk <- as.matrix(asterisk(stts, p_cutoff)) |
1042 | 1060 |
1043 color_breaks <- create_breaks(scrs) | 1061 color_breaks <- create_breaks(scrs) |
1062 if (is.null(color_breaks)) { | |
1063 cat("No plot because matrix has too many missing values.\n\n") | |
1064 return(NULL) | |
1065 } | |
1044 plot_height <- nrow(scrs) ^ 0.55 | 1066 plot_height <- nrow(scrs) ^ 0.55 |
1045 plot_width <- ncol(scrs) ^ 0.7 | 1067 plot_width <- ncol(scrs) ^ 0.7 |
1046 my_cex_row <- 0.25 * 16 / plot_height | 1068 my_cex_row <- 0.25 * 16 / plot_height |
1047 if (export == "TRUE") { | 1069 if (export == "TRUE") { |
1048 png( | 1070 png( |
1051 height = 2 * plot_height * 300, | 1073 height = 2 * plot_height * 300, |
1052 res = 300, | 1074 res = 300, |
1053 pointsize = 14 | 1075 pointsize = 14 |
1054 ) | 1076 ) |
1055 } | 1077 } |
1056 draw_kseaapp_summary_heatmap( | 1078 did_draw <- draw_kseaapp_summary_heatmap( |
1057 x = scrs, | 1079 x = scrs, |
1058 sample_cluster = sample_cluster, | 1080 sample_cluster = sample_cluster, |
1059 merged_asterisk = merged_asterisk, | 1081 merged_asterisk = merged_asterisk, |
1060 my_cex_row = my_cex_row, | 1082 my_cex_row = my_cex_row, |
1061 color_breaks = color_breaks, | 1083 color_breaks = color_breaks, |
1062 margins = margins | 1084 margins = margins |
1063 ) | 1085 ) |
1064 if (export == "TRUE") { | 1086 if (export == "TRUE") { |
1065 dev.off() | 1087 dev.off() |
1066 } | 1088 } |
1089 if (!did_draw) | |
1090 return(NULL) | |
1067 return(my_row_names) | 1091 return(my_row_names) |
1068 } | 1092 } |
1069 | 1093 |
1070 # helper for heatmaps of phosphopeptide intensities | 1094 # helper for heatmaps of phosphopeptide intensities |
1071 | 1095 |
1072 draw_intensity_heatmap <- | 1096 draw_ppep_heatmap <- |
1073 function( | 1097 function( |
1074 m, # matrix with rownames already formatted | 1098 m, # matrix with rownames already formatted |
1075 cutoff, # cutoff used by hm_heading_function | 1099 cutoff, # cutoff used by hm_heading_function |
1076 hm_heading_function, # construct and cat heading from m and cutoff | 1100 hm_heading_function, # construct and cat heading from m and cutoff |
1077 hm_main_title, # main title for plot (drawn below heading) | 1101 hm_main_title, # main title for plot (drawn below heading) |
1082 ) { | 1106 ) { |
1083 peptide_count <- 0 | 1107 peptide_count <- 0 |
1084 # emit the heading for the heatmap | 1108 # emit the heading for the heatmap |
1085 if (hm_heading_function(m, cutoff)) { | 1109 if (hm_heading_function(m, cutoff)) { |
1086 peptide_count <- min(max_peptide_count, nrow(m)) | 1110 peptide_count <- min(max_peptide_count, nrow(m)) |
1087 if (nrow(m) > 1) { | 1111 if (nrow(m) > 0) { |
1088 m_margin <- m[peptide_count:1, ] | 1112 m_margin <- m[peptide_count:1, ] |
1089 # Margin setting was heuristically derived | 1113 # Margin setting was heuristically derived |
1090 margins <- | 1114 margins <- |
1091 c(0.5, # col | 1115 c(0.5, # col |
1092 max(80, sqrt(nchar(rownames(m_margin)))) * 5 / 16 # row | 1116 max(80, sqrt(nchar(rownames(m_margin)))) * 5 / 16 # row |
1093 ) | 1117 ) |
1094 } | 1118 } |
1095 if (nrow(m) > 1) { | 1119 if (nrow(m) > 0) { |
1120 hm_call <- NULL | |
1096 tryCatch( | 1121 tryCatch( |
1097 { | 1122 { |
1098 old_oma <- par("oma") | 1123 old_oma <- par("oma") |
1099 par(cex.main = 0.6) | 1124 par(cex.main = 0.6) |
1100 # Heuristically determined character size adjustment formula | 1125 # Heuristically determined character size adjustment formula |
1101 char_contractor <- | 1126 my_cex_row <- |
1102 250000 / ( | 1127 250000 / ( |
1103 max(4500, (nchar(rownames(m_margin)))^2) * intensity_hm_rows | 1128 max(4500, (nchar(rownames(m_margin)))^2) * intensity_hm_rows |
1104 ) | 1129 ) |
1105 heatmap( | 1130 m_hm <- m[peptide_count:1, , drop = FALSE] |
1106 m[peptide_count:1, ], | 1131 my_limit <- 60 |
1107 Rowv = if (suppress_row_dendrogram) NA else NULL, | 1132 my_cex_col <- 0.75 * my_limit / (my_limit + ncol(m_hm)) |
1108 Colv = NA, | 1133 hm_call <- function(x, scaling, title) { |
1109 cexRow = char_contractor, | 1134 heatmap( |
1110 cexCol = char_contractor * 50 / max_peptide_count, | 1135 x, |
1111 scale = "row", | 1136 Rowv = if (suppress_row_dendrogram) NA else NULL, |
1112 margins = margins, | 1137 Colv = NA, |
1113 main = | 1138 cexRow = my_cex_row, |
1114 "Unimputed, unnormalized log(intensities)", | 1139 cexCol = my_cex_col, |
1115 xlab = "", | 1140 scale = scaling, |
1116 las = 1, | 1141 margins = margins, |
1117 ... | 1142 main = title, |
1118 ) | 1143 xlab = "", |
1144 las = 1, | |
1145 ... | |
1146 ) | |
1147 } | |
1148 if (sum(rowSums(!is.na(m_hm)) < 2)) | |
1149 hm_call( | |
1150 m_hm, | |
1151 "none", | |
1152 "log(intensities), unscaled, unimputed, and unnormalized" | |
1153 ) | |
1154 else | |
1155 hm_call( | |
1156 m_hm, | |
1157 "row", | |
1158 "log(intensities), row-scaled, unimputed, and unnormalized" | |
1159 ) | |
1119 }, | 1160 }, |
1120 error = function(e) { | 1161 error = function(e) { |
1121 cat( | 1162 if (!is.null(hm_call)) { |
1122 sprintf( | 1163 m_hm[is.na(m_hm)] <- 0 |
1123 "\nCould not draw heatmap, possibly because of too many missing values. Internal message: %s\n", | 1164 tryCatch( |
1124 e$message | 1165 { |
1166 if (nrow(m_hm) > 1) | |
1167 hm_call( | |
1168 m_hm, | |
1169 "none", | |
1170 paste( | |
1171 "log(intensities), unscaled, unimputed,", | |
1172 "NAs zeroed, unnormalized" | |
1173 ) | |
1174 ) | |
1175 else | |
1176 cat("\nThere are too few peptides to produce a heatmap.\n") | |
1177 }, | |
1178 error = function(r) { | |
1179 cat( | |
1180 sprintf( | |
1181 "\n%s %s Internal message: %s\n", | |
1182 "Could not draw heatmap,", | |
1183 "possibly because of too many missing values.", | |
1184 r$message | |
1185 ) | |
1186 ) | |
1187 } | |
1188 ) | |
1189 } else { | |
1190 cat( | |
1191 "\nCould not draw heatmap, possibly because of too many missing values.\n" | |
1125 ) | 1192 ) |
1126 ) | 1193 } |
1127 }, | 1194 }, |
1128 finally = par(old_oma) | 1195 finally = par(old_oma) |
1129 ) | 1196 ) |
1130 } | 1197 } |
1131 } | 1198 } |
1132 return(peptide_count) | 1199 return(peptide_count) |
1289 quote = "", | 1356 quote = "", |
1290 check.names = FALSE | 1357 check.names = FALSE |
1291 ) | 1358 ) |
1292 ``` | 1359 ``` |
1293 | 1360 |
1294 # Extract Sample Names and Treatment Levels | 1361 # Extract Sample Classes and Names |
1295 | 1362 |
1296 Column names parsed from input file are shown in Table 1; sample names and treatment levels, in Table 2. | 1363 Column names parsed from input file are shown in Table 1; sample classes and names, in Table 2. |
1297 | 1364 |
1298 ```{r echo = FALSE, results = 'asis'} | 1365 ```{r echo = FALSE, results = 'asis'} |
1299 | 1366 |
1300 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) | 1367 data_column_indices <- grep(first_data_column, names(full_data), perl = TRUE) |
1301 | 1368 |
1321 # Write column names as a LaTeX enumerated list. | 1388 # Write column names as a LaTeX enumerated list. |
1322 column_name_df <- data.frame( | 1389 column_name_df <- data.frame( |
1323 column = seq_len(length(colnames(full_data))), | 1390 column = seq_len(length(colnames(full_data))), |
1324 name = paste0("\\verb@", colnames(full_data), "@") | 1391 name = paste0("\\verb@", colnames(full_data), "@") |
1325 ) | 1392 ) |
1393 cat("\n\\begin{tiny}\n") | |
1326 data_frame_latex( | 1394 data_frame_latex( |
1327 x = column_name_df, | 1395 x = column_name_df, |
1328 justification = "l l", | 1396 justification = "l l", |
1329 centered = TRUE, | 1397 centered = TRUE, |
1330 caption = "Input data column names", | 1398 caption = "Input data column names", |
1331 anchor = const_table_anchor_bp, | 1399 anchor = const_table_anchor_bp, |
1332 underscore_whack = FALSE | 1400 underscore_whack = FALSE |
1333 ) | 1401 ) |
1402 cat("\n\\end{tiny}\n") | |
1334 | 1403 |
1335 ``` | 1404 ``` |
1336 | 1405 |
1337 ```{r echo = FALSE, results = 'asis'} | 1406 ```{r echo = FALSE, results = 'asis'} |
1338 quant_data <- full_data[first_data_column:length(full_data)] | 1407 quant_data <- full_data[first_data_column:length(full_data)] |
1356 | 1425 |
1357 rx_match <- regexpr(regex_sample_grouping, sample_name_matches, perl = TRUE) | 1426 rx_match <- regexpr(regex_sample_grouping, sample_name_matches, perl = TRUE) |
1358 sample_treatment_levels <- as.factor(regmatches(sample_name_matches, rx_match)) | 1427 sample_treatment_levels <- as.factor(regmatches(sample_name_matches, rx_match)) |
1359 number_of_samples <- length(sample_name_matches) | 1428 number_of_samples <- length(sample_name_matches) |
1360 sample_treatment_df <- data.frame( | 1429 sample_treatment_df <- data.frame( |
1361 level = sample_treatment_levels, | 1430 class = sample_treatment_levels, |
1431 sample = sample_name_matches | |
1432 ) | |
1433 # reorder data | |
1434 if (TRUE) { | |
1435 my_order <- with(sample_treatment_df, order(class, sample)) | |
1436 quant_data <- quant_data[, my_order] | |
1437 sample_name_matches <- sample_name_matches[my_order] | |
1438 sample_treatment_levels <- sample_treatment_levels[my_order] | |
1439 } | |
1440 sample_treatment_df <- data.frame( | |
1441 class = sample_treatment_levels, | |
1362 sample = sample_name_matches | 1442 sample = sample_name_matches |
1363 ) | 1443 ) |
1364 data_frame_latex( | 1444 data_frame_latex( |
1365 x = sample_treatment_df, | 1445 x = sample_treatment_df, |
1366 justification = "rp{0.2\\linewidth} lp{0.3\\linewidth}", | 1446 justification = "rp{0.2\\linewidth} lp{0.3\\linewidth}", |
1367 centered = TRUE, | 1447 centered = TRUE, |
1368 caption = "Treatment levels", | 1448 caption = "Sample classes", |
1369 anchor = const_table_anchor_tbp, | 1449 anchor = const_table_anchor_tbp, |
1370 underscore_whack = FALSE | 1450 underscore_whack = FALSE |
1371 ) | 1451 ) |
1452 sample_name_shrink <- 10 / (10 + max(nchar(sample_name_matches))) | |
1372 ``` | 1453 ``` |
1373 | 1454 |
1374 ```{r echo = FALSE, results = 'asis'} | 1455 ```{r echo = FALSE, results = 'asis'} |
1375 cat("\\newpage\n") | 1456 cat("\\newpage\n") |
1376 ``` | 1457 ``` |
1393 ) | 1474 ) |
1394 # ref: https://r-charts.com/distribution/add-points-boxplot/ | 1475 # ref: https://r-charts.com/distribution/add-points-boxplot/ |
1395 # Vertical plot | 1476 # Vertical plot |
1396 boxplot( | 1477 boxplot( |
1397 quant_data_log | 1478 quant_data_log |
1398 , las = 1 | 1479 , las = 2 |
1480 , cex.axis = 0.9 * sample_name_shrink | |
1399 , col = const_boxplot_fill | 1481 , col = const_boxplot_fill |
1400 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | 1482 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
1401 , xlab = "Sample" | 1483 , xlab = "Sample" |
1402 ) | 1484 ) |
1403 par(old_par) | 1485 par(old_par) |
1783 | 1865 |
1784 # Vertical plot | 1866 # Vertical plot |
1785 colnames(blue_dots) <- sample_name_matches | 1867 colnames(blue_dots) <- sample_name_matches |
1786 boxplot( | 1868 boxplot( |
1787 blue_dots | 1869 blue_dots |
1788 , las = 1 # "always horizontal" | 1870 , las = 2 # "always vertical" |
1871 , cex.axis = 0.9 * sample_name_shrink | |
1789 , col = const_boxplot_fill | 1872 , col = const_boxplot_fill |
1790 , ylim = ylim | 1873 , ylim = ylim |
1791 , main = "Peptide intensities after eliminating unusable peptides" | 1874 , main = "Peptide intensities after eliminating unusable peptides" |
1792 , sub = boxplot_sub | 1875 , sub = boxplot_sub |
1793 , xlab = "Sample" | 1876 , xlab = "Sample" |
1829 side = "left", | 1912 side = "left", |
1830 plotCentre = "line", | 1913 plotCentre = "line", |
1831 ylim = ylim_save, | 1914 ylim = ylim_save, |
1832 main = "Distributions of observed and imputed data", | 1915 main = "Distributions of observed and imputed data", |
1833 sub = "Light blue = observed data; Pink = imputed data", | 1916 sub = "Light blue = observed data; Pink = imputed data", |
1917 las = 2, | |
1918 cex.axis = 0.9 * sample_name_shrink, | |
1834 xlab = "Sample", | 1919 xlab = "Sample", |
1835 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | 1920 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
1836 ) | 1921 ) |
1837 red_violins <- lapply(red_dots, function(x) x[!is.na(x)]) | 1922 red_violins <- lapply(red_dots, function(x) x[!is.na(x)]) |
1838 cols_to_delete <- c() | 1923 cols_to_delete <- c() |
2041 # ref: https://r-charts.com/distribution/add-points-boxplot/ | 2126 # ref: https://r-charts.com/distribution/add-points-boxplot/ |
2042 # Vertical plot | 2127 # Vertical plot |
2043 colnames(quant_data_log) <- sample_name_matches | 2128 colnames(quant_data_log) <- sample_name_matches |
2044 boxplot( | 2129 boxplot( |
2045 quant_data_log | 2130 quant_data_log |
2046 , las = 1 | 2131 , las = 2 |
2132 , cex.axis = 0.9 * sample_name_shrink | |
2047 , col = const_boxplot_fill | 2133 , col = const_boxplot_fill |
2048 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | 2134 , ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
2049 , xlab = "Sample" | 2135 , xlab = "Sample" |
2050 ) | 2136 ) |
2051 par(old_par) | 2137 par(old_par) |
2082 , data_table_imp_qn_lt[, first_data_column] | 2168 , data_table_imp_qn_lt[, first_data_column] |
2083 ) | 2169 ) |
2084 colnames(connect_df) <- c("Phosphopeptide", "Intensity") | 2170 colnames(connect_df) <- c("Phosphopeptide", "Intensity") |
2085 ``` | 2171 ``` |
2086 | 2172 |
2087 ```{r echo = FALSE, fig.dim = c(9, 10), results = 'asis'} | 2173 ```{r anova, echo = FALSE, fig.dim = c(9, 10), results = 'asis'} |
2088 count_of_treatment_levels <- length(levels(sample_treatment_levels)) | 2174 count_of_treatment_levels <- length(levels(sample_treatment_levels)) |
2089 if (count_of_treatment_levels < 2) { | 2175 if (count_of_treatment_levels < 2) { |
2090 nuke_control_sequences <- | 2176 nuke_control_sequences <- |
2091 function(s) { | 2177 function(s) { |
2092 s <- gsub("[\\]", "xyzzy_plugh", s) | 2178 s <- gsub("[\\]", "xyzzy_plugh", s) |
2154 ) | 2240 ) |
2155 | 2241 |
2156 p_value_data_anova_ps_fdr <- | 2242 p_value_data_anova_ps_fdr <- |
2157 p.adjust(p_value_data_anova_ps, method = "fdr") | 2243 p.adjust(p_value_data_anova_ps, method = "fdr") |
2158 p_value_data <- data.frame( | 2244 p_value_data <- data.frame( |
2159 phosphopeptide = full_data[, 1] | 2245 phosphopeptide = full_data[, 1], |
2160 , | 2246 raw_anova_p = p_value_data_anova_ps, |
2161 raw_anova_p = p_value_data_anova_ps | |
2162 , | |
2163 fdr_adjusted_anova_p = p_value_data_anova_ps_fdr | 2247 fdr_adjusted_anova_p = p_value_data_anova_ps_fdr |
2164 ) | 2248 ) |
2165 | 2249 |
2166 # output ANOVA file to constructed filename, | 2250 # output ANOVA file to constructed filename, |
2167 # e.g. "Outputfile_pST_ANOVA_STEP5.txt" | 2251 # e.g. "Outputfile_pST_ANOVA_STEP5.txt" |
2255 colnames(filtered_data_filtered) <- sample_name_matches | 2339 colnames(filtered_data_filtered) <- sample_name_matches |
2256 tryCatch( | 2340 tryCatch( |
2257 boxplot( | 2341 boxplot( |
2258 filtered_data_filtered, | 2342 filtered_data_filtered, |
2259 main = "Imputed, normalized intensities", # no line plot | 2343 main = "Imputed, normalized intensities", # no line plot |
2260 las = 1, | 2344 las = 2, |
2345 cex.axis = 0.9 * sample_name_shrink, | |
2261 col = const_boxplot_fill, | 2346 col = const_boxplot_fill, |
2262 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") | 2347 ylab = latex2exp::TeX("$log_{10}$(peptide intensity)") |
2263 ), | 2348 ), |
2264 error = function(e) print(e) | 2349 error = function(e) print(e) |
2265 ) | 2350 ) |
2307 | 2392 |
2308 anova_filtered_merge_format <- sapply( | 2393 anova_filtered_merge_format <- sapply( |
2309 X = filtered_p$fdr_adjusted_anova_p | 2394 X = filtered_p$fdr_adjusted_anova_p |
2310 , | 2395 , |
2311 FUN = function(x) { | 2396 FUN = function(x) { |
2312 if (x > 0.0001) | 2397 if (x > 0.01) |
2313 paste0("(%0.", 1 + ceiling(-log10(x)), "f) %s") | 2398 paste0("%s (%0.", 1 + ceiling(-log10(x)), "f)") |
2314 else | 2399 else |
2315 paste0("(%0.4e) %s") | 2400 paste0("%s (%0.2e)") |
2316 } | 2401 } |
2317 ) | 2402 ) |
2318 | 2403 |
2319 cat_hm_heading <- function(m, cutoff) { | 2404 cat_hm_heading <- function(m, cutoff) { |
2320 cat("\\newpage\n") | |
2321 if (nrow(m) > intensity_hm_rows) { | 2405 if (nrow(m) > intensity_hm_rows) { |
2406 cat("\\newpage\n") | |
2322 subsection_header( | 2407 subsection_header( |
2323 paste( | 2408 paste( |
2324 sprintf("Heatmap for the %d most-significant peptides", | 2409 sprintf("Heatmap for the %d most-significant peptides", |
2325 intensity_hm_rows), | 2410 intensity_hm_rows), |
2326 sprintf("whose adjusted p-value < %0.2f\n", cutoff) | 2411 sprintf("whose adjusted p-value < %0.2f\n", cutoff) |
2327 ) | 2412 ) |
2328 ) | 2413 ) |
2329 } else { | 2414 } else { |
2330 if (nrow(m) == 1) { | 2415 if (nrow(m) == 0) { |
2331 return(FALSE) | 2416 return(FALSE) |
2332 } else { | 2417 } else { |
2333 subsection_header( | 2418 subsection_header( |
2334 paste( | 2419 paste( |
2335 sprintf("Heatmap for %d usable peptides whose", nrow(m)), | 2420 sprintf("Heatmap for %d usable peptides whose", nrow(m)), |
2352 X = seq_len(nrow(m)) | 2437 X = seq_len(nrow(m)) |
2353 , | 2438 , |
2354 FUN = function(i) { | 2439 FUN = function(i) { |
2355 sprintf( | 2440 sprintf( |
2356 anova_filtered_merge_format[i], | 2441 anova_filtered_merge_format[i], |
2357 filtered_p$fdr_adjusted_anova_p[i], | 2442 rownames_m[i], |
2358 rownames_m[i] | 2443 signif(filtered_p$fdr_adjusted_anova_p[i], 2) |
2359 ) | 2444 ) |
2360 } | 2445 } |
2361 ) | 2446 ) |
2362 } | 2447 } |
2363 # draw the heading and heatmap | 2448 # draw the heading and heatmap |
2364 if (nrow(m) > 0) { | 2449 if (nrow(m) > 0) { |
2365 number_of_peptides_found <- | 2450 number_of_peptides_found <- |
2366 draw_intensity_heatmap( | 2451 draw_ppep_heatmap( |
2367 m = m, | 2452 m = m, |
2368 cutoff = cutoff, | 2453 cutoff = cutoff, |
2369 hm_heading_function = cat_hm_heading, | 2454 hm_heading_function = cat_hm_heading, |
2370 hm_main_title = "Unimputed, unnormalized log(intensities)", | 2455 hm_main_title = |
2456 "log(intensities), row-scaled, unimputed, unnormalized", | |
2371 suppress_row_dendrogram = FALSE | 2457 suppress_row_dendrogram = FALSE |
2372 ) | 2458 ) |
2373 } | 2459 } |
2374 } | 2460 } |
2375 } | 2461 } |
2376 } | 2462 } |
2377 cat("\\leavevmode\n\n\n") | 2463 cat("\\leavevmode\n") |
2464 cat("The adjusted ANOVA \\textit{p}-value is shown in parentheses | |
2465 after the phosphopeptide sequence.\n\n") | |
2378 ``` | 2466 ``` |
2379 | 2467 |
2380 ```{r sqlite, echo = FALSE, fig.dim = c(9, 10), results = 'asis'} | 2468 ```{r sqlite, echo = FALSE, fig.dim = c(9, 10), results = 'asis'} |
2381 | 2469 |
2382 if (count_of_treatment_levels > 1) { | 2470 if (count_of_treatment_levels > 1) { |
2708 apply( | 2796 apply( |
2709 X = contrast_cast_data, | 2797 X = contrast_cast_data, |
2710 MARGIN = 1, # apply to rows | 2798 MARGIN = 1, # apply to rows |
2711 FUN = anova_func, | 2799 FUN = anova_func, |
2712 grouping_factor = | 2800 grouping_factor = |
2713 as.factor(as.numeric(grouping_factor$level)), # anova_func arg2 | 2801 as.factor(grouping_factor$level), # anova_func arg2 |
2714 one_way_f = one_way_two_categories, # anova_func arg3 | 2802 one_way_f = one_way_two_categories, # anova_func arg3 |
2715 simplify = TRUE # TRUE is the default for simplify | 2803 simplify = TRUE # TRUE is the default for simplify |
2716 ) | 2804 ) |
2717 contrast_data_adj_p_values <- p.adjust( | 2805 contrast_data_adj_p_values <- p.adjust( |
2718 p = p_value_data_contrast_ps, | 2806 p = p_value_data_contrast_ps, |
3044 cntrst_b_level <- contrast_metadata_df[i_cntrst, "b_level"] | 3132 cntrst_b_level <- contrast_metadata_df[i_cntrst, "b_level"] |
3045 cntrst_fold_change <- contrast_metadata_df[i_cntrst, 6] | 3133 cntrst_fold_change <- contrast_metadata_df[i_cntrst, 6] |
3046 contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level) | 3134 contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level) |
3047 contrast_longlabel <- ( | 3135 contrast_longlabel <- ( |
3048 sprintf( | 3136 sprintf( |
3049 "Trt %s {%s} -> Trt %s {%s}", | 3137 "Class %s -> Class %s", |
3050 contrast_metadata_df[i_cntrst, "b_level"], | 3138 contrast_metadata_df[i_cntrst, "b_level"], |
3051 gsub( | 3139 contrast_metadata_df[i_cntrst, "a_level"] |
3052 pattern = ";", | |
3053 replacement = ", ", | |
3054 x = contrast_metadata_df[i_cntrst, "b_samples"], | |
3055 fixed = TRUE | |
3056 ), | |
3057 contrast_metadata_df[i_cntrst, "a_level"], | |
3058 gsub( | |
3059 pattern = ";", | |
3060 replacement = ", ", | |
3061 x = contrast_metadata_df[i_cntrst, "a_samples"], | |
3062 fixed = TRUE | |
3063 ) | |
3064 ) | 3140 ) |
3065 ) | 3141 ) |
3066 kseaapp_input <- | 3142 kseaapp_input <- |
3067 sqldf::sqldf( | 3143 sqldf::sqldf( |
3068 x = sprintf(" | 3144 x = sprintf(" |
3163 # - 1 : all kinases | 3239 # - 1 : all kinases |
3164 # - 2 : significant kinases | 3240 # - 2 : significant kinases |
3165 # - 3 : non-significant kinases | 3241 # - 3 : non-significant kinases |
3166 which_kinases = which_kinases | 3242 which_kinases = which_kinases |
3167 ) | 3243 ) |
3168 cat("\\begin{center}\n") | 3244 if (!is.null(plotted_kinases)) { |
3169 cat("Color intensities reflects $z$-score magnitudes; hue reflects $z$-score sign. Asterisks reflect significance.\n") | 3245 cat("\\begin{center}\n") |
3170 cat("\\end{center}\n") | 3246 cat("Color intensity reflects $z$-score magnitudes; hue reflects $z$-score sign. Asterisks reflect significance.\n") |
3247 cat("\\end{center}\n") | |
3248 } | |
3171 } # end for (i in ... | 3249 } # end for (i in ... |
3172 } # end if (length ... | 3250 } # end if (length ... |
3173 | 3251 |
3174 for (i_cntrst in seq_len(length(rslt$score_list))) { | 3252 for (i_cntrst in seq_len(length(rslt$score_list))) { |
3175 next_index <- i_cntrst | 3253 next_index <- i_cntrst |
3177 cntrst_b_level <- contrast_metadata_df[i_cntrst, "b_level"] | 3255 cntrst_b_level <- contrast_metadata_df[i_cntrst, "b_level"] |
3178 cntrst_fold_change <- contrast_metadata_df[i_cntrst, 6] | 3256 cntrst_fold_change <- contrast_metadata_df[i_cntrst, 6] |
3179 contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level) | 3257 contrast_label <- sprintf("%s -> %s", cntrst_b_level, cntrst_a_level) |
3180 contrast_longlabel <- ( | 3258 contrast_longlabel <- ( |
3181 sprintf( | 3259 sprintf( |
3182 "Trt %s {%s} -> Trt %s {%s}", | 3260 "Class %s -> Class %s", |
3183 contrast_metadata_df[i_cntrst, "b_level"], | 3261 contrast_metadata_df[i_cntrst, "b_level"], |
3184 gsub( | 3262 contrast_metadata_df[i_cntrst, "a_level"] |
3185 pattern = ";", | |
3186 replacement = ", ", | |
3187 x = contrast_metadata_df[i_cntrst, "b_samples"], | |
3188 fixed = TRUE | |
3189 ), | |
3190 contrast_metadata_df[i_cntrst, "a_level"], | |
3191 gsub( | |
3192 pattern = ";", | |
3193 replacement = ", ", | |
3194 x = contrast_metadata_df[i_cntrst, "a_samples"], | |
3195 fixed = TRUE | |
3196 ) | |
3197 ) | 3263 ) |
3198 ) | 3264 ) |
3199 main_title <- ( | 3265 main_title <- ( |
3200 sprintf( | 3266 sprintf( |
3201 "Change from treatment %s to treatment %s", | 3267 "Change from treatment %s to treatment %s", |
3231 enriched_kinases <- data.frame(kinase = ls(ksea_asterisk_hash)) | 3297 enriched_kinases <- data.frame(kinase = ls(ksea_asterisk_hash)) |
3232 all_enriched_substrates <- sqldf(" | 3298 all_enriched_substrates <- sqldf(" |
3233 SELECT | 3299 SELECT |
3234 gene AS kinase, | 3300 gene AS kinase, |
3235 ppep, | 3301 ppep, |
3236 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label | 3302 sub_gene, |
3303 '('||group_concat(gene||'-'||sub_gene)||') '||ppep AS label, | |
3304 fdr_adjusted_anova_p | |
3237 FROM ( | 3305 FROM ( |
3238 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep | 3306 SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep |
3239 FROM pseudo_ksdata | 3307 FROM pseudo_ksdata |
3240 WHERE GENE IN (SELECT kinase FROM enriched_kinases) | 3308 WHERE gene IN (SELECT kinase FROM enriched_kinases) |
3241 ) | 3309 ), |
3310 p_value_data | |
3311 WHERE ppep = phosphopeptide | |
3242 GROUP BY ppep | 3312 GROUP BY ppep |
3313 ORDER BY fdr_adjusted_anova_p | |
3243 ") | 3314 ") |
3244 | 3315 |
3245 # helper used to label per-kinase substrate enrichment figure | 3316 # helper used to label per-kinase substrate enrichment figure |
3246 cat_enriched_heading <- function(m, cut_args) { | 3317 cat_enriched_heading <- function(m, cut_args) { |
3247 cutoff <- cut_args$cutoff | 3318 cutoff <- cut_args$cutoff |
3260 ), | 3331 ), |
3261 sprintf(" KSEA %s < %0.2f\n", statistic, threshold) | 3332 sprintf(" KSEA %s < %0.2f\n", statistic, threshold) |
3262 ) | 3333 ) |
3263 ) | 3334 ) |
3264 } else { | 3335 } else { |
3265 if (nrow(m) == 1) { | 3336 if (nrow(m) == 0) { |
3266 return(FALSE) | 3337 return(FALSE) |
3267 } else { | 3338 } else { |
3268 subsection_header( | 3339 subsection_header( |
3269 paste( | 3340 paste( |
3270 sprintf( | 3341 sprintf( |
3285 cat("\n\n\n") | 3356 cat("\n\n\n") |
3286 return(TRUE) | 3357 return(TRUE) |
3287 } | 3358 } |
3288 | 3359 |
3289 # Disabling heatmaps for substrates pending decision whether to eliminate them altogether | 3360 # Disabling heatmaps for substrates pending decision whether to eliminate them altogether |
3290 if (FALSE) | 3361 if (TRUE) |
3291 for (kinase_name in sort(enriched_kinases$kinase)) { | 3362 for (kinase_name in sort(enriched_kinases$kinase)) { |
3292 enriched_substrates <- | 3363 enriched_substrates <- |
3293 all_enriched_substrates[ | 3364 all_enriched_substrates[ |
3294 all_enriched_substrates$kinase == kinase_name, | 3365 all_enriched_substrates$kinase == kinase_name, |
3295 , | 3366 , |
3296 drop = FALSE | 3367 drop = FALSE |
3297 ] | 3368 ] |
3369 enriched_substrates$label <- with( | |
3370 enriched_substrates, | |
3371 sprintf( | |
3372 "(%s-%s) %s (%0.2g)", | |
3373 kinase, | |
3374 sub("$FAILED_MATCH_GENE_NAME", "unidentified", sub_gene, fixed = TRUE), | |
3375 ppep, | |
3376 fdr_adjusted_anova_p | |
3377 ) | |
3378 ) | |
3298 # Get the intensity values for the heatmap | 3379 # Get the intensity values for the heatmap |
3299 enriched_intensities <- | 3380 enriched_intensities <- |
3300 as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE]) | 3381 as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE]) |
3301 # Remove rows having too many NA values to be relevant | 3382 # Remove rows having too many NA values to be relevant |
3302 na_counter <- is.na(enriched_intensities) | |
3303 na_counts <- apply(na_counter, 1, sum) | |
3304 enriched_intensities <- | |
3305 enriched_intensities[na_counts < ncol(enriched_intensities) / 2, , drop = FALSE] | |
3306 # Rename the rows with the display-name for the heatmap | 3383 # Rename the rows with the display-name for the heatmap |
3307 rownames(enriched_intensities) <- | 3384 rownames(enriched_intensities) <- |
3308 sapply( | 3385 sapply( |
3309 X = rownames(enriched_intensities), | 3386 X = rownames(enriched_intensities), |
3310 FUN = function(rn) { | 3387 FUN = function(rn) { |
3319 cut_args$cutoff <- cutoff | 3396 cut_args$cutoff <- cutoff |
3320 cut_args$kinase <- kinase_name | 3397 cut_args$kinase <- kinase_name |
3321 cut_args$statistic <- ksea_cutoff_statistic | 3398 cut_args$statistic <- ksea_cutoff_statistic |
3322 cut_args$threshold <- ksea_cutoff_threshold | 3399 cut_args$threshold <- ksea_cutoff_threshold |
3323 number_of_peptides_found <- | 3400 number_of_peptides_found <- |
3324 draw_intensity_heatmap( | 3401 draw_ppep_heatmap( |
3325 m = m, | 3402 m = m, |
3326 cutoff = cut_args, | 3403 cutoff = cut_args, |
3327 hm_heading_function = cat_enriched_heading, | 3404 hm_heading_function = cat_enriched_heading, |
3328 hm_main_title | 3405 hm_main_title |
3329 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", | 3406 = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates", |
3330 suppress_row_dendrogram = FALSE | 3407 suppress_row_dendrogram = FALSE |
3331 ) | 3408 ) |
3409 if (number_of_peptides_found > 1) { | |
3410 cat("\\leavevmode\n") | |
3411 cat("The kinase-subsrate pair is shown in parentheses | |
3412 before the phosphopeptide sequence.\n\n") | |
3413 cat("The adjusted ANOVA \\textit{p}-value is shown in parentheses | |
3414 after the phosphopeptide sequence.\n\n") | |
3415 } | |
3416 if (nrow(m) == 1) { | |
3417 cat( | |
3418 sprintf( | |
3419 "\n\nSubstrate is %s, | |
3420 \nphopshopeptide is %s, | |
3421 \n\nand adjusted ANOVA \\textit{p}-value is %0.2g.\n", | |
3422 enriched_substrates[1, "sub_gene"], | |
3423 enriched_substrates[1, "ppep"], | |
3424 enriched_substrates[1, "fdr_adjusted_anova_p"] | |
3425 ) | |
3426 ) | |
3427 } | |
3332 } | 3428 } |
3333 } | 3429 } |
3334 | 3430 |
3335 # Write output tabular files | 3431 # Write output tabular files |
3336 | 3432 |
3471 # write parameters to report | 3567 # write parameters to report |
3472 | 3568 |
3473 param_unlist <- unlist(as.list(params)) | 3569 param_unlist <- unlist(as.list(params)) |
3474 param_df <- data.frame( | 3570 param_df <- data.frame( |
3475 parameter = paste0("\\verb@", names(param_unlist), "@"), | 3571 parameter = paste0("\\verb@", names(param_unlist), "@"), |
3476 value = paste0("\\verb@", gsub("$", "\\$", param_unlist, fixed = TRUE), "@") | 3572 value = paste0( |
3573 "\n\\begin{tiny}\n\\verb@", | |
3574 gsub("$", "\\$", param_unlist, fixed = TRUE), | |
3575 "@\n\\end{tiny}" | |
3576 ) | |
3477 ) | 3577 ) |
3478 | 3578 |
3479 data_frame_latex( | 3579 data_frame_latex( |
3480 x = param_df, | 3580 x = param_df, |
3481 justification = "p{0.35\\linewidth} p{0.6\\linewidth}", | 3581 justification = "p{0.35\\linewidth} p{0.6\\linewidth}", |