Repository 'mqppep_anova'
hg clone https://eddie.galaxyproject.org/repos/eschen42/mqppep_anova

Changeset 24:8582a9797c18 (2022-07-14)
Previous changeset 23:3911581e639a (2022-07-11) Next changeset 25:f9cd87ac8006 (2022-07-14)
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit c9e47049958ea3b12e30b9bd8884d48147c45edd
modified:
mqppep_anova_script.Rmd
b
diff -r 3911581e639a -r 8582a9797c18 mqppep_anova_script.Rmd
--- a/mqppep_anova_script.Rmd Mon Jul 11 13:51:14 2022 +0000
+++ b/mqppep_anova_script.Rmd Thu Jul 14 02:12:33 2022 +0000
[
b'@@ -21,13 +21,13 @@\n   inputFile:            "test-data/test_input_for_anova.tabular"\n   preprocDb:            "test-data/test_input_for_anova.sqlite"\n   kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]\n+  regexSampleNames:     "\\\\.\\\\d+[A-Z]$"\n+  regexSampleGrouping:  "\\\\d+"\n   show_toc:             true\n   firstDataColumn:      "^Intensity[^_]"\n-  imputationMethod:     !r c("group-median", "median", "mean", "random")[1]\n+  imputationMethod:     !r c("group-median", "median", "mean", "random")[4]\n   meanPercentile:       1\n   sdPercentile:         1.0\n-  regexSampleNames:     "\\\\.\\\\d+[A-Z]$"\n-  regexSampleGrouping:  "\\\\d+"\n   imputedDataFilename:  "test-data/limbo/imputedDataFilename.txt"\n   imputedQNLTDataFile:  "test-data/limbo/imputedQNLTDataFile.txt"\n   anovaKseaMetadata:    "test-data/limbo/anovaKseaMetadata.txt"\n@@ -39,23 +39,36 @@\n   intensityHeatmapRows: 75\n ---\n <!--\n-  kseaCutoffStatistic:  !r c("p.value", "FDR")[2]\n-  kseaCutoffThreshold:  !r c(0.05, 0.1)[1]\n-\n   alphaFile:            "test-data/alpha_levels.tabular"\n   inputFile:            "test-data/test_input_for_anova.tabular"\n   preprocDb:            "test-data/test_input_for_anova.sqlite"\n   kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]\n+  regexSampleNames:     "\\\\.\\\\d+[A-Z]$"\n+  regexSampleGrouping:  "\\\\d+"\n+\n+  alphaFile:            "test-data/alpha_levels.tabular"\n+  inputFile:            "test-data/PDX_pST_by_trt.ppep_intensities.ppep_map.preproc_tab.tabular"\n+  preprocDb:            "test-data/PDX_pST_by_trt.ppep_intensities.ppep_map.preproc_sqlite.sqlite"\n+  kseaAppPrepDb:        !r c(":memory:", "test-data/mqppep.sqlite")[2]\n+  regexSampleNames:     "\\\\.\\\\w+\\\\.\\\\d+[A-Z]$"\n+  regexSampleGrouping:  "\\\\w+"\n+\n+  kseaCutoffStatistic:  !r c("p.value", "FDR")[2]\n+  kseaCutoffThreshold:  !r c(0.05, 0.1)[1]\n \n   alphaFile:            "test-data/alpha_levels.tabular"\n   inputFile:            "test-data/UT_phospho_ST_sites.preproc.tabular"\n   preprocDb:            "test-data/UT_phospho_ST_sites.preproc.sqlite"\n   kseaAppPrepDb:        !r c(":memory:", "test-data/UT_phospho_ST_sites.ksea.sqlite")[2]\n+  regexSampleNames:     "\\\\.\\\\d+[A-Z]$"\n+  regexSampleGrouping:  "\\\\d+"\n \n   alphaFile:            "test-data/alpha_levels.tabular"\n   inputFile:            "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.tabular"\n   preprocDb:            "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.sqlite"\n-  kseaAppPrepDb:        !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2]\n+  kseaAppPrepDb:        !r c(":memory:", "test-data/pY_Sites_NancyDu.ksea.sqlite")[2]\n+  regexSampleNames:     "\\\\.\\\\d+[A-Z]$"\n+  regexSampleGrouping:  "\\\\d+"\n \n   alphaFile:            "test-data/alpha_levels.tabular"\n   inputFile:            "test-data/pST_Sites_NancyDu.txt.preproc.tabular"\n@@ -668,7 +681,7 @@\n \n     k <- k[selector < ksea_cutoff_threshold, ]\n \n-    if (nrow(k) > 1) {\n+    if (nrow(k) > 0) {\n       op <- par(mai = c(1, 1.5, 0.4, 0.4))\n       numeric_z_score <- as.numeric(k$z_score)\n       z_score_order <- order(numeric_z_score)\n@@ -687,13 +700,14 @@\n         border = NA,\n         xpd = FALSE,\n         cex.names = 1.0,\n-        cex.axis = 1.0,\n         main = long_caption,\n         cex.main = my_cex_caption,\n         names.arg = kinase_name[z_score_order],\n         horiz = TRUE,\n         srt = 45,\n-        las = 1)\n+        las = 1,\n+        cex.axis = 0.9\n+        )\n       par(op)\n     }\n   }\n@@ -852,6 +866,8 @@\n \n # create_breaks is a helper for ksea_heatmap\n create_breaks <- function(merged_scores) {\n+  if (sum(!is.na(merged_scores)) < 2)\n+    return(NULL)\n   if (min(merged_scores, na.rm = TRUE) < -1.6) {\n     breaks_neg <- seq(-1.6, 0, length.out = 30)\n     breaks_neg <-\n@@ -909,39 +925,41 @@\n         )\n       )\n   } else if (nrow(x) < 2) {\n-    cat("No plot because matrix x has ", nrow(x), " rows.\\n\\n")\n-    cat("\\\\begin{verbatim}\\n")\n-    str(x)\n-    cat("\\\\end{verbatim}\\n")\n+    cat("No p'..b'eplacement = ", ",\n-        x = contrast_metadata_df[i_cntrst, "b_samples"],\n-        fixed = TRUE\n-      ),\n-      contrast_metadata_df[i_cntrst, "a_level"],\n-      gsub(\n-        pattern = ";",\n-        replacement = ", ",\n-        x = contrast_metadata_df[i_cntrst, "a_samples"],\n-        fixed = TRUE\n-      )\n+      contrast_metadata_df[i_cntrst, "a_level"]\n     )\n   )\n   main_title <- (\n@@ -3233,13 +3299,18 @@\n   SELECT\n     gene AS kinase,\n     ppep,\n-    \'(\'||group_concat(gene||\'-\'||sub_gene)||\') \'||ppep AS label\n+    sub_gene,\n+    \'(\'||group_concat(gene||\'-\'||sub_gene)||\') \'||ppep AS label,\n+    fdr_adjusted_anova_p\n   FROM (\n     SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep\n       FROM pseudo_ksdata\n-      WHERE GENE IN (SELECT kinase FROM enriched_kinases)\n-    )\n+      WHERE gene IN (SELECT kinase FROM enriched_kinases)\n+    ),\n+    p_value_data\n+  WHERE ppep = phosphopeptide\n   GROUP BY ppep\n+  ORDER BY fdr_adjusted_anova_p\n   ")\n \n # helper used to label per-kinase substrate enrichment figure\n@@ -3262,7 +3333,7 @@\n       )\n     )\n   } else {\n-    if (nrow(m) == 1) {\n+    if (nrow(m) == 0) {\n       return(FALSE)\n     } else {\n       subsection_header(\n@@ -3287,7 +3358,7 @@\n }\n \n # Disabling heatmaps for substrates pending decision whether to eliminate them altogether\n-if (FALSE)\n+if (TRUE)\n   for (kinase_name in sort(enriched_kinases$kinase)) {\n     enriched_substrates <-\n       all_enriched_substrates[\n@@ -3295,14 +3366,20 @@\n         ,\n         drop = FALSE\n         ]\n+    enriched_substrates$label <- with(\n+      enriched_substrates,\n+      sprintf(\n+        "(%s-%s) %s (%0.2g)",\n+        kinase,\n+        sub("$FAILED_MATCH_GENE_NAME", "unidentified", sub_gene, fixed = TRUE),\n+        ppep,\n+        fdr_adjusted_anova_p\n+        )\n+      )\n     # Get the intensity values for the heatmap\n     enriched_intensities <-\n       as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE])\n     # Remove rows having too many NA values to be relevant\n-    na_counter <- is.na(enriched_intensities)\n-    na_counts <- apply(na_counter, 1, sum)\n-    enriched_intensities <-\n-      enriched_intensities[na_counts < ncol(enriched_intensities) / 2, , drop = FALSE]\n     # Rename the rows with the display-name for the heatmap\n     rownames(enriched_intensities) <-\n       sapply(\n@@ -3321,7 +3398,7 @@\n       cut_args$statistic <- ksea_cutoff_statistic\n       cut_args$threshold <- ksea_cutoff_threshold\n       number_of_peptides_found <-\n-        draw_intensity_heatmap(\n+        draw_ppep_heatmap(\n           m                       = m,\n           cutoff                  = cut_args,\n           hm_heading_function     = cat_enriched_heading,\n@@ -3329,6 +3406,25 @@\n             = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates",\n           suppress_row_dendrogram = FALSE\n         )\n+        if (number_of_peptides_found > 1) {\n+          cat("\\\\leavevmode\\n")\n+          cat("The kinase-subsrate pair is shown in parentheses\n+            before the phosphopeptide sequence.\\n\\n")\n+          cat("The adjusted ANOVA \\\\textit{p}-value is shown in parentheses\n+            after the phosphopeptide sequence.\\n\\n")\n+        }\n+      if (nrow(m) == 1) {\n+        cat(\n+          sprintf(\n+            "\\n\\nSubstrate is %s,\n+            \\nphopshopeptide is %s,\n+            \\n\\nand adjusted ANOVA \\\\textit{p}-value is %0.2g.\\n",\n+            enriched_substrates[1, "sub_gene"],\n+            enriched_substrates[1, "ppep"],\n+            enriched_substrates[1, "fdr_adjusted_anova_p"]\n+            )\n+        )\n+      }\n     }\n   }\n \n@@ -3473,7 +3569,11 @@\n param_unlist <- unlist(as.list(params))\n param_df <- data.frame(\n   parameter = paste0("\\\\verb@", names(param_unlist), "@"),\n-  value = paste0("\\\\verb@", gsub("$", "\\\\$", param_unlist, fixed = TRUE), "@")\n+  value = paste0(\n+    "\\n\\\\begin{tiny}\\n\\\\verb@",\n+    gsub("$", "\\\\$", param_unlist, fixed = TRUE),\n+    "@\\n\\\\end{tiny}"\n+    )\n   )\n \n data_frame_latex(\n'