Previous changeset 23:3911581e639a (2022-07-11) Next changeset 25:f9cd87ac8006 (2022-07-14) |
Commit message:
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit c9e47049958ea3b12e30b9bd8884d48147c45edd |
modified:
mqppep_anova_script.Rmd |
b |
diff -r 3911581e639a -r 8582a9797c18 mqppep_anova_script.Rmd --- a/mqppep_anova_script.Rmd Mon Jul 11 13:51:14 2022 +0000 +++ b/mqppep_anova_script.Rmd Thu Jul 14 02:12:33 2022 +0000 |
[ |
b'@@ -21,13 +21,13 @@\n inputFile: "test-data/test_input_for_anova.tabular"\n preprocDb: "test-data/test_input_for_anova.sqlite"\n kseaAppPrepDb: !r c(":memory:", "test-data/mqppep.sqlite")[2]\n+ regexSampleNames: "\\\\.\\\\d+[A-Z]$"\n+ regexSampleGrouping: "\\\\d+"\n show_toc: true\n firstDataColumn: "^Intensity[^_]"\n- imputationMethod: !r c("group-median", "median", "mean", "random")[1]\n+ imputationMethod: !r c("group-median", "median", "mean", "random")[4]\n meanPercentile: 1\n sdPercentile: 1.0\n- regexSampleNames: "\\\\.\\\\d+[A-Z]$"\n- regexSampleGrouping: "\\\\d+"\n imputedDataFilename: "test-data/limbo/imputedDataFilename.txt"\n imputedQNLTDataFile: "test-data/limbo/imputedQNLTDataFile.txt"\n anovaKseaMetadata: "test-data/limbo/anovaKseaMetadata.txt"\n@@ -39,23 +39,36 @@\n intensityHeatmapRows: 75\n ---\n <!--\n- kseaCutoffStatistic: !r c("p.value", "FDR")[2]\n- kseaCutoffThreshold: !r c(0.05, 0.1)[1]\n-\n alphaFile: "test-data/alpha_levels.tabular"\n inputFile: "test-data/test_input_for_anova.tabular"\n preprocDb: "test-data/test_input_for_anova.sqlite"\n kseaAppPrepDb: !r c(":memory:", "test-data/mqppep.sqlite")[2]\n+ regexSampleNames: "\\\\.\\\\d+[A-Z]$"\n+ regexSampleGrouping: "\\\\d+"\n+\n+ alphaFile: "test-data/alpha_levels.tabular"\n+ inputFile: "test-data/PDX_pST_by_trt.ppep_intensities.ppep_map.preproc_tab.tabular"\n+ preprocDb: "test-data/PDX_pST_by_trt.ppep_intensities.ppep_map.preproc_sqlite.sqlite"\n+ kseaAppPrepDb: !r c(":memory:", "test-data/mqppep.sqlite")[2]\n+ regexSampleNames: "\\\\.\\\\w+\\\\.\\\\d+[A-Z]$"\n+ regexSampleGrouping: "\\\\w+"\n+\n+ kseaCutoffStatistic: !r c("p.value", "FDR")[2]\n+ kseaCutoffThreshold: !r c(0.05, 0.1)[1]\n \n alphaFile: "test-data/alpha_levels.tabular"\n inputFile: "test-data/UT_phospho_ST_sites.preproc.tabular"\n preprocDb: "test-data/UT_phospho_ST_sites.preproc.sqlite"\n kseaAppPrepDb: !r c(":memory:", "test-data/UT_phospho_ST_sites.ksea.sqlite")[2]\n+ regexSampleNames: "\\\\.\\\\d+[A-Z]$"\n+ regexSampleGrouping: "\\\\d+"\n \n alphaFile: "test-data/alpha_levels.tabular"\n inputFile: "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.tabular"\n preprocDb: "test-data/pY_Sites_NancyDu.txt.ppep_intensities.ppep_map.preproc.sqlite"\n- kseaAppPrepDb: !r c(":memory:", "test-data/pST_Sites_NancyDu.ksea.sqlite")[2]\n+ kseaAppPrepDb: !r c(":memory:", "test-data/pY_Sites_NancyDu.ksea.sqlite")[2]\n+ regexSampleNames: "\\\\.\\\\d+[A-Z]$"\n+ regexSampleGrouping: "\\\\d+"\n \n alphaFile: "test-data/alpha_levels.tabular"\n inputFile: "test-data/pST_Sites_NancyDu.txt.preproc.tabular"\n@@ -668,7 +681,7 @@\n \n k <- k[selector < ksea_cutoff_threshold, ]\n \n- if (nrow(k) > 1) {\n+ if (nrow(k) > 0) {\n op <- par(mai = c(1, 1.5, 0.4, 0.4))\n numeric_z_score <- as.numeric(k$z_score)\n z_score_order <- order(numeric_z_score)\n@@ -687,13 +700,14 @@\n border = NA,\n xpd = FALSE,\n cex.names = 1.0,\n- cex.axis = 1.0,\n main = long_caption,\n cex.main = my_cex_caption,\n names.arg = kinase_name[z_score_order],\n horiz = TRUE,\n srt = 45,\n- las = 1)\n+ las = 1,\n+ cex.axis = 0.9\n+ )\n par(op)\n }\n }\n@@ -852,6 +866,8 @@\n \n # create_breaks is a helper for ksea_heatmap\n create_breaks <- function(merged_scores) {\n+ if (sum(!is.na(merged_scores)) < 2)\n+ return(NULL)\n if (min(merged_scores, na.rm = TRUE) < -1.6) {\n breaks_neg <- seq(-1.6, 0, length.out = 30)\n breaks_neg <-\n@@ -909,39 +925,41 @@\n )\n )\n } else if (nrow(x) < 2) {\n- cat("No plot because matrix x has ", nrow(x), " rows.\\n\\n")\n- cat("\\\\begin{verbatim}\\n")\n- str(x)\n- cat("\\\\end{verbatim}\\n")\n+ cat("No p'..b'eplacement = ", ",\n- x = contrast_metadata_df[i_cntrst, "b_samples"],\n- fixed = TRUE\n- ),\n- contrast_metadata_df[i_cntrst, "a_level"],\n- gsub(\n- pattern = ";",\n- replacement = ", ",\n- x = contrast_metadata_df[i_cntrst, "a_samples"],\n- fixed = TRUE\n- )\n+ contrast_metadata_df[i_cntrst, "a_level"]\n )\n )\n main_title <- (\n@@ -3233,13 +3299,18 @@\n SELECT\n gene AS kinase,\n ppep,\n- \'(\'||group_concat(gene||\'-\'||sub_gene)||\') \'||ppep AS label\n+ sub_gene,\n+ \'(\'||group_concat(gene||\'-\'||sub_gene)||\') \'||ppep AS label,\n+ fdr_adjusted_anova_p\n FROM (\n SELECT DISTINCT gene, sub_gene, SUB_MOD_RSD AS ppep\n FROM pseudo_ksdata\n- WHERE GENE IN (SELECT kinase FROM enriched_kinases)\n- )\n+ WHERE gene IN (SELECT kinase FROM enriched_kinases)\n+ ),\n+ p_value_data\n+ WHERE ppep = phosphopeptide\n GROUP BY ppep\n+ ORDER BY fdr_adjusted_anova_p\n ")\n \n # helper used to label per-kinase substrate enrichment figure\n@@ -3262,7 +3333,7 @@\n )\n )\n } else {\n- if (nrow(m) == 1) {\n+ if (nrow(m) == 0) {\n return(FALSE)\n } else {\n subsection_header(\n@@ -3287,7 +3358,7 @@\n }\n \n # Disabling heatmaps for substrates pending decision whether to eliminate them altogether\n-if (FALSE)\n+if (TRUE)\n for (kinase_name in sort(enriched_kinases$kinase)) {\n enriched_substrates <-\n all_enriched_substrates[\n@@ -3295,14 +3366,20 @@\n ,\n drop = FALSE\n ]\n+ enriched_substrates$label <- with(\n+ enriched_substrates,\n+ sprintf(\n+ "(%s-%s) %s (%0.2g)",\n+ kinase,\n+ sub("$FAILED_MATCH_GENE_NAME", "unidentified", sub_gene, fixed = TRUE),\n+ ppep,\n+ fdr_adjusted_anova_p\n+ )\n+ )\n # Get the intensity values for the heatmap\n enriched_intensities <-\n as.matrix(unimputed_quant_data_log[enriched_substrates$ppep, , drop = FALSE])\n # Remove rows having too many NA values to be relevant\n- na_counter <- is.na(enriched_intensities)\n- na_counts <- apply(na_counter, 1, sum)\n- enriched_intensities <-\n- enriched_intensities[na_counts < ncol(enriched_intensities) / 2, , drop = FALSE]\n # Rename the rows with the display-name for the heatmap\n rownames(enriched_intensities) <-\n sapply(\n@@ -3321,7 +3398,7 @@\n cut_args$statistic <- ksea_cutoff_statistic\n cut_args$threshold <- ksea_cutoff_threshold\n number_of_peptides_found <-\n- draw_intensity_heatmap(\n+ draw_ppep_heatmap(\n m = m,\n cutoff = cut_args,\n hm_heading_function = cat_enriched_heading,\n@@ -3329,6 +3406,25 @@\n = "Unnormalized (zero-imputed) intensities of enriched kinase-substrates",\n suppress_row_dendrogram = FALSE\n )\n+ if (number_of_peptides_found > 1) {\n+ cat("\\\\leavevmode\\n")\n+ cat("The kinase-subsrate pair is shown in parentheses\n+ before the phosphopeptide sequence.\\n\\n")\n+ cat("The adjusted ANOVA \\\\textit{p}-value is shown in parentheses\n+ after the phosphopeptide sequence.\\n\\n")\n+ }\n+ if (nrow(m) == 1) {\n+ cat(\n+ sprintf(\n+ "\\n\\nSubstrate is %s,\n+ \\nphopshopeptide is %s,\n+ \\n\\nand adjusted ANOVA \\\\textit{p}-value is %0.2g.\\n",\n+ enriched_substrates[1, "sub_gene"],\n+ enriched_substrates[1, "ppep"],\n+ enriched_substrates[1, "fdr_adjusted_anova_p"]\n+ )\n+ )\n+ }\n }\n }\n \n@@ -3473,7 +3569,11 @@\n param_unlist <- unlist(as.list(params))\n param_df <- data.frame(\n parameter = paste0("\\\\verb@", names(param_unlist), "@"),\n- value = paste0("\\\\verb@", gsub("$", "\\\\$", param_unlist, fixed = TRUE), "@")\n+ value = paste0(\n+ "\\n\\\\begin{tiny}\\n\\\\verb@",\n+ gsub("$", "\\\\$", param_unlist, fixed = TRUE),\n+ "@\\n\\\\end{tiny}"\n+ )\n )\n \n data_frame_latex(\n' |