diff mqppep_anova_script.Rmd @ 12:4deacfee76ef draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit e87d28ea433cc26db7fe44768685d08c06f7a0d0"
author eschen42
date Tue, 15 Mar 2022 18:17:55 +0000
parents d728198f1ba5
children b41a077af3aa
line wrap: on
line diff
--- a/mqppep_anova_script.Rmd	Tue Mar 15 12:44:40 2022 +0000
+++ b/mqppep_anova_script.Rmd	Tue Mar 15 18:17:55 2022 +0000
@@ -11,8 +11,8 @@
   imputationMethod: !r c("group-median", "median", "mean", "random")[1]
   meanPercentile: 1
   sdPercentile: 0.2
-  regexSampleNames: "\\.(\\d+)[A-Z]$"
-  regexSampleGrouping: "(\\d+)"
+  regexSampleNames: "\\.\\d+[A-Z]$"
+  regexSampleGrouping: "\\d+"
   imputedDataFilename: "Upstream_Map_pST_outputfile_STEP4_QN_LT.txt"
 ---
 ```{r setup, include = FALSE}
@@ -570,23 +570,62 @@
 
 sample_factor_levels <- as.factor(regmatches(temp_matches, m2))
 
-
 if (length(levels(sample_factor_levels)) < 2) {
+  nuke_control_sequences <-
+    function(s) {
+      s <- gsub("[\\]", "xyzzy_plugh", s)
+      s <- gsub("[$]", "\\\\$", s)
+      s <- gsub("xyzzy_plugh", "$\\\\backslash$", s)
+      return(s)
+    }
   cat(
     "ERROR!!!! Cannot perform ANOVA analysis",
-    "because it requires two or more factor levels\n"
+    "(see next page)\\newpage\n"
+  )
+  cat(
+    "ERROR: ANOVA analysis",
+    "requires two or more factor levels!\\newline\n"
   )
-  cat("Unparsed sample names are:\n")
-  print(names(quant_data_imp_qn_log))
-  cat(sprintf("Parsing rule for SampleNames is '%s'\n", regex_sample_names))
-  cat("Parsed names are:\n")
-  print(temp_matches)
-  cat(sprintf(
-    "Parsing rule for SampleGrouping is '%s'\n",
-    regex_sample_grouping
-  ))
-  cat("Sample group assignments are:\n")
-  print(regmatches(temp_matches, m2))
+
+  cat("\\newline\\newline\n")
+  cat("Unparsed sample names are:\\newline\n",
+    "\\begin{quote}\n",
+    paste(names(quant_data_imp_qn_log), collapse = "\\newline\n"),
+    "\n\\end{quote}\n\n")
+
+  regex_sample_names <- nuke_control_sequences(regex_sample_names)
+
+  cat("\\leavevmode\\newline\n")
+  cat("Parsing rule for SampleNames is",
+    "\\newline\n",
+    "\\text{'",
+    regex_sample_names,
+    "'}\\newline\n",
+    sep = ""
+    )
+
+  cat("\nParsed sample names are:\n",
+    "\\begin{quote}\n",
+    paste(temp_matches, collapse = "\\newline\n"),
+    "\n\\end{quote}\n\n")
+
+  regex_sample_grouping <- nuke_control_sequences(regex_sample_grouping)
+
+  cat("\\leavevmode\\newline\n")
+  cat("Parsing rule for SampleGrouping is",
+    "\\newline\n",
+    "\\text{'",
+    regex_sample_grouping,
+    "'}\\newline\n",
+    sep = ""
+    )
+
+  cat("\\newline\n")
+  cat("Sample group assignments are:\n",
+    "\\begin{quote}\n",
+    paste(regmatches(temp_matches, m2), collapse = "\\newline\n"),
+    "\n\\end{quote}\n\n")
+
 } else {
   p_value_data_anova_ps <-
     apply(
@@ -707,8 +746,6 @@
         }
       )
 
-
-
       anova_filtered <- data.table(
         anova_filtered_merge$Phosphopeptide
         ,
@@ -719,7 +756,7 @@
       colnames(anova_filtered) <-
         c("Phosphopeptide", colnames(filtered_data_filtered))
 
-      # merge qualitative columns into the ANOVA data
+      # Merge qualitative columns into the ANOVA data
       output_table <- data.frame(anova_filtered$Phosphopeptide)
       output_table <- merge(
         x = output_table
@@ -731,9 +768,16 @@
         by.y = "Phosphopeptide"
       )
 
-      #Produce heatmap to visualize significance and the effect of imputation
+      # Produce heatmap to visualize significance and the effect of imputation
       m <-
         as.matrix(unimputed_quant_data_log[anova_filtered_merge_order, ])
+      m_nan_rows <- rowSums(
+        matrix(
+          as.integer(is.na(m)),
+          nrow = nrow(m)
+          )
+        )
+      m <- m[!m_nan_rows, ]
       if (nrow(m) > 0) {
         rownames_m <- rownames(m)
         rownames(m) <- sapply(
@@ -741,53 +785,53 @@
           ,
           FUN = function(i) {
             sprintf(
-              anova_filtered_merge_format[i]
-              ,
-              filtered_p$fdr_adjusted_anova_p[i]
-              ,
+              anova_filtered_merge_format[i],
+              filtered_p$fdr_adjusted_anova_p[i],
               rownames_m[i]
             )
           }
         )
-        margins <- c(max(nchar(colnames(m))) * 10 / 16 # col
-                     , max(nchar(rownames(m))) * 5 / 16 # row
-                     )
-                     how_many_peptides <- min(50, nrow(m))
+        margins <-
+          c(max(nchar(colnames(m))) * 10 / 16 # col
+            , max(nchar(rownames(m))) * 5 / 16 # row
+            )
+        how_many_peptides <- min(50, nrow(m))
 
-                     cat("\\newpage\n")
-                     if (nrow(m) > 50) {
-                       cat("Heatmap for the 50 most-significant peptides",
-                         sprintf(
-                           "whose adjusted p-value < %0.2f\n",
-                           cutoff)
-                       )
-                     } else {
-                       cat("Heatmap for peptides whose",
-                         sprintf("adjusted p-value < %0.2f\n",
-                         cutoff)
-                       )
-                     }
-                     cat("\\newline\n")
-                     cat("\\newline\n")
-                     op <- par("cex.main")
-                     try(
-                       if (nrow(m) > 1) {
-                         par(cex.main = 0.6)
-                         heatmap(
-                           m[how_many_peptides:1, ],
-                           Rowv = NA,
-                           Colv = NA,
-                           cexRow = 0.7,
-                           cexCol = 0.8,
-                           scale = "row",
-                           margins = margins,
-                           main =
-                             "Heatmap of unimputed, unnormalized intensities",
-                           xlab = ""
-                           )
-                       }
-                     )
-                     par(op)
+        cat("\\newpage\n")
+        if (nrow(m) > 50) {
+          cat("Heatmap for the 50 most-significant peptides",
+            sprintf(
+              "whose adjusted p-value < %0.2f\n",
+              cutoff)
+          )
+        } else {
+          cat("Heatmap for peptides whose",
+            sprintf("adjusted p-value < %0.2f\n",
+            cutoff)
+          )
+        }
+        cat("\\newline\n")
+        cat("\\newline\n")
+        op <- par("cex.main")
+        try(
+          if (nrow(m) > 1) {
+            par(cex.main = 0.6)
+            heatmap(
+              m[how_many_peptides:1, ],
+              Rowv = NA,
+              Colv = NA,
+              cexRow = 0.7,
+              cexCol = 0.8,
+              scale = "row",
+              #ACE scale = "none",
+              margins = margins,
+              main =
+                "Heatmap of unimputed, unnormalized intensities",
+              xlab = ""
+              )
+          }
+        )
+        par(op)
       }
     }
   }