Mercurial > repos > eschen42 > mqppep_anova

diff mqppep_anova.R @ 13:b41a077af3aa draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit 040e4945da00a279cb60daae799fce9489f99c50"
author: eschen42
date: Tue, 22 Mar 2022 20:47:40 +0000
parents: d728198f1ba5
children: 2c5f1a2fe16a
--- a/mqppep_anova.R	Tue Mar 15 18:17:55 2022 +0000
+++ b/mqppep_anova.R	Tue Mar 22 20:47:40 2022 +0000
@@ -1,207 +1,243 @@
-#!/usr/bin/env Rscript
-# libraries
-library(optparse)
-library(data.table)
-library(stringr)
-# bioconductor-preprocesscore
-#  - libopenblas
-#  - r-data.table
-#  - r-rmarkdown
-#  - r-ggplot2
-#  - texlive-core
-
-# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
-
-# parse options
-option_list <- list(
-  make_option(
-    c("-i", "--inputFile"),
-    action = "store",
-    default = NA,
-    type = "character",
-    help = "Phosphopeptide Intensities sparse input file path"
-  ),
-  make_option(
-    c("-a", "--alphaFile"),
-    action = "store",
-    default = NA,
-    type = "character",
-    help = paste0("List of alpha cutoff values for significance testing;",
-             " path to text file having one column and no header")
-  ),
-  make_option(
-    c("-f", "--firstDataColumn"),
-    action = "store",
-    default = "10",
-    type = "character",
-    help = "First column of intensity values"
-  ),
-  make_option(
-    c("-m", "--imputationMethod"),
-    action = "store",
-    default = "group-median",
-    type = "character",
-    help = paste0("Method for missing-value imputation,",
-             " one of c('group-median','median','mean','random')")
-  ),
-  make_option(
-    c("-p", "--meanPercentile"),
-    action = "store",
-    default = 3,
-    type = "integer",
-    help = paste0("Mean percentile for randomly generated imputed values;",
-              ", range [1,99]")
-  ),
-  make_option(
-    c("-d", "--sdPercentile"),
-    action = "store",
-    default = 3,
-    type = "double",
-    help = paste0("Adjustment value for standard deviation of",
-              " randomly generated imputed values; real")
-  ),
-  make_option(
-    c("-s", "--regexSampleNames"),
-    action = "store",
-    default = "\\.(\\d+)[A-Z]$",
-    type = "character",
-    help = "Regular expression extracting sample-names"
-  ),
-  make_option(
-    c("-g", "--regexSampleGrouping"),
-    action = "store",
-    default = "(\\d+)",
-    type = "character",
-    help = paste0("Regular expression extracting sample-group",
-             " from an extracted sample-name")
-  ),
-  make_option(
-    c("-o", "--imputedDataFile"),
-    action = "store",
-    default = "output_imputed.tsv",
-    type = "character",
-    help = "Imputed Phosphopeptide Intensities output file path"
-  ),
-  make_option(
-    c("-r", "--reportFile"),
-    action = "store",
-    default = "QuantDataProcessingScript.html",
-    type = "character",
-    help = "HTML report file path"
-  )
-)
-args <- parse_args(OptionParser(option_list = option_list))
-
-# Check parameter values
-
-if (! file.exists(args$inputFile)) {
-  stop((paste("Input file", args$inputFile, "does not exist")))
-}
-input_file <- args$inputFile
-alpha_file <- args$alphaFile
-first_data_column <- args$firstDataColumn
-imputation_method <- args$imputationMethod
-mean_percentile <- args$meanPercentile
-sd_percentile <- args$sdPercentile
-
-regex_sample_names    <- gsub("^[ \t\n]*", "",
-                         readChar(args$regexSampleNames,  1000)
-                       )
-regex_sample_names    <- gsub("[ \t\n]*$", "",
-                         regex_sample_names
-                       )
-cat(regex_sample_names)
-cat("\n")
-
-regex_sample_grouping <- gsub("^[ \t\n]*", "",
-                           readChar(args$regexSampleGrouping, 1000)
-                         )
-regex_sample_grouping <- gsub("[ \t\n]*$", "",
-                           regex_sample_grouping
-                         )
-cat(regex_sample_grouping)
-cat("\n")
-
-imputed_data_file_name <- args$imputedDataFile
-report_file_name <- args$reportFile
-
-print("args is:")
-cat(str(args))
-
-print("regex_sample_names is:")
-cat(str(regex_sample_names))
-
-print("regex_sample_grouping is:")
-cat(str(regex_sample_grouping))
-
-# from: https://github.com/molgenis/molgenis-pipelines/wiki/
-#   How-to-source-another_file.R-from-within-your-R-script
-# Function location_of_this_script returns the location of this .R script
-#   (may be needed to source other files in same dir)
-location_of_this_script <- function() {
-    this_file <- NULL
-    # This file may be 'sourced'
-    for (i in - (1:sys.nframe())) {
-        if (identical(sys.function(i), base::source)) {
-            this_file <- (normalizePath(sys.frame(i)$ofile))
-        }
-    }
-
-    if (!is.null(this_file)) return(dirname(this_file))
-
-    # But it may also be called from the command line
-    cmd_args <- commandArgs(trailingOnly = FALSE)
-    cmd_args_trailing <- commandArgs(trailingOnly = TRUE)
-    cmd_args <- cmd_args[
-      seq.int(
-        from = 1,
-        length.out = length(cmd_args) - length(cmd_args_trailing)
-        )
-      ]
-    res <- gsub("^(?:--file=(.*)|.*)$", "\\1", cmd_args)
-
-    # If multiple --file arguments are given, R uses the last one
-    res <- tail(res[res != ""], 1)
-    if (0 < length(res)) return(dirname(res))
-
-    # Both are not the case. Maybe we are in an R GUI?
-    return(NULL)
-}
-
-script_dir <-  location_of_this_script()
-
-rmarkdown_params <- list(
-    inputFile = input_file
-  , alphaFile = alpha_file
-  , firstDataColumn = first_data_column
-  , imputationMethod = imputation_method
-  , meanPercentile = mean_percentile
-  , sdPercentile = sd_percentile
-  , regexSampleNames = regex_sample_names
-  , regexSampleGrouping = regex_sample_grouping
-  , imputedDataFilename = imputed_data_file_name
-  )
-
-str(rmarkdown_params)
-
-# BUG
-# Must render as HTML for the time being until this issue is resolved:
-#   https://github.com/conda-forge/texlive-core-feedstock/issues/19
-# for reason:
-#   "The following dependencies are not available in conda"
-# reported here:
-#   https://github.com/ami-iit/bipedal-locomotion-framework/pull/457
-
-# freeze the random number generator so the same results will be produced
-#  from run to run
-set.seed(28571)
-
-
-library(tinytex)
-tinytex::install_tinytex()
-rmarkdown::render(
-  input = paste(script_dir, "mqppep_anova_script.Rmd", sep = "/")
-, output_format = rmarkdown::pdf_document()
-, output_file = report_file_name
-, params = rmarkdown_params
-)
+#!/usr/bin/env Rscript
+# libraries
+library(optparse)
+library(data.table)
+library(stringr)
+# bioconductor-preprocesscore
+#  - libopenblas
+#  - r-data.table
+#  - r-rmarkdown
+#  - r-ggplot2
+#  - texlive-core
+
+# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
+
+# parse options
+option_list <- list(
+  make_option(
+    c("-i", "--inputFile"),
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "Phosphopeptide Intensities sparse input file path"
+  ),
+  make_option(
+    c("-a", "--alphaFile"),
+    action = "store",
+    default = NA,
+    type = "character",
+    help = paste0("List of alpha cutoff values for significance testing;",
+             " path to text file having one column and no header")
+  ),
+  make_option(
+    c("-f", "--firstDataColumn"),
+    action = "store",
+    default = "10",
+    type = "character",
+    help = "First column of intensity values"
+  ),
+  make_option(
+    c("-m", "--imputationMethod"),
+    action = "store",
+    default = "random",
+    type = "character",
+    help = paste0("Method for missing-value imputation,",
+             " one of c('group-median','median','mean','random')")
+  ),
+  make_option(
+    c("-p", "--meanPercentile"),
+    action = "store",
+    default = 3,
+    type = "integer",
+    help = paste0("Mean percentile for randomly generated imputed values;",
+              ", range [1,99]")
+  ),
+  make_option(
+    c("-d", "--sdPercentile"),
+    action = "store",
+    default = 3,
+    type = "double",
+    help = paste0("Adjustment value for standard deviation of",
+              " randomly generated imputed values; real")
+  ),
+  make_option(
+    c("-s", "--regexSampleNames"),
+    action = "store",
+    default = "\\.(\\d+)[A-Z]$",
+    type = "character",
+    help = "Regular expression extracting sample-names"
+  ),
+  make_option(
+    c("-g", "--regexSampleGrouping"),
+    action = "store",
+    default = "(\\d+)",
+    type = "character",
+    help = paste0("Regular expression extracting sample-group",
+             " from an extracted sample-name")
+  ),
+  make_option(
+    c("-o", "--imputedDataFile"),
+    action = "store",
+    default = "output_imputed.tsv",
+    type = "character",
+    help = "Imputed Phosphopeptide Intensities output file path"
+  ),
+  make_option(
+    c("-n", "--imputedQNLTDataFile"),
+    action = "store",
+    default = "output_imp_qn_lt.tsv",
+    type = "character",
+    help =
+      paste(
+        "Imputed, Quantile-Normalized Log-Transformed Phosphopeptide",
+        "Intensities output file path"
+        )
+  ),
+  make_option(
+    c("-r", "--reportFile"),
+    action = "store",
+    default = "QuantDataProcessingScript.html",
+    type = "character",
+    help = "HTML report file path"
+  )
+)
+args <- parse_args(OptionParser(option_list = option_list))
+print("args is:")
+cat(str(args))
+
+# Check parameter values
+
+if (! file.exists(args$inputFile)) {
+  stop((paste("Input file", args$inputFile, "does not exist")))
+}
+input_file <- args$inputFile
+alpha_file <- args$alphaFile
+first_data_column <- args$firstDataColumn
+imputation_method <- args$imputationMethod
+print(
+  grepl(
+    pattern = imputation_method,
+    x = c("group-median", "median", "mean", "random")
+    )
+  )
+
+if (
+  sum(
+    grepl(
+      pattern = imputation_method,
+      x = c("group-median", "median", "mean", "random")
+      )
+    ) < 1
+  ) {
+    print(sprintf("bad imputationMethod argument: %s", imputation_method))
+    return(-1)
+    }
+
+mean_percentile <- args$meanPercentile
+print("mean_percentile is:")
+cat(str(mean_percentile))
+
+sd_percentile <- args$sdPercentile
+print("sd_percentile is:")
+cat(str(mean_percentile))
+
+
+regex_sample_names    <- gsub("^[ \t\n]*", "",
+                         readChar(args$regexSampleNames,  1000)
+                       )
+regex_sample_names    <- gsub("[ \t\n]*$", "",
+                         regex_sample_names
+                       )
+cat(regex_sample_names)
+cat("\n")
+
+regex_sample_grouping <- gsub("^[ \t\n]*", "",
+                           readChar(args$regexSampleGrouping, 1000)
+                         )
+regex_sample_grouping <- gsub("[ \t\n]*$", "",
+                           regex_sample_grouping
+                         )
+cat(regex_sample_grouping)
+cat("\n")
+
+imputed_data_file_name <- args$imputedDataFile
+imp_qn_lt_data_filenm <-  args$imputedQNLTDataFile
+report_file_name <- args$reportFile
+
+print("regex_sample_names is:")
+cat(str(regex_sample_names))
+
+print("regex_sample_grouping is:")
+cat(str(regex_sample_grouping))
+
+# from: https://github.com/molgenis/molgenis-pipelines/wiki/
+#   How-to-source-another_file.R-from-within-your-R-script
+# Function location_of_this_script returns the location of this .R script
+#   (may be needed to source other files in same dir)
+location_of_this_script <- function() {
+    this_file <- NULL
+    # This file may be 'sourced'
+    for (i in - (1:sys.nframe())) {
+        if (identical(sys.function(i), base::source)) {
+            this_file <- (normalizePath(sys.frame(i)$ofile))
+        }
+    }
+
+    if (!is.null(this_file)) return(dirname(this_file))
+
+    # But it may also be called from the command line
+    cmd_args <- commandArgs(trailingOnly = FALSE)
+    cmd_args_trailing <- commandArgs(trailingOnly = TRUE)
+    cmd_args <- cmd_args[
+      seq.int(
+        from = 1,
+        length.out = length(cmd_args) - length(cmd_args_trailing)
+        )
+      ]
+    res <- gsub("^(?:--file=(.*)|.*)$", "\\1", cmd_args)
+
+    # If multiple --file arguments are given, R uses the last one
+    res <- tail(res[res != ""], 1)
+    if (0 < length(res)) return(dirname(res))
+
+    # Both are not the case. Maybe we are in an R GUI?
+    return(NULL)
+}
+
+script_dir <-  location_of_this_script()
+
+rmarkdown_params <- list(
+    inputFile = input_file
+  , alphaFile = alpha_file
+  , firstDataColumn = first_data_column
+  , imputationMethod = imputation_method
+  , meanPercentile = mean_percentile
+  , sdPercentile = sd_percentile
+  , regexSampleNames = regex_sample_names
+  , regexSampleGrouping = regex_sample_grouping
+  , imputedDataFilename = imputed_data_file_name
+  , imputedQNLTDataFile = imp_qn_lt_data_filenm
+  )
+
+print("rmarkdown_params")
+str(rmarkdown_params)
+
+# freeze the random number generator so the same results will be produced
+#  from run to run
+set.seed(28571)
+
+# BUG (or "opportunity")
+# To render as PDF for the time being requires installing the conda
+# package `r-texlive` until this issue in `texlive-core` is resolved:
+#   https://github.com/conda-forge/texlive-core-feedstock/issues/19
+# This workaround is detailed in the fourth comment of:
+#   https://github.com/conda-forge/texlive-core-feedstock/issues/61
+
+library(tinytex)
+tinytex::install_tinytex()
+rmarkdown::render(
+  input = paste(script_dir, "mqppep_anova_script.Rmd", sep = "/")
+, output_format = rmarkdown::pdf_document(toc = TRUE)
+, output_file = report_file_name
+, params = rmarkdown_params
+)
author	eschen42
date	Tue, 22 Mar 2022 20:47:40 +0000
parents	d728198f1ba5
children	2c5f1a2fe16a