diff mqppep_anova.R @ 0:c1403d18c189 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
author eschen42
date Mon, 07 Mar 2022 19:05:01 +0000
parents
children d4d531006735
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mqppep_anova.R	Mon Mar 07 19:05:01 2022 +0000
@@ -0,0 +1,191 @@
+#!/usr/bin/env Rscript
+# libraries
+library(optparse)
+library(data.table)
+library(stringr)
+#library(ggplot2)
+#library(PTXQC)
+#require(PTXQC)
+#require(methods)
+# bioconductor-preprocesscore
+#  - libopenblas
+#  - r-data.table
+#  - r-rmarkdown
+#  - r-ggplot2
+#  - texlive-core
+
+# ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
+
+# parse options
+option_list <- list(
+  # <param name="inputFilename" type="data" format="tabular" label="Phosphopeptide Intensities" help="First column label 'Phosphopeptide'; sample-intensities must begin in column 10 and must have column labels to match argument regexSampleNames"/>
+  make_option(
+    c("-i", "--inputFile"),
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "Phosphopeptide Intensities sparse input file path"
+  ),
+  make_option(
+    c("-a", "--alphaFile"),
+    action = "store",
+    default = NA,
+    type = "character",
+    help = "List of alpha cutoff values for significance testing; path to text file having one column and no header"
+  ),
+  make_option(
+    c("-f", "--firstDataColumn"),
+    action = "store",
+    default = "10",
+    type = "character",
+    help = "First column of intensity values"
+  ),
+  make_option( # imputationMethod <- c("group-median","median","mean","random")[1]
+    c("-m", "--imputationMethod"),
+    action = "store",
+    default = "group-median",
+    type = "character",
+    help = "Method for missing-value imputation, one of c('group-median','median','mean','random')"
+  ),
+  make_option(
+    c("-p", "--meanPercentile"),
+    action = "store",
+    default = 3,
+    type = "integer",
+    help = "Mean percentile for randomly generated imputed values; range [1,99]"
+  ),
+  make_option(
+    c("-d", "--sdPercentile"),
+    action = "store",
+    default = 3,
+    type = "double",
+    help = "Adjustment value for standard deviation of randomly generated imputed values; real"
+  ),
+  make_option(
+    c("-s", "--regexSampleNames"),
+    action = "store",
+    default = "\\.(\\d+)[A-Z]$",
+    type = "character",
+    help = "Regular expression extracting sample-names"
+  ),
+  make_option(
+    c("-g", "--regexSampleGrouping"),
+    action = "store",
+    default = "(\\d+)",
+    type = "character",
+    help = "Regular expression extracting sample-group from an extracted sample-name"
+  ),
+  # <data name="imputed_data_file" format="tabular" label="${input_file.name}.intensities_${imputation.imputation_method}-imputed_QN_LT" ></data>
+  make_option(
+    c("-o", "--imputedDataFile"),
+    action = "store",
+    default = "output_imputed.tsv",
+    type = "character",
+    help = "Imputed Phosphopeptide Intensities output file path"
+  ),
+  # <data name="report_file" format="html" label="report (download/unzip to view)" ></data>
+  make_option(
+    c("-r", "--reportFile"),
+    action = "store",
+    default = "QuantDataProcessingScript.html",
+    type = "character",
+    help = "HTML report file path"
+  )
+)
+args <- parse_args(OptionParser(option_list=option_list))
+# Check parameter values
+
+if (! file.exists(args$inputFile)) {
+  stop((paste("Input file", args$inputFile, "does not exist")))
+}
+inputFile <- args$inputFile
+alphaFile <- args$alphaFile
+firstDataColumn <- args$firstDataColumn
+imputationMethod <- args$imputationMethod
+meanPercentile <- args$meanPercentile
+sdPercentile <- args$sdPercentile
+
+regexSampleNames    <- gsub('^[ \t\n]*', ''  , readChar(args$regexSampleNames,  1000))
+regexSampleNames    <- gsub('[ \t\n]*$', ''  ,               regexSampleNames        )
+# regexSampleNames    <- gsub('\\\\'     , '@@',               regexSampleNames        )
+# regexSampleNames    <- gsub('@@'       , '\\',               regexSampleNames        )
+cat(regexSampleNames)
+cat('\n')
+
+regexSampleGrouping <- gsub('^[ \t\n]*', '', readChar(args$regexSampleGrouping, 1000))
+regexSampleGrouping <- gsub('[ \t\n]*$', '',               regexSampleGrouping       )
+# regexSampleGrouping <- gsub('\\\\'     , '@@',             regexSampleGrouping       )
+cat(regexSampleGrouping)
+cat('\n')
+
+# regexSampleGrouping <- gsub('@@'       , '\\',             regexSampleGrouping       )
+imputedDataFilename <- args$imputedDataFile
+reportFileName <- args$reportFile
+
+print("args is:")
+cat(str(args))
+
+print("regexSampleNames is:")
+cat(str(regexSampleNames))
+
+print("regexSampleGrouping is:")
+cat(str(regexSampleGrouping))
+
+# from: https://github.com/molgenis/molgenis-pipelines/wiki/How-to-source-another_file.R-from-within-your-R-script
+LocationOfThisScript = function() # Function LocationOfThisScript returns the location of this .R script (may be needed to source other files in same dir)
+{
+    this.file = NULL
+    # This file may be 'sourced'
+    for (i in -(1:sys.nframe())) {
+        if (identical(sys.function(i), base::source)) this.file = (normalizePath(sys.frame(i)$ofile))
+    }
+
+    if (!is.null(this.file)) return(dirname(this.file))
+
+    # But it may also be called from the command line
+    cmd.args = commandArgs(trailingOnly = FALSE)
+    cmd.args.trailing = commandArgs(trailingOnly = TRUE)
+    cmd.args = cmd.args[seq.int(from=1, length.out=length(cmd.args) - length(cmd.args.trailing))]
+    res = gsub("^(?:--file=(.*)|.*)$", "\\1", cmd.args)
+
+    # If multiple --file arguments are given, R uses the last one
+    res = tail(res[res != ""], 1)
+    if (0 < length(res)) return(dirname(res))
+
+    # Both are not the case. Maybe we are in an R GUI?
+    return(NULL)
+}
+
+script.dir <-  LocationOfThisScript()
+
+rmarkdown_params <- list(
+    inputFile = inputFile
+  , alphaFile = alphaFile
+  , firstDataColumn = firstDataColumn
+  , imputationMethod = imputationMethod
+  , meanPercentile = meanPercentile
+  , sdPercentile = sdPercentile
+  , regexSampleNames = regexSampleNames
+  , regexSampleGrouping = regexSampleGrouping
+  , imputedDataFilename = imputedDataFilename
+  )
+
+str(rmarkdown_params)
+
+# BUG
+# Must render as HTML for the time being until this issue is resolved:
+#   https://github.com/conda-forge/texlive-core-feedstock/issues/19
+# for reason:
+#   "The following dependencies are not available in conda"
+# reported here:
+#   https://github.com/ami-iit/bipedal-locomotion-framework/pull/457/commits/e98ccef8c8cb63e207df36628192af6ce22feb13
+
+# freeze the random number generator so the same results will be produced from run to run
+set.seed(28571)
+
+rmarkdown::render(
+  input = paste(script.dir, "mqppep_anova_script.Rmd", sep="/")
+, output_format = rmarkdown::html_document(pandoc_args = "--self-contained")
+, output_file = reportFileName
+, params = rmarkdown_params
+)