comparison mqppep_anova.R @ 0:c1403d18c189 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
author eschen42
date Mon, 07 Mar 2022 19:05:01 +0000
parents
children d4d531006735
comparison
equal deleted inserted replaced
-1:000000000000 0:c1403d18c189
1 #!/usr/bin/env Rscript
2 # libraries
3 library(optparse)
4 library(data.table)
5 library(stringr)
6 #library(ggplot2)
7 #library(PTXQC)
8 #require(PTXQC)
9 #require(methods)
10 # bioconductor-preprocesscore
11 # - libopenblas
12 # - r-data.table
13 # - r-rmarkdown
14 # - r-ggplot2
15 # - texlive-core
16
17 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285
18
# Command-line interface.
# Every option stores a single value; the XML fragments quoted in the comments
# are the corresponding declarations from the Galaxy tool wrapper.
option_list <- list(
  # <param name="inputFilename" type="data" format="tabular"
  #   label="Phosphopeptide Intensities" .../>
  # The first column is labelled 'Phosphopeptide'; sample intensities must
  # begin in column 10 and carry column labels matching regexSampleNames.
  make_option(
    c("-i", "--inputFile"),
    type = "character",
    default = NA,
    action = "store",
    help = "Phosphopeptide Intensities sparse input file path"
  ),
  make_option(
    c("-a", "--alphaFile"),
    type = "character",
    default = NA,
    action = "store",
    help = "List of alpha cutoff values for significance testing; path to text file having one column and no header"
  ),
  make_option(
    c("-f", "--firstDataColumn"),
    type = "character",
    default = "10",
    action = "store",
    help = "First column of intensity values"
  ),
  # Choices: c("group-median", "median", "mean", "random"); the first one is
  # the default.
  make_option(
    c("-m", "--imputationMethod"),
    type = "character",
    default = "group-median",
    action = "store",
    help = "Method for missing-value imputation, one of c('group-median','median','mean','random')"
  ),
  make_option(
    c("-p", "--meanPercentile"),
    type = "integer",
    default = 3,
    action = "store",
    help = "Mean percentile for randomly generated imputed values; range [1,99]"
  ),
  make_option(
    c("-d", "--sdPercentile"),
    type = "double",
    default = 3,
    action = "store",
    help = "Adjustment value for standard deviation of randomly generated imputed values; real"
  ),
  make_option(
    c("-s", "--regexSampleNames"),
    type = "character",
    default = "\\.(\\d+)[A-Z]$",
    action = "store",
    help = "Regular expression extracting sample-names"
  ),
  make_option(
    c("-g", "--regexSampleGrouping"),
    type = "character",
    default = "(\\d+)",
    action = "store",
    help = "Regular expression extracting sample-group from an extracted sample-name"
  ),
  # <data name="imputed_data_file" format="tabular"
  #   label="${input_file.name}.intensities_${imputation.imputation_method}-imputed_QN_LT" ></data>
  make_option(
    c("-o", "--imputedDataFile"),
    type = "character",
    default = "output_imputed.tsv",
    action = "store",
    help = "Imputed Phosphopeptide Intensities output file path"
  ),
  # <data name="report_file" format="html"
  #   label="report (download/unzip to view)" ></data>
  make_option(
    c("-r", "--reportFile"),
    type = "character",
    default = "QuantDataProcessingScript.html",
    action = "store",
    help = "HTML report file path"
  )
)

# Parse the command line into a named list of option values.
args <- parse_args(OptionParser(option_list = option_list))
# Check parameter values

# The input file is mandatory.  Report a missing argument explicitly instead
# of handing the NA default to file.exists().
if (is.null(args$inputFile) || is.na(args$inputFile)) {
  stop("Input file was not specified (use -i or --inputFile)")
}
if (!file.exists(args$inputFile)) {
  stop((paste("Input file", args$inputFile, "does not exist")))
}
inputFile <- args$inputFile
alphaFile <- args$alphaFile
firstDataColumn <- args$firstDataColumn
imputationMethod <- args$imputationMethod
meanPercentile <- args$meanPercentile
sdPercentile <- args$sdPercentile

# Resolve a regular-expression argument to the pattern itself.
#
# value: either the path of a text file holding the pattern (at most the
#   first 1000 characters are read) or, when no such file exists, the
#   pattern itself.  Accepting the literal form is what makes the literal
#   defaults declared for these options usable.
# Returns the pattern with leading and trailing spaces, tabs, and newlines
# removed.
read_regex_argument <- function(value) {
  is_pattern_file <- file.exists(value) && !dir.exists(value)
  pattern <- if (is_pattern_file) readChar(value, 1000) else value
  pattern <- gsub('^[ \t\n]*', '', pattern)
  gsub('[ \t\n]*$', '', pattern)
}

regexSampleNames <- read_regex_argument(args$regexSampleNames)
cat(regexSampleNames)
cat('\n')

regexSampleGrouping <- read_regex_argument(args$regexSampleGrouping)
cat(regexSampleGrouping)
cat('\n')

imputedDataFilename <- args$imputedDataFile
reportFileName <- args$reportFile

# Diagnostic dump of the parsed arguments and resolved patterns.
# str() prints its own output, so no cat() wrapper is needed.
print("args is:")
str(args)

print("regexSampleNames is:")
str(regexSampleNames)

print("regexSampleGrouping is:")
str(regexSampleGrouping)
133
# from: https://github.com/molgenis/molgenis-pipelines/wiki/How-to-source-another_file.R-from-within-your-R-script
#
# Return the directory containing this .R script, so that companion files in
# the same directory can be located.
#
# Two situations are handled, in this order:
#   1. the script is being run through source(): the directory of the file
#      being sourced is returned;
#   2. the script was started from the command line: the directory of the
#      last --file= argument is returned.
# When neither applies (for example, in an R GUI) the result is NULL.
LocationOfThisScript <- function() {
  sourced_path <- NULL

  # Inspect every frame on the call stack; a frame running base::source
  # carries the path of the sourced file in its `ofile` variable.
  for (frame_offset in -(1:sys.nframe())) {
    if (identical(sys.function(frame_offset), base::source)) {
      sourced_path <- normalizePath(sys.frame(frame_offset)$ofile)
    }
  }
  if (!is.null(sourced_path)) {
    return(dirname(sourced_path))
  }

  # Keep only the interpreter's own arguments by dropping the trailing
  # (user-supplied) ones from the full argument vector.
  all_args <- commandArgs(trailingOnly = FALSE)
  trailing_count <- length(commandArgs(trailingOnly = TRUE))
  interpreter_args <- all_args[
    seq.int(from = 1, length.out = length(all_args) - trailing_count)
  ]

  # Reduce each argument to the value of --file=..., or to "" otherwise.
  file_values <- gsub("^(?:--file=(.*)|.*)$", "\\1", interpreter_args)

  # If multiple --file arguments are given, R uses the last one.
  last_file <- tail(file_values[file_values != ""], 1)
  if (length(last_file) > 0) {
    return(dirname(last_file))
  }

  # Neither sourced nor launched with --file.
  NULL
}
158
# Directory of this script; the R Markdown document rendered below is looked
# up in the same directory.
script.dir <- LocationOfThisScript()

# Values handed to the parameterized R Markdown document.
rmarkdown_params <- list(
  inputFile = inputFile,
  alphaFile = alphaFile,
  firstDataColumn = firstDataColumn,
  imputationMethod = imputationMethod,
  meanPercentile = meanPercentile,
  sdPercentile = sdPercentile,
  regexSampleNames = regexSampleNames,
  regexSampleGrouping = regexSampleGrouping,
  imputedDataFilename = imputedDataFilename
)

str(rmarkdown_params)

# BUG
# The report must be rendered as HTML for the time being, until this issue is
# resolved:
#   https://github.com/conda-forge/texlive-core-feedstock/issues/19
# for the reason
#   "The following dependencies are not available in conda"
# reported here:
#   https://github.com/ami-iit/bipedal-locomotion-framework/pull/457/commits/e98ccef8c8cb63e207df36628192af6ce22feb13

# Fix the random number generator seed so that the same results are produced
# from run to run.
set.seed(28571)

rmarkdown::render(
  input = paste(script.dir, "mqppep_anova_script.Rmd", sep = "/"),
  output_format = rmarkdown::html_document(pandoc_args = "--self-contained"),
  output_file = reportFileName,
  params = rmarkdown_params
)