Mercurial > repos > eschen42 > mqppep_anova
comparison mqppep_anova.R @ 0:c1403d18c189 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/mqppep commit bb6c941be50db4c0719efdeaa904d7cb7aa1d182"
author | eschen42 |
---|---|
date | Mon, 07 Mar 2022 19:05:01 +0000 |
parents | |
children | d4d531006735 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:c1403d18c189 |
---|---|
1 #!/usr/bin/env Rscript | |
2 # libraries | |
3 library(optparse) | |
4 library(data.table) | |
5 library(stringr) | |
6 #library(ggplot2) | |
7 #library(PTXQC) | |
8 #require(PTXQC) | |
9 #require(methods) | |
10 # bioconductor-preprocesscore | |
11 # - libopenblas | |
12 # - r-data.table | |
13 # - r-rmarkdown | |
14 # - r-ggplot2 | |
15 # - texlive-core | |
16 | |
17 # ref for parameterizing Rmd document: https://stackoverflow.com/a/37940285 | |
18 | |
19 # parse options | |
# Command-line interface of the script.
#
# Every option uses action "store", so a small local constructor supplies that
# and the list below only spells out what differs between options: the flags,
# the default, the declared type and the help text.  The order of the entries
# is the order in which optparse lists them in the generated help.
option_list <- local({
  store_option <- function(flags, default, type, help) {
    make_option(
      flags,
      action = "store",
      default = default,
      type = type,
      help = help
    )
  }

  list(
    # Input table of phosphopeptide intensities (no default).
    store_option(
      c("-i", "--inputFile"),
      NA,
      "character",
      "Phosphopeptide Intensities sparse input file path"
    ),
    # Text file listing the alpha cutoffs (no default).
    store_option(
      c("-a", "--alphaFile"),
      NA,
      "character",
      "List of alpha cutoff values for significance testing; path to text file having one column and no header"
    ),
    # Declared as character even though the default looks numeric.
    store_option(
      c("-f", "--firstDataColumn"),
      "10",
      "character",
      "First column of intensity values"
    ),
    # One of "group-median", "median", "mean", "random".
    store_option(
      c("-m", "--imputationMethod"),
      "group-median",
      "character",
      "Method for missing-value imputation, one of c('group-median','median','mean','random')"
    ),
    store_option(
      c("-p", "--meanPercentile"),
      3,
      "integer",
      "Mean percentile for randomly generated imputed values; range [1,99]"
    ),
    store_option(
      c("-d", "--sdPercentile"),
      3,
      "double",
      "Adjustment value for standard deviation of randomly generated imputed values; real"
    ),
    store_option(
      c("-s", "--regexSampleNames"),
      "\\.(\\d+)[A-Z]$",
      "character",
      "Regular expression extracting sample-names"
    ),
    store_option(
      c("-g", "--regexSampleGrouping"),
      "(\\d+)",
      "character",
      "Regular expression extracting sample-group from an extracted sample-name"
    ),
    # Output: imputed intensities table.
    store_option(
      c("-o", "--imputedDataFile"),
      "output_imputed.tsv",
      "character",
      "Imputed Phosphopeptide Intensities output file path"
    ),
    # Output: rendered HTML report.
    store_option(
      c("-r", "--reportFile"),
      "QuantDataProcessingScript.html",
      "character",
      "HTML report file path"
    )
  )
})
# Parse the command line into a named list, one element per long option name.
args <- parse_args(OptionParser(option_list = option_list))

# Check parameter values
if (!file.exists(args$inputFile)) {
  stop(paste("Input file", args$inputFile, "does not exist"))
}

# Unpack the plain options into the variables used by the rest of the script.
inputFile <- args$inputFile
alphaFile <- args$alphaFile
firstDataColumn <- args$firstDataColumn
imputationMethod <- args$imputationMethod
meanPercentile <- args$meanPercentile
sdPercentile <- args$sdPercentile

# Resolve a regular-expression option to the regular expression itself.
#
# value: the option value; either the path of a text file whose content is the
#        regular expression, or the regular expression given literally.
# limit: maximum number of characters read from the file.
#
# Returns the expression with leading and trailing spaces, tabs and newlines
# removed.
#
# Why both forms: the option defaults and help text describe a literal regular
# expression, while the value used to be handed to readChar() unconditionally,
# which aborts when it is not the name of a readable file.  An existing regular
# file still takes precedence, so callers that pass a file are unaffected.
# Directories are excluded because a short pattern such as "." names one.
read_regex_argument <- function(value, limit = 1000) {
  from_file <- file.exists(value) && !dir.exists(value)
  text <- if (from_file) readChar(value, limit) else value
  text <- gsub("^[ \t\n]*", "", text)
  gsub("[ \t\n]*$", "", text)
}

regexSampleNames <- read_regex_argument(args$regexSampleNames)
cat(regexSampleNames)
cat("\n")

regexSampleGrouping <- read_regex_argument(args$regexSampleGrouping)
cat(regexSampleGrouping)
cat("\n")

imputedDataFilename <- args$imputedDataFile
reportFileName <- args$reportFile

# Echo the effective settings to the log.  str() prints its argument itself and
# returns NULL, so it is called directly rather than wrapped in cat().
print("args is:")
str(args)

print("regexSampleNames is:")
str(regexSampleNames)

print("regexSampleGrouping is:")
str(regexSampleGrouping)
# from: https://github.com/molgenis/molgenis-pipelines/wiki/How-to-source-another_file.R-from-within-your-R-script
#
# LocationOfThisScript returns the directory containing this .R script (needed
# to find other files that live next to it), or NULL when it cannot be found.
#
# Two ways of running the script are recognised:
#   1. it is being source()d: the path is taken from the `ofile` variable of
#      the source() call found on the call stack;
#   2. it was started from the command line: the path is taken from the last
#      non-empty `--file=` argument of the R front end.
LocationOfThisScript <- function() {
  # 1. Walk back through the call stack looking for source().  This loop must
  #    stay directly in the function body: the negative indices are relative
  #    to the frame in which sys.function()/sys.frame() are evaluated.  When
  #    several frames match, the one visited last wins.
  sourced_path <- NULL
  for (depth in -seq_len(sys.nframe())) {
    if (identical(sys.function(depth), base::source)) {
      sourced_path <- normalizePath(sys.frame(depth)$ofile)
    }
  }
  if (!is.null(sourced_path)) {
    return(dirname(sourced_path))
  }

  # 2. Inspect the arguments that precede the script's own (trailing) ones.
  all_args <- commandArgs(trailingOnly = FALSE)
  n_leading <- length(all_args) - length(commandArgs(trailingOnly = TRUE))
  leading_args <- all_args[seq.int(from = 1, length.out = n_leading)]

  file_flag <- "--file="
  is_file_arg <- startsWith(leading_args, file_flag)
  script_paths <- substring(leading_args[is_file_arg], nchar(file_flag) + 1)

  # If multiple --file arguments are given, R uses the last one
  last_path <- tail(script_paths[script_paths != ""], 1)
  if (length(last_path) > 0) {
    return(dirname(last_path))
  }

  # Neither applies; possibly running inside an R GUI.
  NULL
}
158 | |
# Directory of this script; the R Markdown template is looked up next to it.
script.dir <- LocationOfThisScript()
if (is.null(script.dir)) {
  # LocationOfThisScript() returns NULL when the script is neither source()d
  # nor started with a --file= argument.  Stop here rather than build a
  # template path from a missing directory.
  stop(
    "Cannot determine the directory of this script; ",
    "it is needed to locate mqppep_anova_script.Rmd"
  )
}

# Values handed to the R Markdown document as its `params`.
rmarkdown_params <- list(
  inputFile = inputFile,
  alphaFile = alphaFile,
  firstDataColumn = firstDataColumn,
  imputationMethod = imputationMethod,
  meanPercentile = meanPercentile,
  sdPercentile = sdPercentile,
  regexSampleNames = regexSampleNames,
  regexSampleGrouping = regexSampleGrouping,
  imputedDataFilename = imputedDataFilename
)

# Log the parameters actually passed to the document.
str(rmarkdown_params)

# BUG
# Must render as HTML for the time being until this issue is resolved:
#   https://github.com/conda-forge/texlive-core-feedstock/issues/19
# for reason:
#   "The following dependencies are not available in conda"
# reported here:
#   https://github.com/ami-iit/bipedal-locomotion-framework/pull/457/commits/e98ccef8c8cb63e207df36628192af6ce22feb13

# freeze the random number generator so the same results will be produced
# from run to run
set.seed(28571)

rmarkdown::render(
  input = file.path(script.dir, "mqppep_anova_script.Rmd"),
  output_format = rmarkdown::html_document(pandoc_args = "--self-contained"),
  output_file = reportFileName,
  params = rmarkdown_params
)