comparison anticipated_purity_dims.R @ 0:4b417094bf71 draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 0aa10df0ec1ed71601f932cfb11d7d4d4f620d80-dirty
author tomnl
date Wed, 02 May 2018 13:09:23 -0400
parents
children 3f1f2d545f3a
comparison
equal deleted inserted replaced
-1:000000000000 0:4b417094bf71
# Load dependencies and record the runtime environment in the job log.
library(msPurity)
library(optparse)
print(sessionInfo())

# Command-line options ----
# Flags declared with action = "store_true" have no default, so they are
# absent (NULL) from the parsed list unless given on the command line; the
# rest of the script tests them with is.null().
option_list <- list(
  # Path to an mzML file, or to a directory that contains it.
  make_option(c("--mzML_file"), type = "character"),
  # Tab-separated peak table (plain table, or a dimspy output with --dimspy).
  make_option(c("--peaks_file"), type = "character"),
  # Directory that receives anticipated_purity_dims.tsv.
  make_option(c("-o", "--out_dir"), type = "character"),
  # Offsets, ilim and ppm are forwarded to
  # msPurity::dimsPredictPuritySingle().
  make_option("--minOffset", default = 0.5),
  make_option("--maxOffset", default = 0.5),
  make_option("--ilim", default = 0.05),
  make_option("--ppm", default = 4),
  # Peak table is a dimspy output (file columns start at column 8).
  make_option("--dimspy", action = "store_true"),
  make_option("--sim", action = "store_true"),
  # Drop peaks whose m/z is NA before predicting.
  make_option("--remove_nas", action = "store_true"),
  # One of: none, gauss, rcosine, QE5.
  make_option("--iwNorm", default = "none", type = "character"),
  # 1-based position of the file column to use in a dimspy peak table.
  make_option("--file_num_dimspy", default = 1),
  make_option("--exclude_isotopes", action = "store_true"),
  # Optional tab-separated isotope matrix file.
  make_option("--isotope_matrix", type = "character")
)

# store options
opt <- parse_args(OptionParser(option_list = option_list))

# sessionInfo() has already been printed above, so only the parsed options
# are logged here.
print(opt)
27
# Load the peak list ----
# Produces `df` (the peak table, with an `mz` column in the dimspy case) and
# `filename` (NA unless it can be derived from a dimspy peak table).
if (is.null(opt$dimspy)) {
  # Plain peak table: used as read; no file name can be derived from it.
  df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
  filename <- NA
} else {
  indf <- read.table(opt$peaks_file,
                     header = TRUE, sep = "\t", stringsAsFactors = FALSE)

  # Columns from the 8th onwards are treated as per-file columns; pick the
  # one selected by --file_num_dimspy. Negative indexing avoids the reversed
  # sequence that 8:ncol(indf) produces when there are fewer than 8 columns.
  # NOTE(review): column names have been through read.table's check.names
  # mangling, so they may differ from the real file names - confirm.
  filename <- colnames(indf)[-seq_len(7)][opt$file_num_dimspy]
  if (is.na(filename)) {
    stop("--file_num_dimspy (", opt$file_num_dimspy,
         ") does not select a file column in the peaks file",
         call. = FALSE)
  }

  # Keep the selected column as read in `i`, then coerce the original
  # column to numeric.
  indf$i <- indf[[filename]]
  indf[[filename]] <- as.numeric(indf[[filename]])

  # check if the data file is mzML or RAW (can only use mzML currently) so
  # we expect an mzML file of the same name in the same folder.
  # Only a trailing ".raw" extension is rewritten; an unanchored pattern
  # would also corrupt names that merely contain "raw".
  filename <- sub("\\.raw$", ".mzML", filename, ignore.case = TRUE)

  # Skip the first three rows. Negative indexing (instead of 4:nrow(indf))
  # gives an empty table rather than a reversed selection on short input.
  # NOTE(review): presumably rows 1-3 are dimspy metadata rows - confirm.
  df <- indf[-seq_len(3), , drop = FALSE]
  if ("blank_flag" %in% colnames(df)) {
    df <- df[df$blank_flag == 1, ]
  }

  colnames(df)[colnames(df) == "m.z"] <- "mz"

  # Treat the literal string 'nan' as missing. %in% never yields NA, so this
  # stays valid when the column already contains NA values.
  if ("nan" %in% df$mz) {
    df$mz[df$mz %in% "nan"] <- NA
  }
  df$mz <- as.numeric(df$mz)
}
60
# Optionally discard peaks that have no m/z value (--remove_nas).
drop_missing_mz <- !is.null(opt$remove_nas)
if (drop_missing_mz) {
  df <- df[!is.na(df$mz), ]
}

# Isotope matrix: stays NULL unless a tab-separated file was supplied.
im <- NULL
if (!is.null(opt$isotope_matrix)) {
  im <- read.table(opt$isotope_matrix,
                   header = TRUE, sep = "\t", stringsAsFactors = FALSE)
}

# TRUE only when the --exclude_isotopes flag was given.
isotopes <- !is.null(opt$exclude_isotopes)
77
78
# Resolve the mzML file to process ----
# --mzML_file may be a file path or a directory. For a directory, the file
# name derived from the peak table is appended to it.
if (dir.exists(opt$mzML_file)) {
  # if directory then we need to add a file name
  print(filename)
  if (is.na(filename)) {
    # Signal a real error so the process exits with a non-zero status;
    # a bare quit() would report success to the caller.
    stop("If a directory is provided for --mzML_file then a filename needs ",
         "to be entered directly or automatically obtained by using a ",
         "dimspy output",
         call. = FALSE)
  }
  mzml_file <- file.path(opt$mzML_file, filename)
} else {
  mzml_file <- opt$mzML_file
}
92
# TRUE only when the --sim flag was given.
sim <- !is.null(opt$sim)

minOffset <- as.numeric(opt$minOffset)
maxOffset <- as.numeric(opt$maxOffset)

# Isolation-window normalisation ----
# `iwNorm` switches normalisation on or off; `iwNormFun` is the matching
# msPurity normalisation function (NULL when normalisation is off).
# An unrecognised --iwNorm value is rejected here instead of leaving both
# variables undefined and failing later with "object not found".
iwNorm <- opt$iwNorm != "none"
iwNormFun <- switch(opt$iwNorm,
  none = NULL,
  # minOffset is given as a positive distance, so it is negated here.
  gauss = msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset),
  rcosine = msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset),
  QE5 = msPurity::iwNormQE.5(),
  stop("Unknown --iwNorm value '", opt$iwNorm,
       "'; expected one of: none, gauss, rcosine, QE5",
       call. = FALSE)
)
117
# Log the head of the peak table that is about to be scored.
print("FIRST ROWS OF PEAK FILE")
print(head(df))

# Run the purity prediction for every m/z in the peak table against the
# resolved mzML file.
predicted <- msPurity::dimsPredictPuritySingle(
  df$mz,
  filepth = mzml_file,
  minOffset = minOffset,
  maxOffset = maxOffset,
  ppm = opt$ppm,
  mzML = TRUE,
  sim = sim,
  ilim = opt$ilim,
  isotopes = isotopes,
  im = im,
  iwNorm = iwNorm,
  iwNormFun = iwNormFun
)

# Put the prediction columns next to the original peak table columns.
predicted <- cbind(df, predicted)
print(head(predicted))

# Write the combined table as a tab-separated file in the output directory.
out_path <- file.path(opt$out_dir, "anticipated_purity_dims.tsv")
print(out_path)
write.table(predicted, out_path, row.names = FALSE, sep = "\t")