comparison create_msp.R @ 5:f2683ec717fe draft default tip

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit a164f06c09dc1614c2909c247ebf390aab433527-dirty
author tomnl
date Wed, 18 Sep 2019 05:46:09 -0400
parents 769ec2496d14
children
comparison
equal deleted inserted replaced
4:769ec2496d14 5:f2683ec717fe
# Load dependencies. library() stops immediately when a package is missing,
# whereas require() only returns FALSE (and its warning was being suppressed),
# which let the script continue and fail later with an unrelated error.
suppressWarnings(suppressPackageStartupMessages(library(optparse, quietly = TRUE)))
suppressWarnings(suppressPackageStartupMessages(library(msPurity)))
print(sessionInfo())

# Get the parameters:
#   -i / --purity  path to an RData file that provides the object `pa`
#   -p / --ppm     ppm tolerance used when binning m/z values ("average" mode)
#   -m / --mode    how multi-spectrum groups are exported: "all", "max" or
#                  "average"
option_list <- list(
  make_option(c("-i", "--purity"), type = "character"),
  make_option(c("-p", "--ppm"), type = "numeric"),
  make_option(c("-m", "--mode"), type = "character")
)
opt <- parse_args(OptionParser(option_list = option_list))

print(opt)

if (is.null(opt$purity)) {
  stop("The --purity argument (path to the RData file) is required",
       call. = FALSE)
}

# load() restores the saved objects into the global environment and returns
# their names; the rest of the script relies on one of them being called `pa`.
loaded_objects <- load(opt$purity)
if (!("pa" %in% loaded_objects)) {
  stop("The file given with --purity does not contain an object named 'pa'",
       call. = FALSE)
}

# Per-scan rows linked to a group id (grpid)
grped_df <- pa@grped_df

# Spectra, indexed by group id (as character) and then by position in the group
msms <- pa@grped_ms2

# Per-scan table holding seqNum, fileid and precursorIntensity
puritydf <- pa@puritydf

#grped_df$fileid <- sapply(grped_df$filename, function(x) which(basename(pa@fileList)==x))
#puritydf$fileid <- sapply(puritydf$filename, function(x) which(basename(pa@fileList)==x))

# Group ids to export
selfrag <- as.numeric(unique(grped_df$grpid))

# Output connection shared by every write.msp() call.
# NOTE(review): the "w+a" mode string is kept as-is; confirm it is intended,
# "w" is the conventional mode for a freshly written file.
of <- file(description = "outfile.msp", open = "w+a")
33
# Write one spectrum record in MSP text format to an open connection.
#
# Arguments:
#   name      value written after "NAME: "
#   precmz    value written after "PRECURSORMZ: "
#   prectype  currently unused (the PRECURSORTYPE line is disabled)
#   spectra   two-column table of peaks: column 1 is written first, column 2
#             second, separated by a tab. NULL is written as a record with
#             zero peaks; a plain vector (a single peak whose matrix
#             dimensions were dropped) is written as one peak.
#   ofile     open, writable connection
#
# Lines are terminated with "\r\n". Returns NULL invisibly.
write.msp <- function(name, precmz, prectype, spectra, ofile) {
  # Normalise inputs that cannot be indexed as [, 1] / [, 2].
  if (is.null(spectra)) {
    spectra <- matrix(numeric(0), ncol = 2)
  } else if (is.null(dim(spectra))) {
    spectra <- matrix(spectra, nrow = 1)
  }

  cat(paste0("NAME: ", name, "\r\n"), file = ofile)
  cat(paste0("PRECURSORMZ: ", precmz, "\r\n"), file = ofile)
  #cat(paste0("PRECURSORTYPE: ", prectype, "\r\n"), file = ofile) # No adducts? Annotation
  cat("Comment:\r\n", file = ofile)
  cat(paste0("Num Peaks: ", nrow(spectra), "\r\n"), file = ofile)

  # One "<col1>\t<col2>" line per peak; `sep` only goes between peaks, so the
  # final cat() below terminates the last peak line.
  peak_lines <- paste(spectra[, 1], spectra[, 2], sep = "\t")
  cat(peak_lines, sep = "\r\n", file = ofile)
  cat("\r\n", file = ofile)

  invisible(NULL)
}
50
# File identifier used in record names: the `sample` column when the table has
# one, otherwise `fileid`.
msp_file_id <- function(df) {
  if ("sample" %in% colnames(df)) df$sample else df$fileid
}

# Precursor intensity of every row of `grpd`, looked up in `puritydf` by scan
# number (precurMtchID == seqNum) and by that row's own file id.
# NOTE(review): the previous code compared every row against grpd$fileid[1];
# this assumes seqNum is only unique within a file - confirm against msPurity.
# Returns a numeric vector with NA where no matching scan exists.
msp_precursor_intensity <- function(grpd, puritydf) {
  vapply(seq_len(nrow(grpd)), function(k) {
    hit <- which(puritydf$seqNum == grpd$precurMtchID[k] &
                   puritydf$fileid == grpd$fileid[k])
    if (length(hit) == 0) {
      NA_real_
    } else {
      as.numeric(puritydf[hit[1], "precursorIntensity"])
    }
  }, numeric(1))
}

# Merge several spectra into one weighted spectrum.
#   spec      list of two-column spectra (m/z, intensity)
#   prec_int  precursor intensity of each spectrum, same order as `spec`
#   ppm       tolerance used to bin neighbouring m/z values
# Returns a two-column matrix: mean m/z of each bin and the sum of the
# weighted intensities in that bin.
msp_average_spectra <- function(spec, prec_int, ppm) {
  # Weights are relative to the weakest precursor, so the spectrum of the most
  # intense precursor gets the largest weight.
  file_weights <- prec_int / min(prec_int)

  merged_msms <- do.call("rbind", spec)
  colnames(merged_msms) <- c("mz", "int")

  # One weight per peak: repeat each spectrum's weight for all of its peaks.
  file_weights <- rep(file_weights, vapply(spec, nrow, integer(1)))
  merged_msms <- data.frame(cbind(merged_msms, file_weights))

  merged_msms <- merged_msms[sort(merged_msms$mz, index.return = TRUE)$ix, ]
  umzs <- merged_msms$mz

  mz_groups <- list() # row indices of each m/z bin
  mz_grouped <- c()   # m/z values already assigned to a bin

  for (y in seq_along(umzs)) {
    z <- umzs[y]
    if (z %in% mz_grouped) {
      next
    }

    # Peaks within +/- ppm of the current m/z
    tol <- z * (ppm / 1e6)
    mz_group <- which(umzs > z - tol & umzs < z + tol)

    if (length(mz_group) > 1) {
      # Extend the bin once, using the window around its largest member
      zz <- umzs[max(mz_group)]
      tol <- zz * (ppm / 1e6)
      mz_group <- unique(c(mz_group, which(umzs > zz - tol & umzs < zz + tol)))
    }

    mz_grouped <- c(mz_grouped, umzs[mz_group])
    mz_groups[[length(mz_groups) + 1]] <- mz_group
  }

  # vapply() always yields a 2 x n matrix, so the transpose is n x 2 even for
  # a single bin.
  t(vapply(mz_groups, function(x) {
    c(mean(merged_msms$mz[x]),
      sum(merged_msms$int[x] * merged_msms$file_weights[x]))
  }, numeric(2)))
}

# Export every group id in `selfrag` to the MSP connection `of`.
# Groups with one spectrum are written directly. Groups with several spectra
# are handled according to opt$mode:
#   "all"      one record per spectrum
#   "max"      only the spectrum with the most intense precursor
#   "average"  one ppm-binned, precursor-intensity-weighted spectrum
# Any other mode writes nothing for multi-spectrum groups.
for (i in selfrag) {

  j <- which(grped_df$grpid == i)

  # e.g. an NA group id matches no rows; there is nothing to export for it
  if (length(j) == 0) {
    next
  }

  spec <- msms[[as.character(i)]]
  grpd <- grped_df[j, ]

  if (length(j) == 1) {
    name <- paste(i, msp_file_id(grpd), grpd$pid, sep = "-")
    write.msp(name, grpd$precurMtchMZ, "", spec[[1]], of)
    next
  }

  if (opt$mode == "all") {
    for (jj in seq_along(j)) {
      grpdj <- grpd[jj, ]
      name <- paste(i, msp_file_id(grpdj), grpdj$pid, sep = "-")
      write.msp(name, grpdj$precurMtchMZ, "", spec[[jj]], of)
    }
    next
  }

  if (opt$mode %in% c("max", "average")) {

    prec_int <- msp_precursor_intensity(grpd, puritydf)
    if (anyNA(prec_int)) {
      stop("No precursor intensity found in puritydf for a scan of group ", i,
           call. = FALSE)
    }

    if (opt$mode == "max") {
      # which.max() returns a single index even when intensities tie, so
      # exactly one record is written (was: which(x == max(x)) together with
      # the undefined variable `specj`).
      idx <- which.max(prec_int)
      grpd <- grpd[idx, ]
      name <- paste(i, msp_file_id(grpd), grpd$pid, sep = "-")
      write.msp(name, grpd$precurMtchMZ, "", spec[[idx]], of)
    }

    if (opt$mode == "average") {
      averaged_spec <- msp_average_spectra(spec, prec_int, opt$ppm)
      write.msp(i, mean(grpd$precurMtchMZ), "", averaged_spec, of)
    }
  }
}

# All records are written: flush and release the output connection.
close(of)
199
200