comparison maldi_quant_peakdetection.xml @ 1:96264fce1847 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
author galaxyp
date Mon, 01 Oct 2018 01:03:26 -0400
parents 3a8a502fbbc1
children 41c148280a08
comparison
equal deleted inserted replaced
0:3a8a502fbbc1 1:96264fce1847
1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="1.18.0.0"> 1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.1">
2 <description> 2 <description>
3 Peak detection, binning and filtering for mass-spectrometry imaging data 3 Peak detection, binning and filtering for mass-spectrometry imaging data
4 </description> 4 </description>
5 <macros> 5 <macros>
6 <import>maldi_macros.xml</import> 6 <import>maldi_macros.xml</import>
13 cp '${infile.extra_files_path}/ibd' infile.ibd && 13 cp '${infile.extra_files_path}/ibd' infile.ibd &&
14 #elif $infile.ext == 'analyze75' 14 #elif $infile.ext == 'analyze75'
15 cp '${infile.extra_files_path}/hdr' infile.hdr && 15 cp '${infile.extra_files_path}/hdr' infile.hdr &&
16 cp '${infile.extra_files_path}/img' infile.img && 16 cp '${infile.extra_files_path}/img' infile.img &&
17 cp '${infile.extra_files_path}/t2m' infile.t2m && 17 cp '${infile.extra_files_path}/t2m' infile.t2m &&
18 #else
19 ln -s $infile infile.RData &&
18 #end if 20 #end if
19 Rscript '${maldi_quant_peak_detection}'&& 21 Rscript '${maldi_quant_peak_detection}'&&
20 mkdir $outfile_imzml.files_path && 22 mkdir $outfile_imzml.files_path &&
21 mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && 23 mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
22 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && 24 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
27 <configfiles> 29 <configfiles>
28 <configfile name="maldi_quant_peak_detection"><![CDATA[ 30 <configfile name="maldi_quant_peak_detection"><![CDATA[
29 31
30 @R_IMPORTS@ 32 @R_IMPORTS@
31 33
32 summarized_spectra = FALSE 34
33 35
34 #if $restriction_conditional.restriction == 'restrict': 36 #if $restriction_conditional.restriction == 'restrict':
35 37
36 print('Reading mask region') 38 print('Reading mask region')
39
37 ## Import imzML file 40 ## Import imzML file
38 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE))[,1:2] 41 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2]
39 42
40 maldi_data <- importImzMl('infile.imzML', 43 maldi_data <- importImzMl('infile.imzML',
41 coordinates = coordinate_matrix, centroided = $centroids) 44 coordinates = coordinate_matrix, centroided = $centroids)
42 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) 45 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
46
43 47
44 #else: 48 #else:
45 49
46 print('Reading entire file') 50 print('Reading entire file')
47 ## Import imzML file 51 ## Import imzML file
48 52
49
50 #if $infile.ext == 'imzml' 53 #if $infile.ext == 'imzml'
51 54 print('imzML file')
52 #if str($centroids) == "TRUE" 55 #if str($centroids) == "TRUE"
53 peaks <- importImzMl('infile.imzML', centroided = $centroids) 56 peaks <- importImzMl('infile.imzML', centroided = $centroids)
54 pixelnames = paste0("x = ", coordinates(peaks)[,1],", y = ", coordinates(peaks)[,2]) 57 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
55
56 #else 58 #else
57 maldi_data <- importImzMl('infile.imzML', centroided = $centroids) 59 maldi_data <- importImzMl('infile.imzML', centroided = $centroids)
58 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) 60 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_")
59 #end if 61 #end if
62 coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data)))
63
60 #elif $infile.ext == 'tabular' 64 #elif $infile.ext == 'tabular'
61 65 print('tabular file')
66 #set $centroids = "TRUE" ## will be used in some if conditions
62 peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE) 67 peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE)
63 peak_list = split(peak_tabular, f = peak_tabular\$spectrum) ## will be ordered according to spectrum 68 peak_list = split(peak_tabular, f = peak_tabular\$spectrum) ## will be ordered according to spectrum
64 pixelnames = unique(peak_tabular\$spectrum) 69 pixelnames = unique(peak_tabular\$spectrum)
65 70
66 peaks = list() 71 peaks = list()
67 for (spectra in 1:length(peak_list)) 72 for (spectra in 1:length(peak_list))
68 { 73 {
69 single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr) 74 single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr)
70 peaks[[spectra]] = single_peaks 75 peaks[[spectra]] = single_peaks
71 } 76 }
72 77
78 #else
79 print('rdata file')
80 loadRData <- function(fileName){
81 #loads an RData file, and returns it
82 load(fileName)
83 get(ls()[ls() != "fileName"])
84 }
85 msidata = loadRData('infile.RData')
86 centroided(msidata) = $centroids
87 pixelnames = gsub(", y = ", "_", names(Cardinal::pixels(msidata)))
88 pixelnames = gsub(" = ", "y_", pixelnames)
89
90 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2])
91
92 if (centroided(msidata) == FALSE){
93 ## create mass spectrum object
94 cardinal_mzs = Cardinal::mz(msidata)
95 maldi_data = list()
96 for(number_spectra in 1:ncol(msidata)){
97 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra])
98 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))}
99 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))
100 }else{
101 peaks = list()
102 for (spectra in 1:ncol(msidata))
103 {
104 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata))))
105 peaks[[spectra]] = single_peaks
106 }}
73 #end if 107 #end if
74
75
76 #end if 108 #end if
77 109
110
111
112
113
114
115
116
117
118
119 ## default summarized = FALSE
120 summarized_spectra = FALSE
121
78 ## Quality control plots during peak detection 122 ## Quality control plots during peak detection
79
80 pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12) 123 pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12)
81 plot(0,type='n',axes=FALSE,ann=FALSE) 124 plot(0,type='n',axes=FALSE,ann=FALSE)
82 125
83 ## if no filename is given, name of file in Galaxy history is used 126 ## if no filename is given, name of file in Galaxy history is used
84 #set $filename = $infile.display_name 127 #set $filename = $infile.display_name
85 128
86 title(main=paste("$filename")) 129 title(main=paste("$filename"))
87 130
88 ## plot input file spectrum: 131 ## plot input file spectrum:
89 #if $infile.ext == 'imzml' 132 #if str($centroids) == "TRUE"
90 133 plot(peaks[[1]], main="First spectrum of input file")
91 #if str($centroids) == "TRUE" 134 #else
92 plot(peaks[[1]], main="First spectrum of input file") 135 avgSpectra <- averageMassSpectra(maldi_data,method="mean")
93 #else 136 plot(avgSpectra, main="Average spectrum of input file")
94 avgSpectra <- averageMassSpectra(maldi_data,method="mean")
95 plot(avgSpectra, main="Average spectrum of input file")
96 #end if
97 #elif $infile.ext == 'tabular'
98 plot(peaks[[1]], main="First spectrum of input file")
99 #end if 137 #end if
100 138
139
140
141
142
143
144
145
146
147 ## QC numbers for input file
148 #if str($centroids) == "TRUE"
149 pixel_number = length(peaks)
150 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
151 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
152 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
153 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
154 inputdata = c(minmz, maxmz,maxfeatures, medint)
155 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint))
156 vectorofactions = "inputdata"
157 #else
158 pixel_number = length(maldi_data)
159 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4)
160 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4)
161 maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2)
162 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2)
163 inputdata = c(minmz, maxmz,maxfeatures, medint)
164 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint))
165 vectorofactions = "inputdata"
166 #end if
101 167
102 #if str($tabular_annotation.load_annotation) == 'yes_annotation': 168 #if str($tabular_annotation.load_annotation) == 'yes_annotation':
103 169
104 ## read and extract x,y,annotation information 170 ## read and extract x,y,annotation information
105 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) 171 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
106 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] 172 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
107 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" 173 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"
108 174
109 ## merge with coordinate information of MSI data 175 ## merge with coordinate information of MSI data
110 176 colnames(coordinates_info)[3] = "pixel_index"
111 coordinates_st = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) 177 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE)
112 colnames(coordinates_st)[3] = "pixel_index"
113 merged_annotation = merge(coordinates_st, annotation_input, by=c("x", "y"), all.x=TRUE)
114 merged_annotation[is.na(merged_annotation)] = "NA" 178 merged_annotation[is.na(merged_annotation)] = "NA"
115 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] 179 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
116 samples = as.factor(merged_annotation\$annotation) 180 samples = as.factor(merged_annotation\$annotation)
117 181
118 ## print annotation overview into PDF output 182 ## print annotation overview into PDF output
149 213
150 #################### Preprocessing methods ##################################### 214 #################### Preprocessing methods #####################################
151 215
152 #for $method in $methods: 216 #for $method in $methods:
153 217
154
155 #if str( $method.methods_conditional.method ) == 'Peak_detection': 218 #if str( $method.methods_conditional.method ) == 'Peak_detection':
156 print('peak detection') 219 print('peak detection')
157 ##peak detection 220 ##peak detection
158 221
159 #if $method.methods_conditional.use_annotations: 222 #if $method.methods_conditional.use_annotations:
160 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking 223 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking
161 pixelnames = merged_annotation\$annotation 224 pixelnames = levels(samples)
162 summarized_spectra = TRUE 225 summarized_spectra = TRUE
163 226
164 #end if 227 #end if
165 228
166 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method", 229 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method",
167 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr) 230 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr)
168 231
169 ## QC plot 232 ## QC plot and numbers
170 plot(peaks[[1]], main="First spectrum after peak detection") 233 plot(peaks[[1]], main="First spectrum after peak detection")
234 pixel_number = length(peaks)
235 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
236 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
237 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
238 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
239 peaks_picked = c(minmz, maxmz,maxfeatures, medint)
240 QC_numbers= cbind(QC_numbers, peaks_picked)
241 vectorofactions = append(vectorofactions, "peaks_picked")
171 242
172 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 243 if (length(peaks[!sapply(peaks, isEmpty)])>0){
173 #if $infile.ext == 'imzml' 244 #if $infile.ext == 'imzml'
174 #if str($centroids) == "FALSE" 245 #if str($centroids) == "FALSE"
175 featureMatrix <- intensityMatrix(peaks, maldi_data) 246 featureMatrix <- intensityMatrix(peaks, maldi_data)
176 #end if 247 #end if
177 #else 248 #else
178 featureMatrix <- intensityMatrix(peaks) 249 featureMatrix <- intensityMatrix(peaks)
179 #end if 250 #end if
180 featureMatrix2 =cbind(pixelnames, featureMatrix) 251 featureMatrix2 =cbind(pixelnames, featureMatrix)
181 colnames(featureMatrix2)[1] = c("mz | spectra") 252 colnames(featureMatrix2)[1] = c("mz")
182 featureMatrix2 = t(featureMatrix2) 253 featureMatrix2 = t(featureMatrix2)
183 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 254 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
184 }else{print("There are no spectra with peaks left")} 255 }else{print("There are no spectra with peaks left")}
185 256
186 257
189 print('monoisotopic peaks') 260 print('monoisotopic peaks')
190 ##monoisotopic peaks 261 ##monoisotopic peaks
191 262
192 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size) 263 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size)
193 264
194 ## QC plot 265 ## QC plot and numbers
195 plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection") 266 plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection")
267 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
268 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
269 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
270 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
271 monoisotopes = c(minmz, maxmz,maxfeatures, medint)
272 QC_numbers= cbind(QC_numbers, monoisotopes)
273 vectorofactions = append(vectorofactions, "monoisotopes")
196 274
197 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 275 if (length(peaks[!sapply(peaks, isEmpty)])>0){
198 #if $infile.ext == 'imzml' 276 #if $infile.ext == 'imzml'
199 #if str($centroids) == "FALSE" 277 #if str($centroids) == "FALSE"
200 featureMatrix <- intensityMatrix(peaks, maldi_data) 278 featureMatrix <- intensityMatrix(peaks, maldi_data)
201 #end if 279 #end if
202 #else 280 #else
203 featureMatrix <- intensityMatrix(peaks) 281 featureMatrix <- intensityMatrix(peaks)
204 #end if 282 #end if
205 featureMatrix2 =cbind(pixelnames, featureMatrix) 283 featureMatrix2 =cbind(pixelnames, featureMatrix)
206 colnames(featureMatrix2)[1] = c("mz | spectra") 284 colnames(featureMatrix2)[1] = c("mz")
207 featureMatrix2 = t(featureMatrix2) 285 featureMatrix2 = t(featureMatrix2)
208 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 286 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
209 }else{print("There are no spectra with peaks left")} 287 }else{print("There are no spectra with peaks left")}
210 288
211 #elif str( $method.methods_conditional.method ) == 'Binning': 289 #elif str( $method.methods_conditional.method ) == 'Binning':
212 290
213 print('binning') 291 print('binning')
214 ##m/z binning 292 ##m/z binning
215 293
216 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance) 294 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance)
217 ## QC plot 295
296 ## QC plot and numbers
218 plot(peaks[[1]], main="First spectrum after binning") 297 plot(peaks[[1]], main="First spectrum after binning")
298 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
299 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
300 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
301 medint =round( median(unlist(lapply(peaks,intensity))), digits=2)
302 binned = c(minmz, maxmz,maxfeatures, medint)
303 QC_numbers= cbind(QC_numbers, binned)
304 vectorofactions = append(vectorofactions, "binned")
219 305
220 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 306 if (length(peaks[!sapply(peaks, isEmpty)])>0){
221 #if $infile.ext == 'imzml' 307 #if $infile.ext == 'imzml'
222 #if str($centroids) == "FALSE" 308 #if str($centroids) == "FALSE"
223 featureMatrix <- intensityMatrix(peaks, maldi_data) 309 featureMatrix <- intensityMatrix(peaks, maldi_data)
227 #end if 313 #end if
228 #else 314 #else
229 featureMatrix <- intensityMatrix(peaks) 315 featureMatrix <- intensityMatrix(peaks)
230 #end if 316 #end if
231 featureMatrix2 =cbind(pixelnames, featureMatrix) 317 featureMatrix2 =cbind(pixelnames, featureMatrix)
232 colnames(featureMatrix2)[1] = c("mz | spectra") 318 colnames(featureMatrix2)[1] = c("mz")
233 featureMatrix2 = t(featureMatrix2) 319 featureMatrix2 = t(featureMatrix2)
234 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 320 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
235 }else{print("There are no spectra with peaks left")} 321 }else{print("There are no spectra with peaks left")}
236 322
237 323
254 minFrequency=$method.methods_conditional.minFrequency, 340 minFrequency=$method.methods_conditional.minFrequency,
255 minNumber=$method.methods_conditional.minNumber, 341 minNumber=$method.methods_conditional.minNumber,
256 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples) 342 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples)
257 #end if 343 #end if
258 344
259 ##QC plot 345 ##QC plot and numbers
260 plot(peaks[[1]], main="First spectrum after m/z filtering") 346 plot(peaks[[1]], main="First spectrum after m/z filtering")
347 minmz = round(min(unlist(lapply(peaks,mass))), digits=4)
348 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4)
349 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2)
350 medint = round(median(unlist(lapply(peaks,intensity))), digits=2)
351 filtered = c(minmz, maxmz,maxfeatures, medint)
352 QC_numbers= cbind(QC_numbers, filtered)
353 vectorofactions = append(vectorofactions, "filtered")
261 354
262 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 355 if (length(peaks[!sapply(peaks, isEmpty)])>0){
263 #if $infile.ext == 'imzml' 356 #if $infile.ext == 'imzml'
264 #if str($centroids) == "FALSE" 357 #if str($centroids) == "FALSE"
265 featureMatrix <- intensityMatrix(peaks, maldi_data) 358 featureMatrix <- intensityMatrix(peaks, maldi_data)
266 #end if 359 #end if
267 #else 360 #else
268 featureMatrix <- intensityMatrix(peaks) 361 featureMatrix <- intensityMatrix(peaks)
269 #end if 362 #end if
270 featureMatrix2 =cbind(pixelnames, featureMatrix) 363 featureMatrix2 =cbind(pixelnames, featureMatrix)
271 colnames(featureMatrix2)[1] = c("mz | spectra") 364 colnames(featureMatrix2)[1] = c("mz")
272 featureMatrix2 = t(featureMatrix2) 365 featureMatrix2 = t(featureMatrix2)
366 }else{print("There are no spectra with peaks left")
367 featureMatrix2 = matrix(0, ncol=1, nrow=1)}
273 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") 368 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t")
274 }else{print("There are no spectra with peaks left")}
275
276 #end if 369 #end if
277 #end for 370 #end for
278 371
279 if (length(peaks[!sapply(peaks, isEmpty)])>0){ 372 if (length(peaks[!sapply(peaks, isEmpty)])>0){
280 ## mass peaks output 373 ## mass peaks output
281 mass_peaks = data.frame(matrix(,ncol=3, nrow=0)) 374 mass_peaks = data.frame(matrix(,ncol=3, nrow=0))
282 for (spectrum in 1:length(peaks)){ 375 for (spectrum in 1:length(peaks)){
283 spectrum_df = data.frame(peaks[[spectrum]]@snr, peaks[[spectrum]]@mass, peaks[[spectrum]]@intensity) 376 spectrum_df = data.frame(peaks[[spectrum]]@snr, peaks[[spectrum]]@mass, peaks[[spectrum]]@intensity)
284 spectrum_df\$spectrum_id = rep(pixelnames[[spectrum]], length(peaks[[spectrum]]@mass)) 377 spectrum_df\$spectrum_id = rep(pixelnames[[spectrum]], length(peaks[[spectrum]]@mass))
285 mass_peaks = rbind(mass_peaks,spectrum_df) 378 mass_peaks = rbind(mass_peaks,spectrum_df)
286 } 379 }
287 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") 380 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum")
288 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") 381 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
289 }else{print("There are no spectra with peaks left")} 382 }else{print("There are no spectra with peaks left")}
383
384 ## print table with QC values
385 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\nintensity")
386 plot(0,type='n',axes=FALSE,ann=FALSE)
387 grid.table(t(QC_numbers))
290 388
291 dev.off() 389 dev.off()
292 390
293 if (summarized_spectra == FALSE){ 391 if (summarized_spectra == FALSE){
294 #if $infile.ext == 'imzml' 392 #if $infile.ext == 'imzml'
295 exportImzMl(peaks, file="out.imzMl", processed=$export_processed) 393 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed)
296 #elif $infile.ext == 'tabular' 394 #elif $infile.ext == 'tabular'
297 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\,")), ncol=2, byrow=TRUE) 395 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE)
298 ## extract x and y values and create the coordinate matrix in case tabular was input 396 ## extract x and y values and create the coordinate matrix in case tabular was input
299 peaklist_coordinates = unique(cbind(as.numeric(substring(masspeaks_coordinates[,1], 5, last = 1000000L)), as.numeric(substring(masspeaks_coordinates[,2], 5, last = 1000000L)))) 397 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3])))
300 exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates) 398 exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates)
399 #elif $infile.ext == 'rdata'
400 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates)
301 #end if 401 #end if
402
302 } 403 }
303 404
304 ]]> 405 ]]>
305 </configfile> 406 </configfile>
306 </configfiles> 407 </configfiles>
307 <inputs> 408 <inputs>
308 <param name="infile" type="data" format="imzml,tabular" label="MS metadata" help="This file is in imzML or tabular format (peak list, peak detection cannot be run again)"/> 409 <param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML or tabular format (peak list, peak detection cannot be run again) or Cardinal MSImageSet saved as RData"/>
309 <param name="centroids" type="boolean" label="Is the imzML data centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> 410 <param name="centroids" type="boolean" label="Is the imzML/RData data centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/>
310 <conditional name="restriction_conditional"> 411 <conditional name="restriction_conditional">
311 <param name="restriction" type="select" label="Restrict the preprocessing to coordinates of interest"> 412 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files">
312 <option value="no_restriction" selected="True">Calculate on entire file</option> 413 <option value="no_restriction" selected="True">Calculate on entire file</option>
313 <option value="restrict">Restrict to coordinates of interest</option> 414 <option value="restrict">Restrict to coordinates of interest</option>
314 </param> 415 </param>
315 <when value="restrict"> 416 <when value="restrict">
316 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates which should be read" help="x-values in first column, y-values in second column"/> 417 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/>
418 <param name="coordinates_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
317 </when> 419 </when>
318 <when value="no_restriction"/> 420 <when value="no_restriction"/>
319 </conditional> 421 </conditional>
320 422
321 <conditional name="tabular_annotation"> 423 <conditional name="tabular_annotation">
322 <param name="load_annotation" type="select" label="Use pixel annotation from tabular file - select in peak detection or filtering step where you want to apply the annotation information"> 424 <param name="load_annotation" type="select" label="Use pixel annotation from tabular file - select in peak detection or filtering step where annotation should be used">
323 <option value="no_annotation" selected="True">pixels belong into one group only</option> 425 <option value="no_annotation" selected="True">pixels belong into one group only</option>
324 <option value="yes_annotation">use pixel annotation from a tabular file</option> 426 <option value="yes_annotation">use pixel annotation from a tabular file</option>
325 </param> 427 </param>
326 <when value="yes_annotation"> 428 <when value="yes_annotation">
327 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" 429 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
333 </when> 435 </when>
334 <when value="no_annotation"/> 436 <when value="no_annotation"/>
335 </conditional> 437 </conditional>
336 <repeat name="methods" title="Method" min="1"> 438 <repeat name="methods" title="Method" min="1">
337 <conditional name="methods_conditional"> 439 <conditional name="methods_conditional">
338 <param name="method" type="select" label="Select the method you want to apply"> 440 <param name="method" type="select" label="Select a method">
339 <option value="Peak_detection">Peak detection</option> 441 <option value="Peak_detection">Peak detection</option>
340 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option> 442 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option>
341 <option value="Binning">Binning</option> 443 <option value="Binning">Binning</option>
342 <option value="Filtering">Filtering</option> 444 <option value="Filtering">Filtering</option>
343 </param> 445 </param>
367 <param name="bin_tolerance" type="float" value="0.002" label="Peak binning tolerance" 469 <param name="bin_tolerance" type="float" value="0.002" label="Peak binning tolerance"
368 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/> 470 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/>
369 </when> 471 </when>
370 <when value="Filtering"> 472 <when value="Filtering">
371 <param name="minFrequency" type="float" value="0.25" 473 <param name="minFrequency" type="float" value="0.25"
372 label="Remove all peaks which occur in less than minFrequency spectra" help="It is a relative threshold."/> 474 label="Removal of all peaks which occur in less than minFrequency spectra" help="It is a relative threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be sure."/>
373 <param name="minNumber" type="float" value="1.0" 475 <param name="minNumber" type="float" value="1.0"
374 label="remove all peaks which occur in less than minNumber spectra" help="It is an absolute threshold."/> 476 label="Removal of all peaks which occur in less than minNumber spectra" help="It is an absolute threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be sure."/>
375 <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations. If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/> 477 <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations. If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/>
376 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" 478 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE"
377 label="mergeWhitelists" help="if FALSE the filtering criteria are applied groupwise. If TRUE peaks that survive the filtering in one group (level of labels) these peaks are also kept in other groups even if their frequencies are below minFrequency"/> 479 label="mergeWhitelists" help="if FALSE the filtering criteria are applied groupwise. If TRUE peaks that survive the filtering in one group (level of labels) these peaks are also kept in other groups even if their frequencies are below minFrequency"/>
378 </when> 480 </when>
379 </conditional> 481 </conditional>
380 </repeat> 482 </repeat>
381 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="true" truevalue="TRUE" falsevalue="FALSE"/> 483 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="true" truevalue="TRUE" falsevalue="FALSE"/>
382 </inputs> 484 </inputs>
383 <outputs> 485 <outputs>
384 <data format="imzml" name="outfile_imzml" label="$infile.display_name peaks" /> 486 <data format="imzml" name="outfile_imzml" label="$infile.display_name peaks"/>
385 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "$infile.display_name peakdetection QC"/> 487 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "$infile.display_name peakdetection QC"/>
386 <data format="tabular" name="masspeaks" label="$infile.display_name mass_peaks"/> 488 <data format="tabular" name="masspeaks" label="$infile.display_name mass_peaks"/>
387 <data format="tabular" name="intensity_matrix" label="intensity_matrix"/> 489 <data format="tabular" name="intensity_matrix" label="intensity_matrix"/>
388 </outputs> 490 </outputs>
389 <tests> 491 <tests>
412 <output name="plots" file="peakdetection1_QC.pdf" compare="sim_size"/> 514 <output name="plots" file="peakdetection1_QC.pdf" compare="sim_size"/>
413 <output name="masspeaks" file="masspeaks1.tabular"/> 515 <output name="masspeaks" file="masspeaks1.tabular"/>
414 <output name="intensity_matrix" file="int1.tabular"/> 516 <output name="intensity_matrix" file="int1.tabular"/>
415 </test> 517 </test>
416 <test> 518 <test>
417 <param name="infile" value="masspeaks1_forinput.tabular"/> 519 <param name="infile" value="masspeaks3_forinput.tabular"/>
418 <param name="centroids" value="TRUE"/> 520 <param name="centroids" value="TRUE"/>
419 <repeat name="methods"> 521 <repeat name="methods">
420 <conditional name="methods_conditional"> 522 <conditional name="methods_conditional">
421 <param name="method" value="monoisotopic_peaks"/> 523 <param name="method" value="monoisotopic_peaks"/>
422 <param name="minCor" value="0.60"/>
423 <param name="tolerance" value="0.0001"/>
424 </conditional> 524 </conditional>
425 </repeat> 525 </repeat>
426 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> 526 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/>
427 <output name="masspeaks" file="masspeaks2.tabular"/> 527 <output name="masspeaks" file="masspeaks2.tabular"/>
428 <output name="intensity_matrix" file="int2.tabular"/> 528 <output name="intensity_matrix" file="int2.tabular"/>
455 </conditional> 555 </conditional>
456 </repeat> 556 </repeat>
457 <repeat name="methods"> 557 <repeat name="methods">
458 <conditional name="methods_conditional"> 558 <conditional name="methods_conditional">
459 <param name="method" value="Filtering"/> 559 <param name="method" value="Filtering"/>
460 <param name="bin_tolerance" value="0.01"/>
461 <param name="minFrequency" value="0.5"/> 560 <param name="minFrequency" value="0.5"/>
462 <param name="minNumber" value="3"/> 561 <param name="minNumber" value="3"/>
463 <param name="filter_annot_groups" value="TRUE"/> 562 <param name="filter_annot_groups" value="TRUE"/>
464 <param name="mergeWhitelists" value="FALSE"/> 563 <param name="mergeWhitelists" value="FALSE"/>
465 </conditional> 564 </conditional>
470 </test> 569 </test>
471 </tests> 570 </tests>
472 <help> 571 <help>
473 <![CDATA[ 572 <![CDATA[
474 573
475 MALDIquant_ provides a complete analysis pipeline for MALDI-TOF and other mass spectrometry data. So far we have only implemented the functionalities for mass spectrometry imaging data. 574 @MADLI_QUANT_DESCRIPTION@
476 575
477 Input data: 576 -----
478 577
479 - MSI data as imzML or file (upload via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ 578 **Input data**
480 - or MSI data as peak list (tabular file) with the columns named "snr", "mass", "intensity" and "spectrum". To obtain a valid imzML output file spectrum should contain the pixel coordinates in the format: "x = 1, y = 1" 579
481 - optional tabular file with pixel coordinates to restrict reading of imzML file to coordinates of interest 580 - MSI data: 3 types of input data can be used:
482 - optional tabular file with pixel annotations. The annotations can be used to summarize pixels of an imzML file which belong to the same group and detect peaks on average spectra, further steps will be done on average spectra as well and average spectra are exported. If this option was not chosen the filtering tool can use the annotations to filter for peaks within pixel groups (select "Group wise filtering") 581
483 582 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
484 583 - Cardinal "MSImageSet" data saved as .RData
485 Options: 584 - MSI data as peak list (tabular file) with the columns named "snr", "mass", "intensity" and "spectrum". The spectrum has to be in the following format: xy_1_1 (for pixel coordinates x1y1). The header must contain exactly these four column names.
486 585
487 - Peak detection: detection of peaks, only possible with imzML input 586 ::
587
588 snr mass intensity spectrum
589 5.34 304.16 0.10 xy_1_1
590 12.09 305 0.2 xy_1_1
591 6.80 306.25 0.133 xy_1_1
592 ...
593 ...
594
595
596 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second column. Further columns are allowed. Tabular files with any header name or no header at all are supported.
597
598 ::
599
600 x_coord y_coord
601 1 1
602 2 1
603 3 1
604 ...
605 ...
606
607
608 - Optional: Tabular file(s) containing pixel coordinates and annotation. X and y values in separate columns and the corresponding annotation in a third column. Tabular files with any header name or no header at all are supported. The annotations can be used to summarize pixels of an imzML file which belong to the same group and detect peaks on average spectra, further steps will be done on average spectra as well and average spectra are exported. If this option was not chosen the filtering tool can use the annotations to filter for peaks within pixel groups (select "Group wise filtering").
609
610 ::
611
612 x_coord y_coord annotation
613 1 1 healthy
614 2 1 healthy
615 3 1 disease
616 ...
617 ...
618
619
620 **Options**
621
622 - Peak detection: detection of peaks, only possible with profile mode input
488 - Monoisotopic peaks: detection of monoisotopic peaks 623 - Monoisotopic peaks: detection of monoisotopic peaks
489 - Peak binning: After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical. 624 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical.
490 - Peak filtering: Removal of less frequent peaks (either with a minimum ratio or with an absolute minimum number of spectra in which the peak has to occur) 625 - Peak filtering: Removal of less frequent peaks (either with a minimum ratio or with an absolute minimum number of spectra in which the peak has to occur)
491 626
492 627
493 Output: 628 **Output**
494 629
495 - centroided processed or continuous imzML file 630 - centroided imzML file (processed or continuous), except for peak picking on the average of multiple spectra
496 - pdf with mass spectra after each preprocessing step 631 - pdf with mass spectra plots after each preprocessing step
497 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum" 632 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum"
498 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs not picked values are set to NA. 633 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs not picked values are set to NA. For peak picking on the average of multiple spectra, each group of spectra is a column with mean intensities for each m/z
499 634
500 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/ 635 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/
501 636
502 ]]> 637 ]]>
503 </help> 638 </help>