Mercurial > repos > galaxyp > maldi_quant_peak_detection
comparison maldi_quant_peakdetection.xml @ 1:96264fce1847 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
author | galaxyp |
---|---|
date | Mon, 01 Oct 2018 01:03:26 -0400 |
parents | 3a8a502fbbc1 |
children | 41c148280a08 |
comparison
equal
deleted
inserted
replaced
0:3a8a502fbbc1 | 1:96264fce1847 |
---|---|
1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="1.18.0.0"> | 1 <tool id="maldi_quant_peak_detection" name="MALDIquant peak detection" version="@VERSION@.1"> |
2 <description> | 2 <description> |
3 Peak detection, binning and filtering for mass-spectrometry imaging data | 3 Peak detection, binning and filtering for mass-spectrometry imaging data |
4 </description> | 4 </description> |
5 <macros> | 5 <macros> |
6 <import>maldi_macros.xml</import> | 6 <import>maldi_macros.xml</import> |
13 cp '${infile.extra_files_path}/ibd' infile.ibd && | 13 cp '${infile.extra_files_path}/ibd' infile.ibd && |
14 #elif $infile.ext == 'analyze75' | 14 #elif $infile.ext == 'analyze75' |
15 cp '${infile.extra_files_path}/hdr' infile.hdr && | 15 cp '${infile.extra_files_path}/hdr' infile.hdr && |
16 cp '${infile.extra_files_path}/img' infile.img && | 16 cp '${infile.extra_files_path}/img' infile.img && |
17 cp '${infile.extra_files_path}/t2m' infile.t2m && | 17 cp '${infile.extra_files_path}/t2m' infile.t2m && |
18 #else | |
19 ln -s $infile infile.RData && | |
18 #end if | 20 #end if |
19 Rscript '${maldi_quant_peak_detection}'&& | 21 Rscript '${maldi_quant_peak_detection}'&& |
20 mkdir $outfile_imzml.files_path && | 22 mkdir $outfile_imzml.files_path && |
21 mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && | 23 mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && |
22 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && | 24 mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && |
27 <configfiles> | 29 <configfiles> |
28 <configfile name="maldi_quant_peak_detection"><![CDATA[ | 30 <configfile name="maldi_quant_peak_detection"><![CDATA[ |
29 | 31 |
30 @R_IMPORTS@ | 32 @R_IMPORTS@ |
31 | 33 |
32 summarized_spectra = FALSE | 34 |
33 | 35 |
34 #if $restriction_conditional.restriction == 'restrict': | 36 #if $restriction_conditional.restriction == 'restrict': |
35 | 37 |
36 print('Reading mask region') | 38 print('Reading mask region') |
39 | |
37 ## Import imzML file | 40 ## Import imzML file |
38 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE))[,1:2] | 41 coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] |
39 | 42 |
40 maldi_data <- importImzMl('infile.imzML', | 43 maldi_data <- importImzMl('infile.imzML', |
41 coordinates = coordinate_matrix, centroided = $centroids) | 44 coordinates = coordinate_matrix, centroided = $centroids) |
42 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) | 45 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") |
46 | |
43 | 47 |
44 #else: | 48 #else: |
45 | 49 |
46 print('Reading entire file') | 50 print('Reading entire file') |
47 ## Import imzML file | 51 ## Import imzML file |
48 | 52 |
49 | |
50 #if $infile.ext == 'imzml' | 53 #if $infile.ext == 'imzml' |
51 | 54 print('imzML file') |
52 #if str($centroids) == "TRUE" | 55 #if str($centroids) == "TRUE" |
53 peaks <- importImzMl('infile.imzML', centroided = $centroids) | 56 peaks <- importImzMl('infile.imzML', centroided = $centroids) |
54 pixelnames = paste0("x = ", coordinates(peaks)[,1],", y = ", coordinates(peaks)[,2]) | 57 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") |
55 | |
56 #else | 58 #else |
57 maldi_data <- importImzMl('infile.imzML', centroided = $centroids) | 59 maldi_data <- importImzMl('infile.imzML', centroided = $centroids) |
58 pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) | 60 pixelnames = paste("xy", coordinates(maldi_data)[,1],coordinates(maldi_data)[,2], sep="_") |
59 #end if | 61 #end if |
62 coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) | |
63 | |
60 #elif $infile.ext == 'tabular' | 64 #elif $infile.ext == 'tabular' |
61 | 65 print('tabular file') |
66 #set $centroids = "TRUE" ## will be used in some if conditions | |
62 peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE) | 67 peak_tabular = read.delim("$infile", header = TRUE, stringsAsFactors = FALSE) |
63 peak_list = split(peak_tabular, f = peak_tabular\$spectrum) ## will be ordered according to spectrum | 68 peak_list = split(peak_tabular, f = peak_tabular\$spectrum) ## will be ordered according to spectrum |
64 pixelnames = unique(peak_tabular\$spectrum) | 69 pixelnames = unique(peak_tabular\$spectrum) |
65 | 70 |
66 peaks = list() | 71 peaks = list() |
67 for (spectra in 1:length(peak_list)) | 72 for (spectra in 1:length(peak_list)) |
68 { | 73 { |
69 single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr) | 74 single_peaks = createMassPeaks(peak_list[[spectra]]\$mass, peak_list[[spectra]]\$intensity, snr=peak_list[[spectra]]\$snr) |
70 peaks[[spectra]] = single_peaks | 75 peaks[[spectra]] = single_peaks |
71 } | 76 } |
72 | 77 |
78 #else | |
79 print('rdata file') | |
80 loadRData <- function(fileName){ | |
81 #loads an RData file, and returns it | |
82 load(fileName) | |
83 get(ls()[ls() != "fileName"]) | |
84 } | |
85 msidata = loadRData('infile.RData') | |
86 centroided(msidata) = $centroids | |
87 pixelnames = gsub(", y = ", "_", names(Cardinal::pixels(msidata))) | |
88 pixelnames = gsub(" = ", "y_", pixelnames) | |
89 | |
90 cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2]) | |
91 | |
92 if (centroided(msidata) == FALSE){ | |
93 ## create mass spectrum object | |
94 cardinal_mzs = Cardinal::mz(msidata) | |
95 maldi_data = list() | |
96 for(number_spectra in 1:ncol(msidata)){ | |
97 maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) | |
98 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data)))} | |
99 coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data))) | |
100 }else{ | |
101 peaks = list() | |
102 for (spectra in 1:ncol(msidata)) | |
103 { | |
104 single_peaks = createMassPeaks(Cardinal::mz(msidata), Cardinal::spectra(msidata)[,spectra], snr=as.numeric(rep("NA", nrow(msidata)))) | |
105 peaks[[spectra]] = single_peaks | |
106 }} | |
73 #end if | 107 #end if |
74 | |
75 | |
76 #end if | 108 #end if |
77 | 109 |
110 | |
111 | |
112 | |
113 | |
114 | |
115 | |
116 | |
117 | |
118 | |
119 ## default summarized = FALSE | |
120 summarized_spectra = FALSE | |
121 | |
78 ## Quality control plots during peak detection | 122 ## Quality control plots during peak detection |
79 | |
80 pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12) | 123 pdf("peaks_qc_plot.pdf", fonts = "Times", pointsize = 12) |
81 plot(0,type='n',axes=FALSE,ann=FALSE) | 124 plot(0,type='n',axes=FALSE,ann=FALSE) |
82 | 125 |
83 ## if no filename is given, name of file in Galaxy history is used | 126 ## if no filename is given, name of file in Galaxy history is used |
84 #set $filename = $infile.display_name | 127 #set $filename = $infile.display_name |
85 | 128 |
86 title(main=paste("$filename")) | 129 title(main=paste("$filename")) |
87 | 130 |
88 ## plot input file spectrum: | 131 ## plot input file spectrum: |
89 #if $infile.ext == 'imzml' | 132 #if str($centroids) == "TRUE" |
90 | 133 plot(peaks[[1]], main="First spectrum of input file") |
91 #if str($centroids) == "TRUE" | 134 #else |
92 plot(peaks[[1]], main="First spectrum of input file") | 135 avgSpectra <- averageMassSpectra(maldi_data,method="mean") |
93 #else | 136 plot(avgSpectra, main="Average spectrum of input file") |
94 avgSpectra <- averageMassSpectra(maldi_data,method="mean") | |
95 plot(avgSpectra, main="Average spectrum of input file") | |
96 #end if | |
97 #elif $infile.ext == 'tabular' | |
98 plot(peaks[[1]], main="First spectrum of input file") | |
99 #end if | 137 #end if |
100 | 138 |
139 | |
140 | |
141 | |
142 | |
143 | |
144 | |
145 | |
146 | |
147 ## QC numbers for input file | |
148 #if str($centroids) == "TRUE" | |
149 pixel_number = length(peaks) | |
150 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | |
151 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | |
152 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | |
153 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) | |
154 inputdata = c(minmz, maxmz,maxfeatures, medint) | |
155 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint)) | |
156 vectorofactions = "inputdata" | |
157 #else | |
158 pixel_number = length(maldi_data) | |
159 minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) | |
160 maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) | |
161 maxfeatures = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) | |
162 medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) | |
163 inputdata = c(minmz, maxmz,maxfeatures, medint) | |
164 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medint)) | |
165 vectorofactions = "inputdata" | |
166 #end if | |
101 | 167 |
102 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | 168 #if str($tabular_annotation.load_annotation) == 'yes_annotation': |
103 | 169 |
104 ## read and extract x,y,annotation information | 170 ## read and extract x,y,annotation information |
105 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) | 171 input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) |
106 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] | 172 annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] |
107 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" | 173 colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" |
108 | 174 |
109 ## merge with coordinate information of MSI data | 175 ## merge with coordinate information of MSI data |
110 | 176 colnames(coordinates_info)[3] = "pixel_index" |
111 coordinates_st = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) | 177 merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE) |
112 colnames(coordinates_st)[3] = "pixel_index" | |
113 merged_annotation = merge(coordinates_st, annotation_input, by=c("x", "y"), all.x=TRUE) | |
114 merged_annotation[is.na(merged_annotation)] = "NA" | 178 merged_annotation[is.na(merged_annotation)] = "NA" |
115 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] | 179 merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] |
116 samples = as.factor(merged_annotation\$annotation) | 180 samples = as.factor(merged_annotation\$annotation) |
117 | 181 |
118 ## print annotation overview into PDF output | 182 ## print annotation overview into PDF output |
149 | 213 |
150 #################### Preprocessing methods ##################################### | 214 #################### Preprocessing methods ##################################### |
151 | 215 |
152 #for $method in $methods: | 216 #for $method in $methods: |
153 | 217 |
154 | |
155 #if str( $method.methods_conditional.method ) == 'Peak_detection': | 218 #if str( $method.methods_conditional.method ) == 'Peak_detection': |
156 print('peak detection') | 219 print('peak detection') |
157 ##peak detection | 220 ##peak detection |
158 | 221 |
159 #if $method.methods_conditional.use_annotations: | 222 #if $method.methods_conditional.use_annotations: |
160 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking | 223 maldi_data <- averageMassSpectra(maldi_data, labels=samples,method="mean") ## use average spectra for peak picking |
161 pixelnames = merged_annotation\$annotation | 224 pixelnames = levels(samples) |
162 summarized_spectra = TRUE | 225 summarized_spectra = TRUE |
163 | 226 |
164 #end if | 227 #end if |
165 | 228 |
166 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method", | 229 peaks <- detectPeaks(maldi_data, method="$method.methods_conditional.peak_method", |
167 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr) | 230 halfWindowSize=$method.methods_conditional.halfWindowSize,SNR=$method.methods_conditional.snr) |
168 | 231 |
169 ## QC plot | 232 ## QC plot and numbers |
170 plot(peaks[[1]], main="First spectrum after peak detection") | 233 plot(peaks[[1]], main="First spectrum after peak detection") |
234 pixel_number = length(peaks) | |
235 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | |
236 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | |
237 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | |
238 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) | |
239 peaks_picked = c(minmz, maxmz,maxfeatures, medint) | |
240 QC_numbers= cbind(QC_numbers, peaks_picked) | |
241 vectorofactions = append(vectorofactions, "peaks_picked") | |
171 | 242 |
172 if (length(peaks[!sapply(peaks, isEmpty)])>0){ | 243 if (length(peaks[!sapply(peaks, isEmpty)])>0){ |
173 #if $infile.ext == 'imzml' | 244 #if $infile.ext == 'imzml' |
174 #if str($centroids) == "FALSE" | 245 #if str($centroids) == "FALSE" |
175 featureMatrix <- intensityMatrix(peaks, maldi_data) | 246 featureMatrix <- intensityMatrix(peaks, maldi_data) |
176 #end if | 247 #end if |
177 #else | 248 #else |
178 featureMatrix <- intensityMatrix(peaks) | 249 featureMatrix <- intensityMatrix(peaks) |
179 #end if | 250 #end if |
180 featureMatrix2 =cbind(pixelnames, featureMatrix) | 251 featureMatrix2 =cbind(pixelnames, featureMatrix) |
181 colnames(featureMatrix2)[1] = c("mz | spectra") | 252 colnames(featureMatrix2)[1] = c("mz") |
182 featureMatrix2 = t(featureMatrix2) | 253 featureMatrix2 = t(featureMatrix2) |
183 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") | 254 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") |
184 }else{print("There are no spectra with peaks left")} | 255 }else{print("There are no spectra with peaks left")} |
185 | 256 |
186 | 257 |
189 print('monoisotopic peaks') | 260 print('monoisotopic peaks') |
190 ##monoisotopic peaks | 261 ##monoisotopic peaks |
191 | 262 |
192 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size) | 263 peaks = monoisotopicPeaks(peaks, minCor=$method.methods_conditional.minCor, tolerance=$method.methods_conditional.tolerance, distance=$method.methods_conditional.distance, size=$method.methods_conditional.size) |
193 | 264 |
194 ## QC plot | 265 ## QC plot and numbers |
195 plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection") | 266 plot(peaks[[1]], main="First spectrum after monoisotopic peaks detection") |
267 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | |
268 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | |
269 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | |
270 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) | |
271 monoisotopes = c(minmz, maxmz,maxfeatures, medint) | |
272 QC_numbers= cbind(QC_numbers, monoisotopes) | |
273 vectorofactions = append(vectorofactions, "monoisotopes") | |
196 | 274 |
197 if (length(peaks[!sapply(peaks, isEmpty)])>0){ | 275 if (length(peaks[!sapply(peaks, isEmpty)])>0){ |
198 #if $infile.ext == 'imzml' | 276 #if $infile.ext == 'imzml' |
199 #if str($centroids) == "FALSE" | 277 #if str($centroids) == "FALSE" |
200 featureMatrix <- intensityMatrix(peaks, maldi_data) | 278 featureMatrix <- intensityMatrix(peaks, maldi_data) |
201 #end if | 279 #end if |
202 #else | 280 #else |
203 featureMatrix <- intensityMatrix(peaks) | 281 featureMatrix <- intensityMatrix(peaks) |
204 #end if | 282 #end if |
205 featureMatrix2 =cbind(pixelnames, featureMatrix) | 283 featureMatrix2 =cbind(pixelnames, featureMatrix) |
206 colnames(featureMatrix2)[1] = c("mz | spectra") | 284 colnames(featureMatrix2)[1] = c("mz") |
207 featureMatrix2 = t(featureMatrix2) | 285 featureMatrix2 = t(featureMatrix2) |
208 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") | 286 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") |
209 }else{print("There are no spectra with peaks left")} | 287 }else{print("There are no spectra with peaks left")} |
210 | 288 |
211 #elif str( $method.methods_conditional.method ) == 'Binning': | 289 #elif str( $method.methods_conditional.method ) == 'Binning': |
212 | 290 |
213 print('binning') | 291 print('binning') |
214 ##m/z binning | 292 ##m/z binning |
215 | 293 |
216 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance) | 294 peaks <- binPeaks(peaks, tolerance=$method.methods_conditional.bin_tolerance) |
217 ## QC plot | 295 |
296 ## QC plot and numbers | |
218 plot(peaks[[1]], main="First spectrum after binning") | 297 plot(peaks[[1]], main="First spectrum after binning") |
298 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | |
299 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | |
300 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | |
301 medint =round( median(unlist(lapply(peaks,intensity))), digits=2) | |
302 binned = c(minmz, maxmz,maxfeatures, medint) | |
303 QC_numbers= cbind(QC_numbers, binned) | |
304 vectorofactions = append(vectorofactions, "binned") | |
219 | 305 |
220 if (length(peaks[!sapply(peaks, isEmpty)])>0){ | 306 if (length(peaks[!sapply(peaks, isEmpty)])>0){ |
221 #if $infile.ext == 'imzml' | 307 #if $infile.ext == 'imzml' |
222 #if str($centroids) == "FALSE" | 308 #if str($centroids) == "FALSE" |
223 featureMatrix <- intensityMatrix(peaks, maldi_data) | 309 featureMatrix <- intensityMatrix(peaks, maldi_data) |
227 #end if | 313 #end if |
228 #else | 314 #else |
229 featureMatrix <- intensityMatrix(peaks) | 315 featureMatrix <- intensityMatrix(peaks) |
230 #end if | 316 #end if |
231 featureMatrix2 =cbind(pixelnames, featureMatrix) | 317 featureMatrix2 =cbind(pixelnames, featureMatrix) |
232 colnames(featureMatrix2)[1] = c("mz | spectra") | 318 colnames(featureMatrix2)[1] = c("mz") |
233 featureMatrix2 = t(featureMatrix2) | 319 featureMatrix2 = t(featureMatrix2) |
234 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") | 320 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") |
235 }else{print("There are no spectra with peaks left")} | 321 }else{print("There are no spectra with peaks left")} |
236 | 322 |
237 | 323 |
254 minFrequency=$method.methods_conditional.minFrequency, | 340 minFrequency=$method.methods_conditional.minFrequency, |
255 minNumber=$method.methods_conditional.minNumber, | 341 minNumber=$method.methods_conditional.minNumber, |
256 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples) | 342 mergeWhitelists=$method.methods_conditional.mergeWhitelists, label = samples) |
257 #end if | 343 #end if |
258 | 344 |
259 ##QC plot | 345 ##QC plot and numbers |
260 plot(peaks[[1]], main="First spectrum after m/z filtering") | 346 plot(peaks[[1]], main="First spectrum after m/z filtering") |
347 minmz = round(min(unlist(lapply(peaks,mass))), digits=4) | |
348 maxmz = round(max(unlist(lapply(peaks,mass))), digits=4) | |
349 maxfeatures = round(length(unlist(lapply(peaks,mass)))/length(peaks), digits=2) | |
350 medint = round(median(unlist(lapply(peaks,intensity))), digits=2) | |
351 filtered = c(minmz, maxmz,maxfeatures, medint) | |
352 QC_numbers= cbind(QC_numbers, filtered) | |
353 vectorofactions = append(vectorofactions, "filtered") | |
261 | 354 |
262 if (length(peaks[!sapply(peaks, isEmpty)])>0){ | 355 if (length(peaks[!sapply(peaks, isEmpty)])>0){ |
263 #if $infile.ext == 'imzml' | 356 #if $infile.ext == 'imzml' |
264 #if str($centroids) == "FALSE" | 357 #if str($centroids) == "FALSE" |
265 featureMatrix <- intensityMatrix(peaks, maldi_data) | 358 featureMatrix <- intensityMatrix(peaks, maldi_data) |
266 #end if | 359 #end if |
267 #else | 360 #else |
268 featureMatrix <- intensityMatrix(peaks) | 361 featureMatrix <- intensityMatrix(peaks) |
269 #end if | 362 #end if |
270 featureMatrix2 =cbind(pixelnames, featureMatrix) | 363 featureMatrix2 =cbind(pixelnames, featureMatrix) |
271 colnames(featureMatrix2)[1] = c("mz | spectra") | 364 colnames(featureMatrix2)[1] = c("mz") |
272 featureMatrix2 = t(featureMatrix2) | 365 featureMatrix2 = t(featureMatrix2) |
366 }else{print("There are no spectra with peaks left") | |
367 featureMatrix2 = matrix(0, ncol=1, nrow=1)} | |
273 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") | 368 write.table(featureMatrix2, file="$intensity_matrix", quote = FALSE, row.names = TRUE, col.names=FALSE, sep = "\t") |
274 }else{print("There are no spectra with peaks left")} | |
275 | |
276 #end if | 369 #end if |
277 #end for | 370 #end for |
278 | 371 |
279 if (length(peaks[!sapply(peaks, isEmpty)])>0){ | 372 if (length(peaks[!sapply(peaks, isEmpty)])>0){ |
280 ## mass peaks output | 373 ## mass peaks output |
281 mass_peaks = data.frame(matrix(,ncol=3, nrow=0)) | 374 mass_peaks = data.frame(matrix(,ncol=3, nrow=0)) |
282 for (spectrum in 1:length(peaks)){ | 375 for (spectrum in 1:length(peaks)){ |
283 spectrum_df = data.frame(peaks[[spectrum]]@snr, peaks[[spectrum]]@mass, peaks[[spectrum]]@intensity) | 376 spectrum_df = data.frame(peaks[[spectrum]]@snr, peaks[[spectrum]]@mass, peaks[[spectrum]]@intensity) |
284 spectrum_df\$spectrum_id = rep(pixelnames[[spectrum]], length(peaks[[spectrum]]@mass)) | 377 spectrum_df\$spectrum_id = rep(pixelnames[[spectrum]], length(peaks[[spectrum]]@mass)) |
285 mass_peaks = rbind(mass_peaks,spectrum_df) | 378 mass_peaks = rbind(mass_peaks,spectrum_df) |
286 } | 379 } |
287 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") | 380 colnames(mass_peaks) = c("snr", "mass", "intensity", "spectrum") |
288 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") | 381 write.table(mass_peaks, file="$masspeaks", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t") |
289 }else{print("There are no spectra with peaks left")} | 382 }else{print("There are no spectra with peaks left")} |
383 | |
384 ## print table with QC values | |
385 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\nintensity") | |
386 plot(0,type='n',axes=FALSE,ann=FALSE) | |
387 grid.table(t(QC_numbers)) | |
290 | 388 |
291 dev.off() | 389 dev.off() |
292 | 390 |
293 if (summarized_spectra == FALSE){ | 391 if (summarized_spectra == FALSE){ |
294 #if $infile.ext == 'imzml' | 392 #if $infile.ext == 'imzml' |
295 exportImzMl(peaks, file="out.imzMl", processed=$export_processed) | 393 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed) |
296 #elif $infile.ext == 'tabular' | 394 #elif $infile.ext == 'tabular' |
297 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\,")), ncol=2, byrow=TRUE) | 395 masspeaks_coordinates = matrix(unlist(strsplit(as.character(pixelnames), "\\_")), ncol=3, byrow=TRUE) |
298 ## extract x and y values and create the coordinate matrix in case tabular was input | 396 ## extract x and y values and create the coordinate matrix in case tabular was input |
299 peaklist_coordinates = unique(cbind(as.numeric(substring(masspeaks_coordinates[,1], 5, last = 1000000L)), as.numeric(substring(masspeaks_coordinates[,2], 5, last = 1000000L)))) | 397 peaklist_coordinates = unique(cbind(as.numeric(masspeaks_coordinates[,2]), as.numeric(masspeaks_coordinates[,3]))) |
300 exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates) | 398 exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=peaklist_coordinates) |
399 #elif $infile.ext == 'rdata' | |
400 MALDIquantForeign::exportImzMl(peaks, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates) | |
301 #end if | 401 #end if |
402 | |
302 } | 403 } |
303 | 404 |
304 ]]> | 405 ]]> |
305 </configfile> | 406 </configfile> |
306 </configfiles> | 407 </configfiles> |
307 <inputs> | 408 <inputs> |
308 <param name="infile" type="data" format="imzml,tabular" label="MS metadata" help="This file is in imzML or tabular format (peak list, peak detection cannot be run again)"/> | 409 <param name="infile" type="data" format="imzml,tabular,rdata" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML or tabular format (peak list, peak detection cannot be run again) or Cardinal MSImageSet saved as RData"/> |
309 <param name="centroids" type="boolean" label="Is the imzML data centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> | 410 <param name="centroids" type="boolean" label="Is the imzML/RData data centroided (picked)" help="Choose Yes if peak detection has already been done. Peak detection cannot be run again on centroided data" truevalue="TRUE" falsevalue="FALSE"/> |
310 <conditional name="restriction_conditional"> | 411 <conditional name="restriction_conditional"> |
311 <param name="restriction" type="select" label="Restrict the preprocessing to coordinates of interest"> | 412 <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> |
312 <option value="no_restriction" selected="True">Calculate on entire file</option> | 413 <option value="no_restriction" selected="True">Calculate on entire file</option> |
313 <option value="restrict">Restrict to coordinates of interest</option> | 414 <option value="restrict">Restrict to coordinates of interest</option> |
314 </param> | 415 </param> |
315 <when value="restrict"> | 416 <when value="restrict"> |
316 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates which should be read" help="x-values in first column, y-values in second column"/> | 417 <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> |
418 <param name="coordinates_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/> | |
317 </when> | 419 </when> |
318 <when value="no_restriction"/> | 420 <when value="no_restriction"/> |
319 </conditional> | 421 </conditional> |
320 | 422 |
321 <conditional name="tabular_annotation"> | 423 <conditional name="tabular_annotation"> |
322 <param name="load_annotation" type="select" label="Use pixel annotation from tabular file - select in peak detection or filtering step where you want to apply the annotation information"> | 424 <param name="load_annotation" type="select" label="Use pixel annotation from tabular file - select in peak detection or filtering step where annotation should be used"> |
323 <option value="no_annotation" selected="True">pixels belong into one group only</option> | 425 <option value="no_annotation" selected="True">pixels belong into one group only</option> |
324 <option value="yes_annotation">use pixel annotation from a tabular file</option> | 426 <option value="yes_annotation">use pixel annotation from a tabular file</option> |
325 </param> | 427 </param> |
326 <when value="yes_annotation"> | 428 <when value="yes_annotation"> |
327 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" | 429 <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" |
333 </when> | 435 </when> |
334 <when value="no_annotation"/> | 436 <when value="no_annotation"/> |
335 </conditional> | 437 </conditional> |
336 <repeat name="methods" title="Method" min="1"> | 438 <repeat name="methods" title="Method" min="1"> |
337 <conditional name="methods_conditional"> | 439 <conditional name="methods_conditional"> |
338 <param name="method" type="select" label="Select the method you want to apply"> | 440 <param name="method" type="select" label="Select a method"> |
339 <option value="Peak_detection">Peak detection</option> | 441 <option value="Peak_detection">Peak detection</option> |
340 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option> | 442 <option value="monoisotopic_peaks">Keep only monoisotopic peaks</option> |
341 <option value="Binning">Binning</option> | 443 <option value="Binning">Binning</option> |
342 <option value="Filtering">Filtering</option> | 444 <option value="Filtering">Filtering</option> |
343 </param> | 445 </param> |
367 <param name="bin_tolerance" type="float" value="0.002" label="Peak binning tolerance" | 469 <param name="bin_tolerance" type="float" value="0.002" label="Peak binning tolerance" |
368 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/> | 470 help="After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical."/> |
369 </when> | 471 </when> |
370 <when value="Filtering"> | 472 <when value="Filtering"> |
371 <param name="minFrequency" type="float" value="0.25" | 473 <param name="minFrequency" type="float" value="0.25" |
372 label="Remove all peaks which occur in less than minFrequency spectra" help="It is a relative threshold."/> | 474 label="Removal of all peaks which occur in less than minFrequency spectra" help="It is a relative threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be used."/> |
373 <param name="minNumber" type="float" value="1.0" | 475 <param name="minNumber" type="float" value="1.0" |
374 label="remove all peaks which occur in less than minNumber spectra" help="It is an absolute threshold."/> | 476 label="Removal of all peaks which occur in less than minNumber spectra" help="It is an absolute threshold. The higher value from relative and absolute threshold is taken. Set one value to zero to be sure it will not be used."/> |
375 <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations. If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/> | 477 <param name="filter_annot_groups" type="boolean" label="Group wise filtering with pixel annotations. If not specified a single group is assumed or when filtering has been done group wise it will automatically be group wise when selecting filtering on all pixel" truevalue="TRUE" falsevalue="FALSE"/> |
376 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" | 478 <param name="mergeWhitelists" type="boolean" truevalue="TRUE" falsevalue="FALSE" |
377 label="mergeWhitelists" help="if FALSE the filtering criteria are applied groupwise. If TRUE peaks that survive the filtering in one group (level of labels) these peaks are also kept in other groups even if their frequencies are below minFrequency"/> | 479 label="mergeWhitelists" help="if FALSE the filtering criteria are applied groupwise. If TRUE peaks that survive the filtering in one group (level of labels) these peaks are also kept in other groups even if their frequencies are below minFrequency"/> |
378 </when> | 480 </when> |
379 </conditional> | 481 </conditional> |
380 </repeat> | 482 </repeat> |
381 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="true" truevalue="TRUE" falsevalue="FALSE"/> | 483 <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" checked="true" truevalue="TRUE" falsevalue="FALSE"/> |
382 </inputs> | 484 </inputs> |
383 <outputs> | 485 <outputs> |
384 <data format="imzml" name="outfile_imzml" label="$infile.display_name peaks" /> | 486 <data format="imzml" name="outfile_imzml" label="$infile.display_name peaks"/> |
385 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "$infile.display_name peakdetection QC"/> | 487 <data format="pdf" name="plots" from_work_dir="peaks_qc_plot.pdf" label = "$infile.display_name peakdetection QC"/> |
386 <data format="tabular" name="masspeaks" label="$infile.display_name mass_peaks"/> | 488 <data format="tabular" name="masspeaks" label="$infile.display_name mass_peaks"/> |
387 <data format="tabular" name="intensity_matrix" label="intensity_matrix"/> | 489 <data format="tabular" name="intensity_matrix" label="intensity_matrix"/> |
388 </outputs> | 490 </outputs> |
389 <tests> | 491 <tests> |
412 <output name="plots" file="peakdetection1_QC.pdf" compare="sim_size"/> | 514 <output name="plots" file="peakdetection1_QC.pdf" compare="sim_size"/> |
413 <output name="masspeaks" file="masspeaks1.tabular"/> | 515 <output name="masspeaks" file="masspeaks1.tabular"/> |
414 <output name="intensity_matrix" file="int1.tabular"/> | 516 <output name="intensity_matrix" file="int1.tabular"/> |
415 </test> | 517 </test> |
416 <test> | 518 <test> |
417 <param name="infile" value="masspeaks1_forinput.tabular"/> | 519 <param name="infile" value="masspeaks3_forinput.tabular"/> |
418 <param name="centroids" value="TRUE"/> | 520 <param name="centroids" value="TRUE"/> |
419 <repeat name="methods"> | 521 <repeat name="methods"> |
420 <conditional name="methods_conditional"> | 522 <conditional name="methods_conditional"> |
421 <param name="method" value="monoisotopic_peaks"/> | 523 <param name="method" value="monoisotopic_peaks"/> |
422 <param name="minCor" value="0.60"/> | |
423 <param name="tolerance" value="0.0001"/> | |
424 </conditional> | 524 </conditional> |
425 </repeat> | 525 </repeat> |
426 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> | 526 <output name="plots" file="peakdetection2_QC.pdf" compare="sim_size"/> |
427 <output name="masspeaks" file="masspeaks2.tabular"/> | 527 <output name="masspeaks" file="masspeaks2.tabular"/> |
428 <output name="intensity_matrix" file="int2.tabular"/> | 528 <output name="intensity_matrix" file="int2.tabular"/> |
455 </conditional> | 555 </conditional> |
456 </repeat> | 556 </repeat> |
457 <repeat name="methods"> | 557 <repeat name="methods"> |
458 <conditional name="methods_conditional"> | 558 <conditional name="methods_conditional"> |
459 <param name="method" value="Filtering"/> | 559 <param name="method" value="Filtering"/> |
460 <param name="bin_tolerance" value="0.01"/> | |
461 <param name="minFrequency" value="0.5"/> | 560 <param name="minFrequency" value="0.5"/> |
462 <param name="minNumber" value="3"/> | 561 <param name="minNumber" value="3"/> |
463 <param name="filter_annot_groups" value="TRUE"/> | 562 <param name="filter_annot_groups" value="TRUE"/> |
464 <param name="mergeWhitelists" value="FALSE"/> | 563 <param name="mergeWhitelists" value="FALSE"/> |
465 </conditional> | 564 </conditional> |
470 </test> | 569 </test> |
471 </tests> | 570 </tests> |
472 <help> | 571 <help> |
473 <![CDATA[ | 572 <![CDATA[ |
474 | 573 |
475 MALDIquant_ provides a complete analysis pipeline for MALDI-TOF and other mass spectrometry data. So far we have only implemented the functionalities for mass spectrometry imaging data. | 574 @MADLI_QUANT_DESCRIPTION@ |
476 | 575 |
477 Input data: | 576 ----- |
478 | 577 |
479 - MSI data as imzML or file (upload via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ | 578 **Input data** |
480 - or MSI data as peak list (tabular file) with the columns named "snr", "mass", "intensity" and "spectrum". To obtain a valid imzML output file spectrum should contain the pixel coordinates in the format: "x = 1, y = 1" | 579 |
481 - optional tabular file with pixel coordinates to restrict reading of imzML file to coordinates of interest | 580 - MSI data: 3 types of input data can be used: |
482 - optional tabular file with pixel annotations. The annotations can be used to summarize pixels of an imzML file which belong to the same group and detect peaks on average spectra, further steps will be done on average spectra as well and average spectra are exported. If this option was not chosen the filtering tool can use the annotations to filter for peaks within pixel groups (select "Group wise filtering") | 581 |
483 | 582 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ |
484 | 583 - Cardinal "MSImageSet" data saved as .RData |
485 Options: | 584 - MSI data as peak list (tabular file) with the columns named "snr", "mass", "intensity" and "spectrum". The spectrum has to be in the following format: xy_1_1 (for pixel coordinates x1y1). The header must have exactly the four column names. |
486 | 585 |
487 - Peak detection: detection of peaks, only possible with imzML input | 586 :: |
587 | |
588 snr mass intensity spectrum | |
589 5.34 304.16 0.10 xy_1_1 | |
590 12.09 305 0.2 xy_1_1 | |
591 6.80 306.25 0.133 xy_1_1 | |
592 ... | |
593 ... | |
594 | |
595 | |
596 - Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second column. Further columns are allowed. Tabular files with any header name or no header at all are supported. | |
597 | |
598 :: | |
599 | |
600 x_coord y_coord | |
601 1 1 | |
602 2 1 | |
603 3 1 | |
604 ... | |
605 ... | |
606 | |
607 | |
608 - Optional: Tabular file(s) containing pixel coordinates and annotation. X and y values in separate columns and the corresponding annotation in a third column. Tabular files with any header name or no header at all are supported. The annotations can be used to summarize pixels of an imzML file which belong to the same group and detect peaks on average spectra, further steps will be done on average spectra as well and average spectra are exported. If this option was not chosen the filtering tool can use the annotations to filter for peaks within pixel groups (select "Group wise filtering"). | |
609 | |
610 :: | |
611 | |
612 x_coord y_coord annotation | |
613 1 1 healthy | |
614 2 1 healthy | |
615 3 1 disease | |
616 ... | |
617 ... | |
618 | |
619 | |
620 **Options** | |
621 | |
622 - Peak detection: detection of peaks, only possible with profile mode input | |
488 - Monoisotopic peaks: detection of monoisotopic peaks | 623 - Monoisotopic peaks: detection of monoisotopic peaks |
489 - Peak binning: After the alignment the peak positions (mass) are very similar but not identical. The binning is needed to make similar peak mass values identical. | 624 - Peak binning: After the alignment the peak positions (m/z) are very similar but not identical. The binning is needed to make similar peak m/z values identical. |
490 - Peak filtering: Removal of less frequent peaks (either with a minimum ratio or with an absolute minimum number of spectra in which the peak has to occur) | 625 - Peak filtering: Removal of less frequent peaks (either with a minimum ratio or with an absolute minimum number of spectra in which the peak has to occur) |
491 | 626 |
492 | 627 |
493 Output: | 628 **Output** |
494 | 629 |
495 - centroided processed or continuous imzML file | 630 - centroided imzML file (processed or continuous), except for peak picking on the average of multiple spectra |
496 - pdf with mass spectra after each preprocessing step | 631 - pdf with mass spectra plots after each preprocessing step |
497 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum" | 632 - peak list (tabular file) with the columns "snr", "mass", "intensity" and "spectrum" |
498 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs, values that were not picked are set to NA. | 633 - tabular file with intensity matrix (m/z in rows and spectra in columns). If the input file was imzML in profile mode the intensities before peak picking are also stored in the matrix. For all other inputs, values that were not picked are set to NA. For peak picking on the average of multiple spectra, each spectrum group is a column with mean intensities for each m/z |
499 | 634 |
500 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/ | 635 .. _MALDIquant: http://strimmerlab.org/software/maldiquant/ |
501 | 636 |
502 ]]> | 637 ]]> |
503 </help> | 638 </help> |