Mercurial > repos > galaxyp > msi_preprocessing
comparison msi_preprocessing.xml @ 9:22521948fa15 draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_preprocessing commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6
| author | galaxyp |
|---|---|
| date | Wed, 22 Aug 2018 13:34:00 -0400 |
| parents | 761852b6b3b8 |
| children |
comparison
equal
deleted
inserted
replaced
| 8:761852b6b3b8 | 9:22521948fa15 |
|---|---|
| 1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.5"> | 1 <tool id="mass_spectrometry_imaging_preprocessing" name="MSI preprocessing" version="1.10.0.6"> |
| 2 <description> | 2 <description> |
| 3 mass spectrometry imaging preprocessing | 3 mass spectrometry imaging preprocessing |
| 4 </description> | 4 </description> |
| 5 <requirements> | 5 <requirements> |
| 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> | 6 <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> |
| 43 msidata <- readImzML('infile', attach.only=TRUE) | 43 msidata <- readImzML('infile', attach.only=TRUE) |
| 44 #end if | 44 #end if |
| 45 #elif $infile.ext == 'analyze75' | 45 #elif $infile.ext == 'analyze75' |
| 46 msidata = readAnalyze('infile', attach.only=TRUE) | 46 msidata = readAnalyze('infile', attach.only=TRUE) |
| 47 #else | 47 #else |
| 48 load('infile.RData') | 48 loadRData <- function(fileName){ |
| 49 load(fileName) | |
| 50 get(ls()[ls() != "fileName"]) | |
| 51 } | |
| 52 msidata = loadRData('infile.RData') | |
| 49 #end if | 53 #end if |
| 50 | 54 |
| 51 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) | 55 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) |
| 52 | 56 |
| 53 ## function to later read RData reference files in | 57 ## function to later read RData reference files in |
| 62 ######################### preparations for QC report ################# | 66 ######################### preparations for QC report ################# |
| 63 | 67 |
| 64 maxfeatures = length(features(msidata)) | 68 maxfeatures = length(features(msidata)) |
| 65 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 69 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
| 66 medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) | 70 medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) |
| 67 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 71 minmz = round(min(mz(msidata)), digits=2) |
| 68 QC_numbers= data.frame(inputdata = c(maxfeatures, medianpeaks, medint, TICs)) | 72 maxmz = round(max(mz(msidata)), digits=2) |
| 73 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medianpeaks, medint)) | |
| 69 vectorofactions = "inputdata" | 74 vectorofactions = "inputdata" |
| 70 | 75 |
| 71 ############################### Preprocessing steps ########################### | 76 ############################### Preprocessing steps ########################### |
| 72 ############################################################################### | 77 ############################################################################### |
| 73 | 78 |
| 84 ############################### QC ########################### | 89 ############################### QC ########################### |
| 85 | 90 |
| 86 maxfeatures = length(features(msidata)) | 91 maxfeatures = length(features(msidata)) |
| 87 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),) | 92 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE),) |
| 88 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 93 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
| 89 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 94 minmz = round(min(mz(msidata)), digits=2) |
| 90 normalized = c(maxfeatures, medianpeaks, medint, TICs) | 95 maxmz = round(max(mz(msidata)), digits=2) |
| 96 normalized = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 91 QC_numbers= cbind(QC_numbers, normalized) | 97 QC_numbers= cbind(QC_numbers, normalized) |
| 92 vectorofactions = append(vectorofactions, "normalized") | 98 vectorofactions = append(vectorofactions, "normalized") |
| 93 | 99 |
| 94 ############################### Baseline reduction ########################### | 100 ############################### Baseline reduction ########################### |
| 95 | 101 |
| 102 ############################### QC ########################### | 108 ############################### QC ########################### |
| 103 | 109 |
| 104 maxfeatures = length(features(msidata)) | 110 maxfeatures = length(features(msidata)) |
| 105 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 111 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
| 106 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 112 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
| 107 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 113 minmz = round(min(mz(msidata)), digits=2) |
| 108 baseline= c(maxfeatures, medianpeaks, medint, TICs) | 114 maxmz = round(max(mz(msidata)), digits=2) |
| 115 baseline = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 109 QC_numbers= cbind(QC_numbers, baseline) | 116 QC_numbers= cbind(QC_numbers, baseline) |
| 110 vectorofactions = append(vectorofactions, "baseline red.") | 117 vectorofactions = append(vectorofactions, "baseline red.") |
| 111 | 118 |
| 112 ############################### Smoothing ########################### | 119 ############################### Smoothing ########################### |
| 113 | 120 |
| 134 ############################### QC ########################### | 141 ############################### QC ########################### |
| 135 | 142 |
| 136 maxfeatures = length(features(msidata)) | 143 maxfeatures = length(features(msidata)) |
| 137 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 144 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
| 138 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 145 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
| 139 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 146 minmz = round(min(mz(msidata)), digits=2) |
| 140 smoothed= c(maxfeatures, medianpeaks, medint, TICs) | 147 maxmz = round(max(mz(msidata)), digits=2) |
| 148 smoothed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 141 QC_numbers= cbind(QC_numbers, smoothed) | 149 QC_numbers= cbind(QC_numbers, smoothed) |
| 142 vectorofactions = append(vectorofactions, "smoothed") | 150 vectorofactions = append(vectorofactions, "smoothed") |
| 143 | 151 |
| 144 ############################### Peak picking ########################### | 152 ############################### Peak picking ########################### |
| 145 | 153 |
| 146 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': | 154 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': |
| 147 print('Peak_picking') | 155 print('Peak_picking') |
| 148 ## Peakpicking | 156 ## Peakpicking |
| 149 | 157 |
| 150 | |
| 151 ## remove duplicated coordinates, otherwise peak picking will fail | 158 ## remove duplicated coordinates, otherwise peak picking will fail |
| 152 print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed")) | 159 print(paste0(sum(duplicated(coord(msidata))), " coordinates were removed")) |
| 153 msidata <- msidata[,!duplicated(coord(msidata))] | 160 msidata <- msidata[,!duplicated(coord(msidata))] |
| 154 | 161 |
| 155 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': | 162 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': |
| 172 ############################### QC ########################### | 179 ############################### QC ########################### |
| 173 | 180 |
| 174 maxfeatures = length(features(msidata)) | 181 maxfeatures = length(features(msidata)) |
| 175 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 182 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
| 176 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 183 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
| 177 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 184 minmz = round(min(mz(msidata)), digits=2) |
| 178 picked= c(maxfeatures, medianpeaks, medint, TICs) | 185 maxmz = round(max(mz(msidata)), digits=2) |
| 186 picked = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 179 QC_numbers= cbind(QC_numbers, picked) | 187 QC_numbers= cbind(QC_numbers, picked) |
| 180 vectorofactions = append(vectorofactions, "picked") | 188 vectorofactions = append(vectorofactions, "picked") |
| 181 | 189 |
| 182 ############################### Peak alignment ########################### | 190 ############################### Peak alignment ########################### |
| 183 | 191 |
| 218 ############################### QC ########################### | 226 ############################### QC ########################### |
| 219 | 227 |
| 220 maxfeatures = length(features(msidata)) | 228 maxfeatures = length(features(msidata)) |
| 221 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 229 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
| 222 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 230 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
| 223 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 231 minmz = round(min(mz(msidata)), digits=2) |
| 224 aligned= c(maxfeatures, medianpeaks, medint, TICs) | 232 maxmz = round(max(mz(msidata)), digits=2) |
| 233 aligned = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 225 QC_numbers= cbind(QC_numbers, aligned) | 234 QC_numbers= cbind(QC_numbers, aligned) |
| 226 vectorofactions = append(vectorofactions, "aligned") | 235 vectorofactions = append(vectorofactions, "aligned") |
| 227 | 236 |
| 228 ############################### Peak filtering ########################### | 237 ############################### Peak filtering ########################### |
| 229 | 238 |
| 235 ############################### QC ########################### | 244 ############################### QC ########################### |
| 236 | 245 |
| 237 maxfeatures = length(features(msidata)) | 246 maxfeatures = length(features(msidata)) |
| 238 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 247 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
| 239 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 248 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
| 240 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 249 minmz = round(min(mz(msidata)), digits=2) |
| 241 filtered= c(maxfeatures, medianpeaks, medint, TICs) | 250 maxmz = round(max(mz(msidata)), digits=2) |
| 251 filtered = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 242 QC_numbers= cbind(QC_numbers, filtered) | 252 QC_numbers= cbind(QC_numbers, filtered) |
| 243 vectorofactions = append(vectorofactions, "filtered") | 253 vectorofactions = append(vectorofactions, "filtered") |
| 244 | 254 |
| 245 ############################### Data reduction ########################### | 255 ############################### Data reduction ########################### |
| 246 | 256 |
| 277 ############################### QC ########################### | 287 ############################### QC ########################### |
| 278 | 288 |
| 279 maxfeatures = length(features(msidata)) | 289 maxfeatures = length(features(msidata)) |
| 280 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 290 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
| 281 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 291 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
| 282 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 292 minmz = round(min(mz(msidata)), digits=2) |
| 283 reduced= c(maxfeatures, medianpeaks, medint, TICs) | 293 maxmz = round(max(mz(msidata)), digits=2) |
| 294 reduced = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 284 QC_numbers= cbind(QC_numbers, reduced) | 295 QC_numbers= cbind(QC_numbers, reduced) |
| 285 vectorofactions = append(vectorofactions, "reduced") | 296 vectorofactions = append(vectorofactions, "reduced") |
| 286 | 297 |
| 287 ############################### Transformation ########################### | 298 ############################### Transformation ########################### |
| 288 | 299 |
| 306 ############################### QC ########################### | 317 ############################### QC ########################### |
| 307 | 318 |
| 308 maxfeatures = length(features(msidata)) | 319 maxfeatures = length(features(msidata)) |
| 309 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | 320 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) |
| 310 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | 321 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) |
| 311 TICs = round(mean(colSums(spectra(msidata)[], na.rm=TRUE)), digits=1) | 322 minmz = round(min(mz(msidata)), digits=2) |
| 312 transformed= c(maxfeatures, medianpeaks, medint, TICs) | 323 maxmz = round(max(mz(msidata)), digits=2) |
| 324 transformed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 313 QC_numbers= cbind(QC_numbers, transformed) | 325 QC_numbers= cbind(QC_numbers, transformed) |
| 314 vectorofactions = append(vectorofactions, "transformed") | 326 vectorofactions = append(vectorofactions, "transformed") |
| 315 | 327 |
| 316 #end if | 328 #end if |
| 317 #end for | 329 #end for |
| 434 ## save QC report | 446 ## save QC report |
| 435 | 447 |
| 436 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) | 448 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) |
| 437 plot(0,type='n',axes=FALSE,ann=FALSE) | 449 plot(0,type='n',axes=FALSE,ann=FALSE) |
| 438 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) | 450 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) |
| 439 rownames(QC_numbers) = c("# features", "median # peaks", "median intensity", "median TIC") | 451 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\n# peaks", "median\nintensity") |
| 440 grid.table(t(QC_numbers)) | 452 grid.table(t(QC_numbers)) |
| 441 | 453 |
| 442 #if str($tabular_annotation.load_annotation) == 'yes_annotation': | 454 #if str($tabular_annotation.load_annotation) == 'yes_annotation': |
| 443 | 455 |
| 444 ## the more annotation groups a file has the smaller will be the legend | 456 ## the more annotation groups a file has the smaller will be the legend |
| 889 | 901 |
| 890 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ | 902 Cardinal is an R package that implements statistical & computational tools for analyzing mass spectrometry imaging datasets. `More information on Cardinal <http://cardinalmsi.org//>`_ |
| 891 | 903 |
| 892 This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. | 904 This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. |
| 893 | 905 |
| 894 Input data: 3 types of input data can be used: | 906 Input data: 3 types of MSI data can be used: |
| 895 | 907 |
| 896 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ | 908 - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ |
| 897 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) | 909 - Analyze7.5 (upload hdr, img and t2m file via the "composite" function) |
| 898 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) | 910 - Cardinal "MSImageSet" data (with variable name "msidata", saved as .RData) |
| 911 - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before peak picking. | |
| 899 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column | 912 - optional: tabular file with pixel annotations: x and y values in separate columns and the corresponding annotation in a third column |
| 900 | 913 |
| 901 Options: | 914 Options: |
| 902 | 915 |
| 903 - Normalization: Normalization of intensities to total ion current (TIC) | 916 - Normalization: Normalization of intensities to total ion current (TIC) |
| 904 - Baseline reduction: Baseline reduction removes backgroundintensity generated by chemical noise (common in MALDI datasets) | 917 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) |
| 905 - Smoothening: Smoothing of the peaks reduces noise and improves peak detection | 918 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection |
| 906 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) | 919 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) |
| 907 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value | 920 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value |
| 908 - Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot. | 921 - Peak filtering: works only on centroided data (after peak picking and alignment or data reduction with peak filtering), removes peaks that occur only in a small proportion of pixels. If not sure which cutoff to choose, run qualitycontrol first and decide according to the zero value plot. |
| 909 - Data reduction: binning, resampling or peak filtering to reduce data | 922 - Data reduction: binning, resampling or peak filtering to reduce data |
| 910 - Transformation: log2 or square root transformation of all intensities; when using log2 transformation, zero intensities will become NA. This can lead to compatibility problems. | 923 - Transformation: log2 or square root transformation of all intensities; when using log2 transformation, zero intensities will become NA. This can lead to compatibility problems. |
| 912 | 925 |
| 913 Output: | 926 Output: |
| 914 | 927 |
| 915 - imzML file, preprocessed | 928 - imzML file, preprocessed |
| 916 - pdf with key values after each processing step; if an annotation file was loaded, it also contains an overview plot of the pixel annotations | 929 - pdf with key values after each processing step; if an annotation file was loaded, it also contains an overview plot of the pixel annotations |
| 917 - optional: intensity matrix as tabular file (intensities for m/z in rows and pixel in columns) | 930 - optional: intensity matrix as tabular file (m/z in rows and pixel in columns, filled with intensity values) |
| 918 - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group | 931 - optional: summarized intensity matrix: mean, median or standard deviation for each m/z feature; in case pixel annotations are provided the intensity values are summarized for each pixel group |
| 919 | 932 |
| 920 Tip: | 933 Tip: |
| 921 | 934 |
| 922 - Peak alignment works only after peak picking | 935 - Peak alignment works only after peak picking |
