Mercurial > repos > galaxyp > cardinal_preprocessing
comparison preprocessing.xml @ 0:01d12e7e48d3 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 0825a4ccd3ebf4ca8a298326d14f3e7b25ae8415
| author | galaxyp |
|---|---|
| date | Mon, 01 Oct 2018 01:01:04 -0400 |
| parents | |
| children | c8ab1b6f3834 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:01d12e7e48d3 |
|---|---|
| 1 <tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0"> | |
| 2 <description> | |
| 3 mass spectrometry imaging preprocessing | |
| 4 </description> | |
| 5 <macros> | |
| 6 <import>macros.xml</import> | |
| 7 </macros> | |
| 8 <expand macro="requirements"> | |
| 9 <requirement type="package" version="2.2.1">r-gridextra</requirement> | |
| 10 <requirement type="package" version="0.20-35">r-lattice</requirement> | |
| 11 <requirement type="package" version="2.2.1">r-ggplot2</requirement> | |
| 12 </expand> | |
| 13 <command detect_errors="exit_code"> | |
| 14 <![CDATA[ | |
| 15 | |
| 16 @INPUT_LINKING@ | |
| 17 cat '${cardinal_preprocessing}' && | |
| 18 Rscript '${cardinal_preprocessing}' | |
| 19 | |
| 20 ]]> | |
| 21 </command> | |
| 22 <configfiles> | |
| 23 <configfile name="cardinal_preprocessing"><![CDATA[ | |
| 24 | |
| 25 ################################# load libraries and read file ################# | |
| 26 | |
| 27 library(Cardinal) | |
| 28 library(gridExtra) | |
| 29 library(lattice) | |
| 30 library(ggplot2) | |
| 31 | |
| 32 @READING_MSIDATA@ | |
| 33 | |
| 34 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) | |
| 35 | |
| 36 | |
| 37 if (sum(spectra(msidata)[]>0, na.rm=TRUE)> 0){ | |
| 38 ######################### preparations for QC report ################# | |
| 39 | |
| 40 maxfeatures = length(features(msidata)) | |
| 41 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 42 medint = round(median(spectra(msidata)[],na.rm=TRUE), digits=2) | |
| 43 minmz = round(min(mz(msidata)), digits=2) | |
| 44 maxmz = round(max(mz(msidata)), digits=2) | |
| 45 QC_numbers= data.frame(inputdata = c(minmz, maxmz,maxfeatures, medianpeaks, medint)) | |
| 46 vectorofactions = "inputdata" | |
| 47 | |
| 48 ############################### Preprocessing steps ########################### | |
| 49 ############################################################################### | |
| 50 | |
| 51 #for $method in $methods: | |
| 52 | |
| 53 ############################### Normalization ########################### | |
| 54 | |
| 55 #if str( $method.methods_conditional.preprocessing_method ) == 'Normalization': | |
| 56 print('Normalization') | |
| 57 ##normalization | |
| 58 | |
| 59 msidata = normalize(msidata, method="tic") | |
| 60 | |
| 61 ############################### QC ########################### | |
| 62 | |
| 63 maxfeatures = length(features(msidata)) | |
| 64 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 65 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
| 66 minmz = round(min(mz(msidata)), digits=2) | |
| 67 maxmz = round(max(mz(msidata)), digits=2) | |
| 68 normalized = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 69 QC_numbers= cbind(QC_numbers, normalized) | |
| 70 vectorofactions = append(vectorofactions, "normalized") | |
| 71 | |
| 72 ############################### Baseline reduction ########################### | |
| 73 | |
| 74 #elif str( $method.methods_conditional.preprocessing_method ) == 'Baseline_reduction': | |
| 75 print('Baseline_reduction') | |
| 76 ##baseline reduction | |
| 77 | |
| 78 msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline) | |
| 79 | |
| 80 ############################### QC ########################### | |
| 81 | |
| 82 maxfeatures = length(features(msidata)) | |
| 83 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 84 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
| 85 minmz = round(min(mz(msidata)), digits=2) | |
| 86 maxmz = round(max(mz(msidata)), digits=2) | |
| 87 baseline = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 88 QC_numbers= cbind(QC_numbers, baseline) | |
| 89 vectorofactions = append(vectorofactions, "baseline red.") | |
| 90 | |
| 91 ############################### Smoothing ########################### | |
| 92 | |
| 93 #elif str( $method.methods_conditional.preprocessing_method ) == 'Smoothing': | |
| 94 print('Smoothing') | |
| 95 ## Smoothing | |
| 96 | |
| 97 #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': | |
| 98 print('gaussian smoothing') | |
| 99 | |
| 100 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) | |
| 101 | |
| 102 #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': | |
| 103 print('sgolay smoothing') | |
| 104 | |
| 105 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) | |
| 106 #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': | |
| 107 print('moving average smoothing') | |
| 108 | |
| 109 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) | |
| 110 | |
| 111 #end if | |
| 112 | |
| 113 ############################### QC ########################### | |
| 114 | |
| 115 maxfeatures = length(features(msidata)) | |
| 116 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 117 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
| 118 minmz = round(min(mz(msidata)), digits=2) | |
| 119 maxmz = round(max(mz(msidata)), digits=2) | |
| 120 smoothed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 121 QC_numbers= cbind(QC_numbers, smoothed) | |
| 122 vectorofactions = append(vectorofactions, "smoothed") | |
| 123 | |
| 124 ############################### Peak picking ########################### | |
| 125 | |
| 126 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_picking': | |
| 127 print('Peak_picking') | |
| 128 ## Peakpicking | |
| 129 | |
| 130 ## remove duplicated coordinates, otherwise peak picking will fail | |
| 131 print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) | |
| 132 msidata <- msidata[,!duplicated(coord(msidata))] | |
| 133 | |
| 134 #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': | |
| 135 print('adaptive peakpicking') | |
| 136 | |
| 137 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, spar=$method.methods_conditional.methods_for_picking.spar_picking) | |
| 138 | |
| 139 #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'limpic': | |
| 140 print('limpic peakpicking') | |
| 141 | |
| 142 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method, thresh=$method.methods_conditional.methods_for_picking.tresh_picking) | |
| 143 | |
| 144 #elif str( $method.methods_conditional.methods_for_picking.picking_method) == 'simple': | |
| 145 print('simple peakpicking') | |
| 146 | |
| 147 msidata = peakPick(msidata, window = $method.methods_conditional.window_picking, blocks = $method.methods_conditional.blocks_picking, method='$method.methods_conditional.methods_for_picking.picking_method', SNR=$method.methods_conditional.SNR_picking_method) | |
| 148 | |
| 149 #end if | |
| 150 | |
| 151 ############################### QC ########################### | |
| 152 | |
| 153 maxfeatures = length(features(msidata)) | |
| 154 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 155 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
| 156 minmz = round(min(mz(msidata)), digits=2) | |
| 157 maxmz = round(max(mz(msidata)), digits=2) | |
| 158 picked = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 159 QC_numbers= cbind(QC_numbers, picked) | |
| 160 vectorofactions = append(vectorofactions, "picked") | |
| 161 | |
| 162 ############################### Peak alignment ########################### | |
| 163 | |
| 164 #elif str( $method.methods_conditional.preprocessing_method ) == 'Peak_alignment': | |
| 165 print('Peak_alignment') | |
| 166 ## Peakalignment | |
| 167 | |
| 168 #if str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_noref': | |
| 169 | |
| 170 align_peak_reference = msidata | |
| 171 | |
| 172 #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_table': | |
| 173 | |
| 174 align_reference_table = read.delim("$method.methods_conditional.align_ref_type.mz_tabular", header = $method.methods_conditional.align_ref_type.align_mass_header, stringsAsFactors = FALSE) | |
| 175 align_reference_column = align_reference_table[,$method.methods_conditional.align_ref_type.align_mass_column] | |
| 176 align_peak_reference = align_reference_column[align_reference_column>=min(mz(msidata)) & align_reference_column<=max(mz(msidata))] | |
| 177 if (length(align_peak_reference) == 0) | |
| 178 {align_peak_reference = 0} | |
| 179 | |
| 180 #elif str( $method.methods_conditional.align_ref_type.align_reference_datatype) == 'align_msidata_ref': | |
| 181 | |
| 182 align_peak_reference = loadRData('$method.methods_conditional.align_ref_type.align_peaks_msidata') | |
| 183 | |
| 184 #end if | |
| 185 | |
| 186 #if str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'diff': | |
| 187 print('diff peakalignment') | |
| 188 | |
| 189 msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',diff.max =$method.methods_conditional.methods_for_alignment.value_diffalignment, units = "$method.methods_conditional.methods_for_alignment.units_diffalignment", ref=align_peak_reference) | |
| 190 | |
| 191 #elif str( $method.methods_conditional.methods_for_alignment.alignment_method) == 'DP': | |
| 192 print('DPpeakalignment') | |
| 193 | |
| 194 msidata = peakAlign(msidata, method='$method.methods_conditional.methods_for_alignment.alignment_method',gap = $method.methods_conditional.methods_for_alignment.gap_DPalignment, ref=align_peak_reference) | |
| 195 | |
| 196 #end if | |
| 197 | |
| 198 ############################### QC ########################### | |
| 199 | |
| 200 maxfeatures = length(features(msidata)) | |
| 201 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 202 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
| 203 minmz = round(min(mz(msidata)), digits=2) | |
| 204 maxmz = round(max(mz(msidata)), digits=2) | |
| 205 aligned = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 206 QC_numbers= cbind(QC_numbers, aligned) | |
| 207 vectorofactions = append(vectorofactions, "aligned") | |
| 208 | |
| 209 ############################### Peak filtering ########################### | |
| 210 | |
| 211 #elif str( $method.methods_conditional.preprocessing_method) == 'Peak_filtering': | |
| 212 print('Peak_filtering') | |
| 213 | |
| 214 msidata = peakFilter(msidata, method='freq', freq.min = $method.methods_conditional.frequ_filtering) | |
| 215 | |
| 216 ############################### QC ########################### | |
| 217 | |
| 218 maxfeatures = length(features(msidata)) | |
| 219 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 220 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
| 221 minmz = round(min(mz(msidata)), digits=2) | |
| 222 maxmz = round(max(mz(msidata)), digits=2) | |
| 223 filtered = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 224 QC_numbers= cbind(QC_numbers, filtered) | |
| 225 vectorofactions = append(vectorofactions, "filtered") | |
| 226 | |
| 227 ############################### Data reduction ########################### | |
| 228 | |
| 229 #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction': | |
| 230 print('Data_reduction') | |
| 231 | |
| 232 #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin': | |
| 233 print('bin reduction') | |
| 234 | |
| 235 msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun) | |
| 236 | |
| 237 #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample': | |
| 238 print('resample reduction') | |
| 239 | |
| 240 msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step) | |
| 241 | |
| 242 #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'peaks': | |
| 243 print('peaks reduction') | |
| 244 | |
| 245 #if str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'table': | |
| 246 | |
| 247 reference_table = read.delim("$method.methods_conditional.methods_for_reduction.ref_type.mz_tabular", header = $method.methods_conditional.methods_for_reduction.ref_type.mass_header, stringsAsFactors = FALSE) | |
| 248 reference_column = reference_table[,$method.methods_conditional.methods_for_reduction.ref_type.mass_column] | |
| 249 peak_reference = reference_column[reference_column>min(mz(msidata)) & reference_column<max(mz(msidata))] | |
| 250 | |
| 251 #elif str( $method.methods_conditional.methods_for_reduction.ref_type.reference_datatype) == 'msidata_ref': | |
| 252 | |
| 253 peak_reference = loadRData('$method.methods_conditional.methods_for_reduction.ref_type.peaks_msidata') | |
| 254 | |
| 255 #end if | |
| 256 | |
| 257 msidata = reduceDimension(msidata, method="peaks", ref=peak_reference, type="$method.methods_conditional.methods_for_reduction.peaks_type") | |
| 258 #end if | |
| 259 ############################### QC ########################### | |
| 260 | |
| 261 maxfeatures = length(features(msidata)) | |
| 262 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 263 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
| 264 minmz = round(min(mz(msidata)), digits=2) | |
| 265 maxmz = round(max(mz(msidata)), digits=2) | |
| 266 reduced = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 267 QC_numbers= cbind(QC_numbers, reduced) | |
| 268 vectorofactions = append(vectorofactions, "reduced") | |
| 269 | |
| 270 ############################### Transformation ########################### | |
| 271 | |
| 272 #elif str( $method.methods_conditional.preprocessing_method) == 'Transformation': | |
| 273 print('Transformation') | |
| 274 | |
| 275 #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': | |
| 276 print('log2 transformation') | |
| 277 | |
| 278 spectra(msidata)[][spectra(msidata)[] ==0] = NA | |
| 279 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[])))) | |
| 280 spectra(msidata)[] = log2(spectra(msidata)[]) | |
| 281 | |
| 282 #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt': | |
| 283 print('squareroot transformation') | |
| 284 | |
| 285 spectra(msidata)[] = sqrt(spectra(msidata)[]) | |
| 286 | |
| 287 #end if | |
| 288 | |
| 289 ############################### QC ########################### | |
| 290 | |
| 291 maxfeatures = length(features(msidata)) | |
| 292 medianpeaks = median(colSums(spectra(msidata)[]>0, na.rm=TRUE)) | |
| 293 medint = round(median(spectra(msidata)[], na.rm=TRUE), digits=2) | |
| 294 minmz = round(min(mz(msidata)), digits=2) | |
| 295 maxmz = round(max(mz(msidata)), digits=2) | |
| 296 transformed = c(minmz, maxmz,maxfeatures, medianpeaks, medint) | |
| 297 QC_numbers= cbind(QC_numbers, transformed) | |
| 298 vectorofactions = append(vectorofactions, "transformed") | |
| 299 | |
| 300 #end if | |
| 301 #end for | |
| 302 | |
| 303 ############# Outputs: RData and QC report ############# | |
| 304 ################################################################################ | |
| 305 | |
| 306 print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[])))) | |
| 307 | |
| 308 ## save as (.RData) | |
| 309 save(msidata, file="$msidata_preprocessed") | |
| 310 | |
| 311 ## save QC report | |
| 312 | |
| 313 pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) | |
| 314 plot(0,type='n',axes=FALSE,ann=FALSE) | |
| 315 title(main=paste("Quality control during preprocessing \n", "Filename:", "$infile.display_name")) | |
| 316 rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median\n# peaks", "median\nintensity") | |
| 317 grid.table(t(QC_numbers)) | |
| 318 | |
| 319 dev.off() | |
| 320 | |
| 321 }else{ | |
| 322 print("inputfile has no intensities > 0") | |
| 323 } | |
| 324 | |
| 325 ]]></configfile> | |
| 326 </configfiles> | |
| 327 <inputs> | |
| 328 <expand macro="reading_msidata"/> | |
| 329 <repeat name="methods" title="Preprocessing" min="1" max="50"> | |
| 330 <conditional name="methods_conditional"> | |
| 331 <param name="preprocessing_method" type="select" label="Preprocessing methods"> | |
| 332 <option value="Normalization" selected="True">Normalization</option> | |
| 333 <option value="Baseline_reduction">Baseline Reduction</option> | |
| 334 <option value="Smoothing">Peak smoothing</option> | |
| 335 <option value="Peak_picking">Peak picking</option> | |
| 336 <option value="Peak_alignment">Peak alignment</option> | |
| 337 <option value="Peak_filtering">Peak filtering</option> | |
| 338 <option value="Data_reduction">Data reduction</option> | |
| 339 <option value="Transformation">Transformation</option> | |
| 340 </param> | |
| 341 <when value="Normalization"/> | |
| 342 <when value="Baseline_reduction"> | |
| 343 <param name="blocks_baseline" type="integer" value="500" | |
| 344 label="Blocks"/> | |
| 345 <param name="spar_baseline" type="float" value="1.0" label="Spar value" | |
| 346 help = "Smoothing parameter for the spline smoothing | |
| 347 applied to the spectrum in order to decide the cutoffs | |
| 348 for throwing away false noise spikes that might occur inside peaks"/> | |
| 349 </when> | |
| 350 <when value="Smoothing"> | |
| 351 <conditional name="methods_for_smoothing"> | |
| 352 <param name="smoothing_method" type="select" label="Smoothing method"> | |
| 353 <option value="gaussian" selected="True">gaussian</option> | |
| 354 <option value="sgolay">Savitzky-Golay</option> | |
| 355 <option value="ma">moving average</option> | |
| 356 </param> | |
| 357 <when value="gaussian"> | |
| 358 <param name="sd_gaussian" type="float" value="2" | |
| 359 label="The standard deviation for the Gaussian kernel (window/sd)"/> | |
| 360 </when> | |
| 361 <when value="sgolay"> | |
| 362 <param name="order_of_filters" type="integer" value="3" | |
| 363 label="The order of the smoothing filter, must be smaller than window size"/> | |
| 364 </when> | |
| 365 <when value="ma"> | |
| 366 <param name="coefficients_ma_filter" type="float" value="1" | |
| 367 label="The coefficients for the moving average filter"/> | |
| 368 </when> | |
| 369 </conditional> | |
| 370 <param name="window_smoothing" type="float" value="8" | |
| 371 label="Window size"/> | |
| 372 </when> | |
| 373 <when value="Peak_picking"> | |
| 374 <param name="SNR_picking_method" type="integer" value="6" | |
| 375 label="Signal to noise ratio" | |
| 376 help="The minimal signal to noise ratio for peaks to be considered as a valid peak."/> | |
| 377 <param name="blocks_picking" type="integer" value="100" label = "Number of blocks" | |
| 378 help="Number of blocks in which to divide mass spectrum to calculate noise"/> | |
| 379 <param name="window_picking" type="float" value="5" label= "Window size" help="Window width for seeking local maxima"/> | |
| 380 <conditional name="methods_for_picking"> | |
| 381 <param name="picking_method" type="select" label="Peak picking method" help="only simple works for processed imzML files"> | |
| 382 <option value="adaptive" selected="True">adaptive</option> | |
| 383 <option value="limpic">limpic</option> | |
| 384 <option value="simple">simple</option> | |
| 385 </param> | |
| 386 <when value="adaptive"> | |
| 387 <param name="spar_picking" type="float" value="1.0" | |
| 388 label="Spar value" | |
| 389 help = "Smoothing parameter for the spline smoothing | |
| 390 applied to the spectrum in order to decide the cutoffs | |
| 391 for throwing away false noise spikes that might occur inside peaks"/> | |
| 392 </when> | |
| 393 <when value="limpic"> | |
| 394 <param name="tresh_picking" type="float" value="0.75" | |
| 395 label="thresh value" help="The thresholding quantile to use when comparing slopes in order to throw away peaks that are too flat"/> | |
| 396 </when> | |
| 397 <when value="simple"/> | |
| 398 </conditional> | |
| 399 </when> | |
| 400 <when value="Peak_alignment"> | |
| 401 <conditional name="methods_for_alignment"> | |
| 402 <param name="alignment_method" type="select" label="Alignment method"> | |
| 403 <option value="diff" selected="True">diff</option> | |
| 404 <option value="DP">DP</option> | |
| 405 </param> | |
| 406 <when value="diff"> | |
| 407 <param name="value_diffalignment" type="float" value="200" | |
| 408 label="diff.max" help="Peaks that differ less than this value will be aligned together"/> | |
| 409 <param name="units_diffalignment" type="select" display="radio" optional="False" label="units"> | |
| 410 <option value="ppm" selected="True">ppm</option> | |
| 411 <option value="Da">m/z</option> | |
| 412 </param> | |
| 413 </when> | |
| 414 <when value="DP"> | |
| 415 <param name="gap_DPalignment" type="float" value="0" | |
| 416 label="Gap" help="The gap penalty for the dynamic programming sequence alignment"/> | |
| 417 </when> | |
| 418 </conditional> | |
| 419 <conditional name="align_ref_type"> | |
| 420 <param name="align_reference_datatype" type="select" label="Choose reference"> | |
| 421 <option value="align_noref" selected="True">no reference</option> | |
| 422 <option value="align_table" >tabular file as reference</option> | |
| 423 <option value="align_msidata_ref">msidata file as reference</option> | |
| 424 </param> | |
| 425 <when value="align_noref"/> | |
| 426 <when value="align_table"> | |
| 427 <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/> | |
| 428 </when> | |
| 429 <when value="align_msidata_ref"> | |
| 430 <param name="align_peaks_msidata" type="data" format="rdata" label="Picked and aligned Cardinal MSImageSet saved as RData"/> | |
| 431 </when> | |
| 432 </conditional> | |
| 433 </when> | |
| 434 <when value="Peak_filtering"> | |
| 435 <param name="frequ_filtering" type="integer" value="1000" | |
| 436 label="Freq.min" help="Peaks that occur in the dataset fewer times than this will be removed. Number should be between 1 (no filtering) and number of spectra (pixel)"/> | |
| 437 </when> | |
| 438 <when value="Data_reduction"> | |
| 439 <conditional name="methods_for_reduction"> | |
| 440 <param name="reduction_method" type="select" label="Reduction method"> | |
| 441 <option value="bin" selected="True">bin</option> | |
| 442 <option value="resample">resample</option> | |
| 443 <option value="peaks">peaks</option> | |
| 444 </param> | |
| 445 <when value="bin"> | |
| 446 <param name="bin_width" type="float" value="1" | |
| 447 label="The width of a bin in m/z or ppm" help="Width must be greater than range of m/z values divided by number of m/z features"/> | |
| 448 <param name="bin_units" type="select" display="radio" | |
| 449 label="Unit for bin"> | |
| 450 <option value="mz" selected="True">mz</option> | |
| 451 <option value="ppm">ppm</option> | |
| 452 </param> | |
| 453 <param name="bin_fun" type="select" display="radio" | |
| 454 label="Calculate sum or mean intensity for ions of the same bin"> | |
| 455 <option value="mean" selected="True">mean</option> | |
| 456 <option value="sum">sum</option> | |
| 457 </param> | |
| 458 </when> | |
| 459 <when value="resample"> | |
| 460 <param name="resample_step" type="float" value="1" | |
| 461 label="The step size in m/z" help="Step size must be greater than range of m/z values divided by number of m/z features"/> | |
| 462 </when> | |
| 463 <when value="peaks"> | |
| 464 <param name="peaks_type" type="select" display="radio" | |
| 465 label="Should the peak height or area under the curve be taken as the intensity value?"> | |
| 466 <option value="height" selected="True">height</option> | |
| 467 <option value="area">area</option> | |
| 468 </param> | |
| 469 <conditional name="ref_type"> | |
| 470 <param name="reference_datatype" type="select" label="Choose reference datatype"> | |
| 471 <option value="table" selected="True">tabular file</option> | |
| 472 <option value="msidata_ref">msidata file</option> | |
| 473 </param> | |
| 474 <when value="table"> | |
| 475 <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to extract from input file"/> | |
| 476 </when> | |
| 477 <when value="msidata_ref"> | |
| 478 <param name="peaks_msidata" type="data" format="rdata" label="Picked and aligned Cardinal MSImageSet saved as RData"/> | |
| 479 </when> | |
| 480 </conditional> | |
| 481 </when> | |
| 482 </conditional> | |
| 483 </when> | |
| 484 <when value="Transformation"> | |
| 485 <conditional name="transf_conditional"> | |
| 486 <param name="trans_type" type="select" label="Intensity transformations" help="logarithm base 2 (log2) or squareroot (sqrt)"> | |
| 487 <option value="log2" selected="True">log2</option> | |
| 488 <option value="sqrt">sqrt</option> | |
| 489 </param> | |
| 490 <when value="log2"/> | |
| 491 <when value="sqrt"/> | |
| 492 </conditional> | |
| 493 </when> | |
| 494 </conditional> | |
| 495 </repeat> | |
| 496 </inputs> | |
| 497 <outputs> | |
| 498 <data format="rdata" name="msidata_preprocessed" label="${tool.name} on ${on_string}"/> | |
| 499 <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/> | |
| 500 </outputs> | |
| 501 <tests> | |
| 502 <test> | |
| 503 <expand macro="infile_imzml"/> | |
| 504 <repeat name="methods"> | |
| 505 <conditional name="methods_conditional"> | |
| 506 <param name="preprocessing_method" value="Normalization"/> | |
| 510 </conditional> | |
| 511 </repeat> | |
| 512 <repeat name="methods"> | |
| 513 <conditional name="methods_conditional"> | |
| 514 <param name="preprocessing_method" value="Smoothing"/> | |
| 515 <conditional name="methods_for_smoothing"> | |
| 516 <param name="smoothing_method" value="gaussian"/> | |
| 517 <param name="sd_gaussian" value="4"/> | |
| 518 </conditional> | |
| 519 <param name="window_smoothing" value="9"/> | |
| 520 </conditional> | |
| 521 </repeat> | |
| 522 <repeat name="methods"> | |
| 523 <conditional name="methods_conditional"> | |
| 524 <param name="preprocessing_method" value="Peak_picking"/> | |
| 525 <conditional name="methods_for_picking"> | |
| 526 <param name="picking_method" value="adaptive"/> | |
| 527 </conditional> | |
| 528 <param name="blocks_picking" value="3"/> | |
| 529 <param name="window_picking" value="3"/> | |
| 530 <param name="SNR_picking_method" value="3"/> | |
| 531 </conditional> | |
| 532 </repeat> | |
| 533 <repeat name="methods"> | |
| 534 <conditional name="methods_conditional"> | |
| 535 <param name="preprocessing_method" value="Peak_alignment"/> | |
| 536 <conditional name="methods_for_alignment"> | |
| 537 <param name="alignment_method" value="diff"/> | |
| 538 </conditional> | |
| 539 </conditional> | |
| 540 </repeat> | |
| 541 <repeat name="methods"> | |
| 542 <conditional name="methods_conditional"> | |
| 543 <param name="preprocessing_method" value="Peak_filtering"/> | |
| 544 <param name="frequ_filtering" value="2"/> | |
| 545 </conditional> | |
| 546 </repeat> | |
| 547 <repeat name="methods"> | |
| 548 <conditional name="methods_conditional"> | |
| 549 <param name="preprocessing_method" value="Transformation"/> | |
| 550 <conditional name="transf_conditional"> | |
| 551 <param name="trans_type" value="sqrt"/> | |
| 552 </conditional> | |
| 553 </conditional> | |
| 554 </repeat> | |
| 555 <output name="msidata_preprocessed" file="preprocessing_results1.RData" compare="sim_size"/> | |
| 556 <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> | |
| 557 </test> | |
| 558 <test> | |
| 559 <param name="infile" value="3_files_combined.RData" ftype="rdata"/> | |
| 560 <repeat name="methods"> | |
| 561 <conditional name="methods_conditional"> | |
| 562 <param name="preprocessing_method" value="Peak_picking"/> | |
| 563 <param name="blocks_picking" value="3"/> | |
| 564 <param name="window_picking" value="5"/> | |
| 565 <param name="SNR_picking_method" value="2"/> | |
| 566 <conditional name="methods_for_picking"> | |
| 567 <param name="picking_method" value="adaptive"/> | |
| 568 </conditional> | |
| 569 </conditional> | |
| 570 </repeat> | |
| 571 <repeat name="methods"> | |
| 572 <conditional name="methods_conditional"> | |
| 573 <param name="preprocessing_method" value="Peak_alignment"/> | |
| 574 <conditional name="methods_for_alignment"> | |
| 575 <param name="alignment_method" value="DP"/> | |
| 576 </conditional> | |
| 577 </conditional> | |
| 578 </repeat> | |
| 579 <output name="msidata_preprocessed" file="preprocessing_results2.RData" compare="sim_size"/> | |
| 580 <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> | |
| 581 </test> | |
| 582 <test> | |
| 583 <expand macro="infile_analyze75"/> | |
| 584 <repeat name="methods"> | |
| 585 <conditional name="methods_conditional"> | |
| 586 <param name="preprocessing_method" value="Normalization"/> | |
| 590 </conditional> | |
| 591 </repeat> | |
| 592 <repeat name="methods"> | |
| 593 <conditional name="methods_conditional"> | |
| 594 <param name="preprocessing_method" value="Peak_picking"/> | |
| 595 <param name="blocks_picking" value="100"/> | |
| 596 <param name="window_picking" value="5"/> | |
| 597 <param name="SNR_picking_method" value="3"/> | |
| 598 <param name="picking_method" value="limpic"/> | |
| 599 </conditional> | |
| 600 </repeat> | |
| 601 <repeat name="methods"> | |
| 602 <conditional name="methods_conditional"> | |
| 603 <param name="preprocessing_method" value="Peak_alignment"/> | |
| 604 <conditional name="methods_for_alignment"> | |
| 605 <param name="alignment_method" value="diff"/> | |
| 606 </conditional> | |
| 607 </conditional> | |
| 608 </repeat> | |
| 609 <output name="msidata_preprocessed" file="preprocessing_results3.RData" compare="sim_size"/> | |
| 610 <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> | |
| 611 </test> | |
| 612 <test> | |
| 613 <expand macro="infile_analyze75"/> | |
| 614 <repeat name="methods"> | |
| 615 <conditional name="methods_conditional"> | |
| 616 <param name="preprocessing_method" value="Normalization"/> | |
| 617 </conditional> | |
| 618 </repeat> | |
| 619 <repeat name="methods"> | |
| 620 <conditional name="methods_conditional"> | |
| 621 <param name="preprocessing_method" value="Data_reduction"/> | |
| 622 <param name="bin_width" value="0.1"/> | |
| 623 </conditional> | |
| 624 </repeat> | |
| 625 <output name="msidata_preprocessed" file="preprocessing_results4.RData" compare="sim_size"/> | |
| 626 <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/> | |
| 627 </test> | |
| 628 <test> | |
| 629 <expand macro="infile_imzml"/> | |
| 630 <repeat name="methods"> | |
| 631 <conditional name="methods_conditional"> | |
| 632 <param name="preprocessing_method" value="Data_reduction"/> | |
| 633 <conditional name="methods_for_reduction"> | |
| 634 <param name="reduction_method" value="resample"/> | |
| 635 <param name="resample_step" value="0.1"/> | |
| 636 </conditional> | |
| 637 </conditional> | |
| 638 </repeat> | |
| 639 <output name="msidata_preprocessed" file="preprocessing_results5.RData" compare="sim_size"/> | |
| 640 <output name="QC_overview" file="preprocessing_results5.pdf" compare="sim_size"/> | |
| 641 </test> | |
| 642 </tests> | |
| 643 <help> | |
| 644 <![CDATA[ | |
| 645 | |
| 646 @CARDINAL_DESCRIPTION@ | |
| 647 | |
| 648 ----- | |
| 649 | |
| 650 This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. | |
| 651 | |
| 652 @MSIDATA_INPUT_DESCRIPTION@ | |
| 653 - Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before peak picking. | |
| 654 @MZ_TABULAR_INPUT_DESCRIPTION@ | |
| 655 | |
| 656 **Options** | |
| 657 | |
| 658 - Normalization: Normalization of intensities to total ion current (TIC) | |
| 659 - Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) | |
| 660 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection | |
| 661 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) | |
| 662 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept | |
| 663 - Peak filtering: removes peaks that occur only in a small proportion of pixels. If not sure which cut off to choose run quality control tool first and decide according to the number of peaks per m/z plot | |
| 664 - Data reduction: binning, resampling or peak filtering to reduce data | |
| 665 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. | |
| 666 | |
| 667 **Tips** | |
| 668 | |
| 669 - Peak alignment works only after peak picking | |
| 670 - Peak filtering works only on centroided data (peak picking and alignment or Data reduction peaks) | |
| 671 | |
| 672 **Output** | |
| 673 | |
| 674 - RData file containing the preprocessed MSI data (Cardinal MSImageSet) | |
| 675 - pdf with key values after each processing step | |
| 676 | |
| 677 ]]> | |
| 678 </help> | |
| 679 <expand macro="citations"/> | |
| 680 </tool> |
