Mercurial > repos > galaxyp > cardinal_preprocessing
diff preprocessing.xml @ 9:e0bbaf9f7da0 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit e499c9124d3fd85a7fc47b95c206ce91a5e3678c-dirty"
| author | galaxyp |
|---|---|
| date | Tue, 03 Nov 2020 22:41:21 +0000 |
| parents | 413a1b74f496 |
| children | aa479a0cfb43 |
line wrap: on
line diff
--- a/preprocessing.xml Wed May 13 17:56:03 2020 +0000 +++ b/preprocessing.xml Tue Nov 03 22:41:21 2020 +0000 @@ -1,4 +1,4 @@ -<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.2"> +<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1"> <description> mass spectrometry imaging preprocessing </description> @@ -7,7 +7,7 @@ </macros> <expand macro="requirements"> <requirement type="package" version="2.3">r-gridextra</requirement> - <requirement type="package" version="3.2.1">r-ggplot2</requirement> + <requirement type="package" version="3.3.2">r-ggplot2</requirement> </expand> <command detect_errors="exit_code"> <![CDATA[ @@ -74,7 +74,7 @@ ## Choose random spectra for QC plots random_spectra = sample(pixels(msidata), 4, replace=FALSE) par(oma=c(0,0,2,0)) - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Input spectra", outer=TRUE, line=0) ############################### Preprocessing steps ########################### @@ -88,10 +88,6 @@ print('Normalization') ##normalization - if (class(msidata) == "MSProcessedImagingExperiment"){ - msidata = as(msidata, "MSContinuousImagingExperiment") - } - msidata = normalize(msidata, method="$method.methods_conditional.methods_for_normalization.normalization_method") msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) @@ -105,7 +101,7 @@ normalized = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, normalized) vectorofactions = append(vectorofactions, "normalized") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after normalization", outer=TRUE, line=0) ############################### Baseline reduction ########################### @@ -114,10 +110,6 @@ print('Baseline_reduction') ##baseline reduction - if (class(msidata) == "MSProcessedImagingExperiment"){ - msidata = as(msidata, "MSContinuousImagingExperiment") - } - msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline) msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) @@ -130,7 +122,7 @@ baseline = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, baseline) vectorofactions = append(vectorofactions, "baseline red.") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after baseline reduction", outer=TRUE, line=0) ############################### Smoothing ########################### @@ -139,11 +131,6 @@ print('Smoothing') ## Smoothing - if (class(msidata) == "MSProcessedImagingExperiment"){ - msidata = as(msidata, "MSContinuousImagingExperiment") - } - - #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian': print('gaussian smoothing') @@ -176,7 +163,7 @@ smoothed = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, smoothed) vectorofactions = append(vectorofactions, "smoothed") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after smoothing", outer=TRUE, line=0) @@ -211,7 +198,7 @@ mz_aligned = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, mz_aligned) vectorofactions = append(vectorofactions, "mz aligned") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after m/z alignment", outer=TRUE, line=0) @@ -240,10 +227,6 @@ #end if msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) - #if str($method.methods_conditional.imzml_output) == "cont_format": - #set $continuous_format = True - #end if - ############################### QC ########################### @@ -254,7 +237,7 @@ picked = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, picked) vectorofactions = append(vectorofactions, "picked") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after peak picking", outer=TRUE, line=0) ############################### Peak alignment ########################### @@ -285,9 +268,6 @@ msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) - #if str($method.methods_conditional.imzml_output) == "cont_format": - #set $continuous_format = True - #end if ############################### QC ########################### @@ -298,7 +278,7 @@ aligned = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, aligned) vectorofactions = append(vectorofactions, "aligned") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after alignment", outer=TRUE, line=0) ############################### Peak filtering ########################### @@ -318,7 +298,7 @@ filtered = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, filtered) vectorofactions = append(vectorofactions, "filtered") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after filtering", outer=TRUE, line=0) ############################### Peak binning ########################### @@ -343,39 +323,43 @@ peak_binned = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, peak_binned) vectorofactions = append(vectorofactions, "peak binned") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after peak binning", outer=TRUE, line=0) - ############################### Data reduction ########################### + ############################### Mass binning ########################### - #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction': - print('Data_reduction') + #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning': + print('mass binning') - ## these functions only work on MSImageSet - msidata = as(msidata, "MSImageSet") + #if str( $method.methods_conditional.mz_range.features_filtering) == 'change_mz_range': + + #if str($processed_cond.processed_file) == "processed": - #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin': - print('bin reduction') + msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") + + #else + ## continuous file cannot be binned from m/z to m/z, therefore first cut m/z range and then do mzbin: + msidata = msidata[mz(msidata) >= $method.methods_conditional.mz_range.min_mz & mz(msidata) <= $method.methods_conditional.mz_range.max_mz,] + msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") + #end if - msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun) + + #elif str( $method.methods_conditional.mz_range.features_filtering) == 'none': + + msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun) + #end if + + msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) + ## optional: replace NA with 0 - #if $method.methods_conditional.methods_for_reduction.replace_NA_bin: + #if $method.methods_conditional.replace_NA_bin: ## count and replace NAs print(paste0("Number of NA that were set to zero after binning:",sum(is.na(spectra(msidata))))) spectra(msidata)[is.na(spectra(msidata))] = 0 #end if - - #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample': - print('resample reduction') - - msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step) - #end if - - ## coercition into new format - msidata = as(msidata, "MSImagingExperiment") - + ############################### QC ########################### maxfeatures =nrow(msidata) @@ -385,8 +369,8 @@ reduced = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, reduced) vectorofactions = append(vectorofactions, "reduced") - print(plot(msidata, pixel=random_spectra)) - title("Spectra after data reduction", outer=TRUE, line=0) + print(plot(msidata, pixel=random_spectra, col="black")) + title("Spectra after m/z binning", outer=TRUE, line=0) ############################### Transformation ########################### @@ -428,7 +412,7 @@ transformed = c(minmz, maxmz,maxfeatures, pixelcount) QC_numbers= cbind(QC_numbers, transformed) vectorofactions = append(vectorofactions, "transformed") - print(plot(msidata, pixel=random_spectra)) + print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after transformation", outer=TRUE, line=0) #end if @@ -438,6 +422,10 @@ ################################################################################ ## save msidata as imzML file, will only work if there is at least 1 m/z left + + #if str($imzml_output) == "cont_format": + #set $continuous_format = True + #end if if (nrow(msidata) > 0){ ## make sure that coordinates are integers @@ -479,7 +467,7 @@ <option value="Peak_alignment">Peak alignment</option> <option value="Peak_filtering">Peak filtering</option> <option value="Peak_binning">Peak binning to reference peaks</option> - <option value="Data_reduction">Data reduction</option> + <option value="Mass_binning">m/z binning</option> <option value="Transformation">Transformation</option> </param> <when value="Normalization"> @@ -576,7 +564,6 @@ <when value="mad"/> <when value="simple"/> </conditional> - <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/> </when> <when value="Peak_alignment"> <param name="value_diffalignment" type="float" value="200" @@ -595,7 +582,6 @@ <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/> </when> </conditional> - <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format" help= "Processed imzML works only in MALDIquant tools, not yet in MSI tools (Cardinal)"/> </when> <when value="Peak_filtering"> <param name="frequ_filtering" type="float" value="0.01" max="1" min="0" label="Minimum frequency" help="Peaks that occur in the dataset in lesser proportion than this will be dropped (0.01 --> filtering for 1% of spectra)"/> @@ -620,31 +606,29 @@ <option value="area">area</option> </param> </when> - <when value="Data_reduction"> - <conditional name="methods_for_reduction"> - <param name="reduction_method" type="select" label="Reduction method"> - <option value="bin" selected="True">bin</option> - <option value="resample">resample</option> + <when value="Mass_binning"> + <param name="bin_width" type="float" value="1" label="The width of a bin in m/z or ppm" help="Width must be greater than range of m/z values divided by number of m/z features"/> + <param name="bin_units" type="select" display="radio" + label="Unit for bin"> + <option value="mz" selected="True">mz</option> + <option value="ppm">ppm</option> + </param> + <param name="bin_fun" type="select" display="radio" + label="Calculate sum or mean intensity for ions of the same bin"> + <option value="mean" selected="True">mean</option> + <option value="sum">sum</option> + </param> + <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/> + <conditional name="mz_range"> + <param name="features_filtering" type="select" label="Select m/z feature filtering option"> + <option value="none" selected="True">none</option> + <option value="change_mz_range">change m/z range</option> </param> - <when value="bin"> - <param name="bin_width" type="float" value="1" - label="The width of a bin in m/z or ppm" help="Width must be greater than range of m/z values divided by number of m/z features"/> - <param name="bin_units" type="select" display="radio" - label="Unit for bin"> - <option value="mz" selected="True">mz</option> - <option value="ppm">ppm</option> - </param> - <param name="bin_fun" type="select" display="radio" - label="Calculate sum or mean intensity for ions of the same bin"> - <option value="mean" selected="True">mean</option> - <option value="sum">sum</option> - </param> - <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/> - </when> - <when value="resample"> - <param name="resample_step" type="float" value="1" - label="The step size in m/z" help="Step size must be greater than range of m/z values divided by number of m/z features"/> - </when> + <when value="none"/> + <when value="change_mz_range"> + <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/> + <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/> + </when> </conditional> </when> <when value="Transformation"> @@ -661,6 +645,7 @@ </when> </conditional> </repeat> + <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/> </inputs> <outputs> <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> @@ -696,13 +681,11 @@ <param name="blocks_picking" value="3"/> <param name="window_picking" value="3"/> <param name="SNR_picking_method" value="3"/> - <param name="imzml_output" value="cont_format"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_alignment"/> - <param name="imzml_output" value="cont_format"/> </conditional> </repeat> <repeat name="methods"> @@ -719,6 +702,7 @@ </conditional> </conditional> </repeat> + <param name="imzml_output" value="cont_format"/> <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/> @@ -736,15 +720,14 @@ <conditional name="methods_for_picking"> <param name="picking_method" value="adaptive"/> </conditional> - <param name="imzml_output" value="cont_format"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_alignment"/> - <param name="imzml_output" value="cont_format"/> </conditional> </repeat> + <param name="imzml_output" value="cont_format"/> <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/> @@ -800,11 +783,9 @@ </repeat> <repeat name="methods"> <conditional name="methods_conditional"> - <param name="preprocessing_method" value="Data_reduction"/> - <conditional name="methods_for_reduction"> - <param name="reduction_method" value="bin"/> + <param name="preprocessing_method" value="Mass_binning"/> <param name="bin_width" value="0.1"/> - </conditional> + <param name="bin_units" value="mz"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/> @@ -817,7 +798,7 @@ <expand macro="processed_infile_imzml"/> <conditional name="processed_cond"> <param name="processed_file" value="processed"/> - <param name="accuracy" value="100"/> + <param name="accuracy" value="200"/> <param name="units" value="ppm"/> </conditional> <repeat name="methods"> @@ -855,15 +836,17 @@ **Options** -- Normalization: Normalization of intensities to total ion current (TIC) or to root-mean-square (RMS) -- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) +- Normalization: normalization of intensities to total ion current (TIC) or to root-mean-square (RMS) +- Baseline reduction: baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets) - Smoothing: Smoothing of the peaks reduces noise and improves peak detection +- m/z alignment: removes small m/z shifts between spectra - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards) - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept - Peak filtering: removes peaks that occur only in a small proportion of pixels. If not sure which cut off to choose run quality control tool first and decide according to the number of peaks per m/z plot -- Peak binning: extracts peaks intensities (from a profile dataset) for a list of m/z (reference) values -- Data reduction: binning or resampling to reduce data +- Peak binning: extracts peaks intensities, either peak height or area under curve (from a profile dataset) for a list of m/z (reference) values +- m/z binning: generates new m/z bins - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. + **Output**
