Mercurial > repos > galaxyp > cardinal_preprocessing
diff preprocessing.xml @ 10:aa479a0cfb43 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit c8d3adac445b4e08e2724e22d7201bfc38bbf40f"
| author | galaxyp |
|---|---|
| date | Sun, 29 Aug 2021 07:20:07 +0000 |
| parents | e0bbaf9f7da0 |
| children | 274e81434593 |
line wrap: on
line diff
--- a/preprocessing.xml Tue Nov 03 22:41:21 2020 +0000 +++ b/preprocessing.xml Sun Aug 29 07:20:07 2021 +0000 @@ -1,4 +1,4 @@ -<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1"> +<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0"> <description> mass spectrometry imaging preprocessing </description> @@ -7,7 +7,7 @@ </macros> <expand macro="requirements"> <requirement type="package" version="2.3">r-gridextra</requirement> - <requirement type="package" version="3.3.2">r-ggplot2</requirement> + <requirement type="package" version="3.3.5">r-ggplot2</requirement> </expand> <command detect_errors="exit_code"> <![CDATA[ @@ -47,7 +47,6 @@ ## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail -msidata <- msidata[,!duplicated(coord(msidata)[,1:2])] ## set variable to False #set $used_peak_picking = False @@ -135,24 +134,29 @@ print('gaussian smoothing') msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian) + msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay': print('sgolay smoothing') msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters) + msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ## if selected replace negative intensities with zero #if $method.methods_conditional.methods_for_smoothing.replace_negatives: - spectra(msidata)[spectra(msidata)<0] = 0 + ## bring spectra matrix to disk + spectra_df = as.matrix(spectra(msidata)) + spectra_df[spectra_df<0] = 0 + spectra(msidata) = spectra_df #end if #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma': print('moving average smoothing') msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter) + msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) #end if - msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) ############################### QC ########################### @@ -170,7 +174,7 @@ ############################### Mz alignment ########################### #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment': - print('M/z alignment') + print('m/z alignment') ## M/z alignment #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table': @@ -183,7 +187,7 @@ #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref': - msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", , quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) + msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) #end if @@ -200,6 +204,35 @@ vectorofactions = append(vectorofactions, "mz aligned") print(plot(msidata, pixel=random_spectra, col="black")) title("Spectra after m/z alignment", outer=TRUE, line=0) + + + ############################### Mz recalibration ########################### + + #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_recalibration': + print('m/z recalibration') + ## M/z recalibration + + reference_mz = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE) + reference_mz = reference_mz[,$method.methods_conditional.feature_column] + + msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span) + + msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) + + ## remove the reference peaks data to allow proper peak alignment afterwards + metadata(featureData(msidata))['reference peaks'] <- NULL + + ############################### QC ########################### + + maxfeatures =nrow(msidata) + pixelcount = ncol(msidata) + minmz = round(min(mz(msidata)), digits=2) + maxmz = round(max(mz(msidata)), digits=2) + mz_recal = c(minmz, maxmz,maxfeatures, pixelcount) + QC_numbers= cbind(QC_numbers, mz_recal) + vectorofactions = append(vectorofactions, "mz recalibrated") + print(plot(msidata, pixel=random_spectra, col="black")) + title("Spectra after m/z recalibration", outer=TRUE, line=0) ############################### Peak picking ########################### @@ -332,23 +365,23 @@ #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning': print('mass binning') - #if str( $method.methods_conditional.mz_range.features_filtering) == 'change_mz_range': - - #if str($processed_cond.processed_file) == "processed": + #if str($method.methods_conditional.mz_range.features_filtering) == 'change_mz_range': msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") - - #else - ## continuous file cannot be binned from m/z to m/z, therefore first cut m/z range and then do mzbin: - msidata = msidata[mz(msidata) >= $method.methods_conditional.mz_range.min_mz & mz(msidata) <= $method.methods_conditional.mz_range.max_mz,] - msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun") - #end if - #elif str( $method.methods_conditional.mz_range.features_filtering) == 'none': + #elif str($method.methods_conditional.mz_range.features_filtering) == 'none': msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun) + #elif str($method.methods_conditional.mz_range.features_filtering) == 'bin_to_reference': + + bin_reference_mz = read.delim("$method.methods_conditional.mz_range.mz_tabular", header = $method.methods_conditional.mz_range.feature_header, stringsAsFactors = FALSE) + bin_reference_mz = bin_reference_mz[,$method.methods_conditional.mz_range.feature_column] + + msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun, + ref=bin_reference_mz) + #end if msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu)) @@ -385,7 +418,7 @@ print('log2 transformation') ## replace 0 with NA to prevent Inf - spectra_df = spectra(msidata) ## convert into R matrix + spectra_df = as.matrix(spectra(msidata)) ## convert into R matrix spectra_df[spectra_df ==0] = NA print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df)))) spectra(msidata) = spectra_df @@ -422,25 +455,14 @@ ################################################################################ ## save msidata as imzML file, will only work if there is at least 1 m/z left - - #if str($imzml_output) == "cont_format": - #set $continuous_format = True - #end if if (nrow(msidata) > 0){ ## make sure that coordinates are integers coord(msidata)\$y = as.integer(coord(msidata)\$y) coord(msidata)\$x = as.integer(coord(msidata)\$x) - #if $used_peak_picking: - #if $continuous_format: - msidata = as(msidata, "MSContinuousImagingExperiment") - #end if - #elif $used_peak_alignment - #if $continuous_format: - msidata = as(msidata, "MSContinuousImagingExperiment") - #end if - #end if - writeImzML(msidata, "out") + ## only continuous files can currently be exported + msidata = as(msidata, "MSContinuousImagingExperiment") + writeImzML(msidata, "out") } plot(0,type='n',axes=FALSE,ann=FALSE) @@ -463,6 +485,7 @@ <option value="Baseline_reduction">Baseline Reduction</option> <option value="Smoothing">Peak smoothing</option> <option value="mz_alignment">m/z alignment</option> + <option value="mz_recalibration">m/z recalibration</option> <option value="Peak_picking">Peak picking</option> <option value="Peak_alignment">Peak alignment</option> <option value="Peak_filtering">Peak filtering</option> @@ -541,6 +564,26 @@ <param name="span" type="float" value="0.75" label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/> </when> + <when value="mz_recalibration"> + <param name="alignment_tol" type="text" value="NA" + label="tolerance" help="The tolerance to be used when matching the peaks in the unaligned spectra to the reference spectrum. If this is NA, then automatically guess a tolerance from the data."> + <sanitizer> + <valid initial="string.digits"> + <add value="N" /> + <add value="A" /> + </valid> + </sanitizer> + </param> + <param name="alignment_units" type="select" display="radio" optional="False" label="The units to use for the tolerance."> + <option value="ppm" selected="True">ppm</option> + <option value="mz">m/z</option> + </param> + <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/> + <param name="quantile" type="float" value="0.2" + label="quantile" help="The top quantile of reference points (peaks detected via local maxima) to use from the reference spectrum."/> + <param name="span" type="float" value="0.75" + label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/> + </when> <when value="Peak_picking"> <param name="SNR_picking_method" type="float" value="6" label="Signal to noise ratio" @@ -620,15 +663,19 @@ </param> <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/> <conditional name="mz_range"> - <param name="features_filtering" type="select" label="Select m/z feature filtering option"> + <param name="features_filtering" type="select" label="Select m/z options"> <option value="none" selected="True">none</option> <option value="change_mz_range">change m/z range</option> + <option value="bin_to_reference">bin m/z to reference</option> </param> <when value="none"/> <when value="change_mz_range"> <param name="min_mz" type="float" value="1" label="Minimum value for m/z"/> <param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/> </when> + <when value="bin_to_reference"> + <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features as reference for binning. Only the m/z values from the tabular file will be kept."/> + </when> </conditional> </when> <when value="Transformation"> @@ -645,7 +692,6 @@ </when> </conditional> </repeat> - <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/> </inputs> <outputs> <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/> @@ -666,10 +712,8 @@ <conditional name="methods_conditional"> <param name="preprocessing_method" value="Smoothing"/> <conditional name="methods_for_smoothing"> - <param name="smoothing_method" value="gaussian"/> - <param name="sd_gaussian" value="4"/> + <param name="smoothing_method" value="sgolay"/> </conditional> - <param name="window_smoothing" value="9"/> </conditional> </repeat> <repeat name="methods"> @@ -702,7 +746,6 @@ </conditional> </conditional> </repeat> - <param name="imzml_output" value="cont_format"/> <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/> @@ -727,7 +770,6 @@ <param name="preprocessing_method" value="Peak_alignment"/> </conditional> </repeat> - <param name="imzml_output" value="cont_format"/> <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/> <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size"> <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/> @@ -753,13 +795,11 @@ <conditional name="methods_for_picking"> <param name="picking_method" value="mad"/> </conditional> - <param name="imzml_output" value="proc_format"/> </conditional> </repeat> <repeat name="methods"> <conditional name="methods_conditional"> <param name="preprocessing_method" value="Peak_alignment"/> - <param name="imzml_output" value="proc_format"/> </conditional> </repeat> <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/> @@ -783,6 +823,16 @@ </repeat> <repeat name="methods"> <conditional name="methods_conditional"> + <param name="preprocessing_method" value="mz_recalibration"/> + <param name="alignment_tol" value="2"/> + <param name="alignment_units" value="ppm"/> + <param name="mz_tabular" value="inputcalibrantfile2.txt" ftype="tabular"/> + <param name="feature_column" value="1"/> + <param name="feature_header" value="TRUE"/> + </conditional> + </repeat> + <repeat name="methods"> + <conditional name="methods_conditional"> <param name="preprocessing_method" value="Mass_binning"/> <param name="bin_width" value="0.1"/> <param name="bin_units" value="mz"/> @@ -850,7 +900,7 @@ **Output** -- MSI data as continuous or processed imzML file +- MSI data as continuous imzML file - pdf with key values and four random mass spectra after each processing step ]]>
