diff preprocessing.xml @ 9:e0bbaf9f7da0 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit e499c9124d3fd85a7fc47b95c206ce91a5e3678c-dirty"
author galaxyp
date Tue, 03 Nov 2020 22:41:21 +0000
parents 413a1b74f496
children aa479a0cfb43
line wrap: on
line diff
--- a/preprocessing.xml	Wed May 13 17:56:03 2020 +0000
+++ b/preprocessing.xml	Tue Nov 03 22:41:21 2020 +0000
@@ -1,4 +1,4 @@
-<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.2">
+<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1">
     <description>
         mass spectrometry imaging preprocessing
     </description>
@@ -7,7 +7,7 @@
     </macros>
     <expand macro="requirements">
         <requirement type="package" version="2.3">r-gridextra</requirement>
-        <requirement type="package" version="3.2.1">r-ggplot2</requirement>
+        <requirement type="package" version="3.3.2">r-ggplot2</requirement>
     </expand>
     <command detect_errors="exit_code">
     <![CDATA[
@@ -74,7 +74,7 @@
     ## Choose random spectra for QC plots
     random_spectra = sample(pixels(msidata), 4, replace=FALSE)
     par(oma=c(0,0,2,0))
-    print(plot(msidata, pixel=random_spectra))
+    print(plot(msidata, pixel=random_spectra, col="black"))
     title("Input spectra", outer=TRUE, line=0)
 
     ############################### Preprocessing steps ###########################
@@ -88,10 +88,6 @@
             print('Normalization')
             ##normalization
 
-            if (class(msidata) == "MSProcessedImagingExperiment"){
-                msidata = as(msidata, "MSContinuousImagingExperiment")
-            }
-
             msidata = normalize(msidata, method="$method.methods_conditional.methods_for_normalization.normalization_method")
             msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
@@ -105,7 +101,7 @@
             normalized = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, normalized)
             vectorofactions = append(vectorofactions, "normalized")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after normalization", outer=TRUE, line=0)
 
     ############################### Baseline reduction ###########################
@@ -114,10 +110,6 @@
             print('Baseline_reduction')
             ##baseline reduction
 
-            if (class(msidata) == "MSProcessedImagingExperiment"){
-                msidata = as(msidata, "MSContinuousImagingExperiment")
-            }
-
             msidata = reduceBaseline(msidata, method="median", blocks=$method.methods_conditional.blocks_baseline, spar=$method.methods_conditional.spar_baseline)
             msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
@@ -130,7 +122,7 @@
             baseline = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, baseline)
             vectorofactions = append(vectorofactions, "baseline red.")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after baseline reduction", outer=TRUE, line=0)
 
     ############################### Smoothing ###########################
@@ -139,11 +131,6 @@
             print('Smoothing')
             ## Smoothing
 
-            if (class(msidata) == "MSProcessedImagingExperiment"){
-                msidata = as(msidata, "MSContinuousImagingExperiment")
-            }
-
-
             #if str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'gaussian':
                 print('gaussian smoothing')
 
@@ -176,7 +163,7 @@
             smoothed = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, smoothed)
             vectorofactions = append(vectorofactions, "smoothed")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after smoothing", outer=TRUE, line=0)
 
 
@@ -211,7 +198,7 @@
             mz_aligned = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, mz_aligned)
             vectorofactions = append(vectorofactions, "mz aligned")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after m/z alignment", outer=TRUE, line=0)
 
 
@@ -240,10 +227,6 @@
             #end if
             msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
-            #if str($method.methods_conditional.imzml_output) == "cont_format":
-                #set $continuous_format = True
-            #end if
-
 
             ############################### QC ###########################
 
@@ -254,7 +237,7 @@
             picked = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, picked)
             vectorofactions = append(vectorofactions, "picked")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after peak picking", outer=TRUE, line=0)
 
     ############################### Peak alignment ###########################
@@ -285,9 +268,6 @@
 
             msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
-            #if str($method.methods_conditional.imzml_output) == "cont_format":
-                #set $continuous_format = True
-            #end if
 
             ############################### QC ###########################
 
@@ -298,7 +278,7 @@
             aligned = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, aligned)
             vectorofactions = append(vectorofactions, "aligned")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after alignment", outer=TRUE, line=0)
 
     ############################### Peak filtering ###########################
@@ -318,7 +298,7 @@
             filtered = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, filtered)
             vectorofactions = append(vectorofactions, "filtered")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after filtering", outer=TRUE, line=0)
 
     ############################### Peak binning ###########################
@@ -343,39 +323,43 @@
             peak_binned = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, peak_binned)
             vectorofactions = append(vectorofactions, "peak binned")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after peak binning", outer=TRUE, line=0)
 
 
-    ############################### Data reduction ###########################
+    ############################### Mass binning ###########################
 
-        #elif str( $method.methods_conditional.preprocessing_method) == 'Data_reduction':
-            print('Data_reduction')
+        #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning':
+            print('mass binning')
 
-            ## these functions only work on MSImageSet
-            msidata = as(msidata, "MSImageSet")
+            #if str( $method.methods_conditional.mz_range.features_filtering) == 'change_mz_range':
+
+                #if str($processed_cond.processed_file) == "processed":
 
-            #if str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'bin':
-                print('bin reduction')
+                msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")
+                
+                #else 
+                    ## a continuous file cannot be binned with explicit from/to m/z limits, therefore first subset to the m/z range and then run mzBin:
+                    msidata = msidata[mz(msidata) >= $method.methods_conditional.mz_range.min_mz & mz(msidata) <= $method.methods_conditional.mz_range.max_mz,]
+                    msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")
+                #end if
 
-                msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun)
+		   
+	    #elif str( $method.methods_conditional.mz_range.features_filtering) == 'none':	    
+
+                msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun)
 
+            #end if
+             
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
+                
                 ## optional: replace NA with 0
-                #if $method.methods_conditional.methods_for_reduction.replace_NA_bin:
+                #if $method.methods_conditional.replace_NA_bin:
                     ## count and replace NAs
                     print(paste0("Number of NA that were set to zero after binning:",sum(is.na(spectra(msidata)))))
                     spectra(msidata)[is.na(spectra(msidata))] = 0 
                 #end if
-
-            #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample':
-                print('resample reduction')
-
-                msidata = reduceDimension(msidata, method="resample", step=$method.methods_conditional.methods_for_reduction.resample_step)
-            #end if
-
-            ## coercition into new format
-            msidata = as(msidata, "MSImagingExperiment")
-
+                
             ############################### QC ###########################
 
             maxfeatures =nrow(msidata)
@@ -385,8 +369,8 @@
             reduced = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, reduced)
             vectorofactions = append(vectorofactions, "reduced")
-            print(plot(msidata, pixel=random_spectra))
-            title("Spectra after data reduction", outer=TRUE, line=0)
+            print(plot(msidata, pixel=random_spectra, col="black"))
+            title("Spectra after m/z binning", outer=TRUE, line=0)
 
         ############################### Transformation ###########################
 
@@ -428,7 +412,7 @@
             transformed = c(minmz, maxmz,maxfeatures, pixelcount)
             QC_numbers= cbind(QC_numbers, transformed)
             vectorofactions = append(vectorofactions, "transformed")
-            print(plot(msidata, pixel=random_spectra))
+            print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after transformation", outer=TRUE, line=0)
 
             #end if
@@ -438,6 +422,10 @@
     ################################################################################
 
     ## save msidata as imzML file, will only work if there is at least 1 m/z left
+    
+    #if str($imzml_output) == "cont_format":
+        #set $continuous_format = True
+    #end if
 
         if (nrow(msidata) > 0){
             ## make sure that coordinates are integers
@@ -479,7 +467,7 @@
                     <option value="Peak_alignment">Peak alignment</option>
                     <option value="Peak_filtering">Peak filtering</option>
                     <option value="Peak_binning">Peak binning to reference peaks</option>
-                    <option value="Data_reduction">Data reduction</option>
+                    <option value="Mass_binning">m/z binning</option>
                     <option value="Transformation">Transformation</option>
                 </param>
                 <when value="Normalization">
@@ -576,7 +564,6 @@
                         <when value="mad"/>
                         <when value="simple"/>
                     </conditional>
-                <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/>
                 </when>
                 <when value="Peak_alignment">
                     <param name="value_diffalignment" type="float" value="200"
@@ -595,7 +582,6 @@
                             <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/>
                         </when>
                     </conditional>
-                <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format" help= "Processed imzML works only in MALDIquant tools, not yet in MSI tools (Cardinal)"/>
                 </when>
                 <when value="Peak_filtering">
                     <param name="frequ_filtering" type="float" value="0.01" max="1" min="0" label="Minimum frequency" help="Peaks that occur in the dataset in lesser proportion than this will be dropped (0.01 --> filtering for 1% of spectra)"/>
@@ -620,31 +606,29 @@
                             <option value="area">area</option>
                     </param>
                 </when>
-                <when value="Data_reduction">
-                    <conditional name="methods_for_reduction">
-                        <param name="reduction_method" type="select" label="Reduction method">
-                            <option value="bin" selected="True">bin</option>
-                            <option value="resample">resample</option>
+                <when value="Mass_binning">
+                    <param name="bin_width" type="float" value="1" label="The width of a bin in m/z or ppm" help="Width must be greater than range of m/z values divided by number of m/z features"/>
+		    <param name="bin_units" type="select" display="radio"
+		           label="Unit for bin">
+		            <option value="mz" selected="True">mz</option>
+		            <option value="ppm">ppm</option>
+		    </param>
+                    <param name="bin_fun" type="select" display="radio"
+                           label="Calculate sum or mean intensity for ions of the same bin">
+                            <option value="mean" selected="True">mean</option>
+                            <option value="sum">sum</option>
+                    </param>
+                    <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/>
+                    <conditional name="mz_range">
+            		 <param name="features_filtering" type="select" label="Select m/z feature filtering option">
+                            <option value="none" selected="True">none</option>
+                            <option value="change_mz_range">change m/z range</option>
                         </param>
-                        <when value="bin">
-                            <param name="bin_width" type="float" value="1"
-                                   label="The width of a bin in m/z or ppm" help="Width must be greater than range of m/z values divided by number of m/z features"/>
-                            <param name="bin_units" type="select" display="radio"
-                                   label="Unit for bin">
-                                    <option value="mz" selected="True">mz</option>
-                                    <option value="ppm">ppm</option>
-                            </param>
-                            <param name="bin_fun" type="select" display="radio"
-                                   label="Calculate sum or mean intensity for ions of the same bin">
-                                    <option value="mean" selected="True">mean</option>
-                                    <option value="sum">sum</option>
-                            </param>
-                            <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/>
-                        </when>
-                        <when value="resample">
-                            <param name="resample_step" type="float" value="1"
-                                   label="The step size in m/z" help="Step size must be greater than range of m/z values divided by number of m/z features"/>
-                        </when>
+			    <when value="none"/>
+			    <when value="change_mz_range">
+				<param name="min_mz" type="float" value="1" label="Minimum value for m/z"/>
+				<param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/>
+			    </when>
                     </conditional>
                 </when>
                 <when value="Transformation">
@@ -661,6 +645,7 @@
                 </when>
             </conditional>
         </repeat>
+        <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/>
     </inputs>
     <outputs>
         <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/>
@@ -696,13 +681,11 @@
                     <param name="blocks_picking" value="3"/>
                     <param name="window_picking" value="3"/>
                     <param name="SNR_picking_method" value="3"/>
-                    <param name="imzml_output" value="cont_format"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_alignment"/>
-                    <param name="imzml_output" value="cont_format"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
@@ -719,6 +702,7 @@
                         </conditional>
                 </conditional>
             </repeat>
+            <param name="imzml_output" value="cont_format"/>
             <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size">
                 <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/>
@@ -736,15 +720,14 @@
                     <conditional name="methods_for_picking">
                         <param name="picking_method" value="adaptive"/>
                     </conditional>
-                    <param name="imzml_output" value="cont_format"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_alignment"/>
-                    <param name="imzml_output" value="cont_format"/>
                 </conditional>
             </repeat>
+            <param name="imzml_output" value="cont_format"/>
             <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size">
                 <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/>
@@ -800,11 +783,9 @@
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
-                    <param name="preprocessing_method" value="Data_reduction"/>
-                    <conditional name="methods_for_reduction">
-                        <param name="reduction_method" value="bin"/>
+                    <param name="preprocessing_method" value="Mass_binning"/>
                         <param name="bin_width" value="0.1"/>
-                    </conditional>
+                        <param name="bin_units" value="mz"/>
                 </conditional>
             </repeat>
             <output name="QC_overview" file="preprocessing_results4.pdf" compare="sim_size"/>
@@ -817,7 +798,7 @@
             <expand macro="processed_infile_imzml"/>
             <conditional name="processed_cond">
                 <param name="processed_file" value="processed"/>
-                <param name="accuracy" value="100"/>
+                <param name="accuracy" value="200"/>
                 <param name="units" value="ppm"/>
             </conditional>
             <repeat name="methods">
@@ -855,15 +836,17 @@
 
 **Options**
 
-- Normalization: Normalization of intensities to total ion current (TIC) or to root-mean-square (RMS)
-- Baseline reduction: Baseline  reduction removes background intensity generated by chemical noise (common in MALDI datasets)
+- Normalization: normalization of intensities to total ion current (TIC) or to root-mean-square (RMS)
+- Baseline reduction: baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets)
 - Smoothing: Smoothing of the peaks reduces noise and improves peak detection
+- m/z alignment: removes small m/z shifts between spectra 
 - Peak picking: relevant peaks are picked while noise-peaks are removed (needs peak alignment afterwards)
 - Peak alignment: only possible after peak picking, m/z inaccuracies are removed by alignment of same peaks to a common m/z value; if no reference is given the peaks are aligned to the local maxima of the mean spectrum of the current dataset; external reference data can be used from another MSI data file or a tabular file with m/z values, but then only the m/z from the reference will be kept
 - Peak filtering: removes peaks that occur only in a small proportion of pixels. If not sure which cut off to choose run quality control tool first and decide according to the number of peaks per m/z plot
-- Peak binning: extracts peaks intensities (from a profile dataset) for a list of m/z (reference) values
-- Data reduction: binning or resampling to reduce data
+- Peak binning: extracts peak intensities (either peak height or area under the curve) from a profile dataset for a list of reference m/z values
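+- m/z binning: bins the m/z axis into new bins of a chosen width (in m/z or ppm) and combines the intensities within each bin by mean or sum; the m/z range can optionally be restricted, and NAs introduced by binning can be replaced with 0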
 - Transformation: log2 or squareroot transformation of all intensities; when using log2 transformation zero intensities will become NA, this can lead to compatibility problems. 
+                    
 
 **Output**