diff preprocessing.xml @ 10:aa479a0cfb43 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit c8d3adac445b4e08e2724e22d7201bfc38bbf40f"
author galaxyp
date Sun, 29 Aug 2021 07:20:07 +0000
parents e0bbaf9f7da0
children 274e81434593
line wrap: on
line diff
--- a/preprocessing.xml	Tue Nov 03 22:41:21 2020 +0000
+++ b/preprocessing.xml	Sun Aug 29 07:20:07 2021 +0000
@@ -1,4 +1,4 @@
-<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1">
+<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0">
     <description>
         mass spectrometry imaging preprocessing
     </description>
@@ -7,7 +7,7 @@
     </macros>
     <expand macro="requirements">
         <requirement type="package" version="2.3">r-gridextra</requirement>
-        <requirement type="package" version="3.3.2">r-ggplot2</requirement>
+        <requirement type="package" version="3.3.5">r-ggplot2</requirement>
     </expand>
     <command detect_errors="exit_code">
     <![CDATA[
@@ -47,7 +47,6 @@
 
 
 ## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail
-msidata <- msidata[,!duplicated(coord(msidata)[,1:2])] 
 
 ## set variable to False
 #set $used_peak_picking = False
@@ -135,24 +134,29 @@
                 print('gaussian smoothing')
 
                 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, sd = $method.methods_conditional.methods_for_smoothing.sd_gaussian)
+                msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
             #elif str( $method.methods_conditional.methods_for_smoothing.smoothing_method) == 'sgolay':
                 print('sgolay smoothing')
 
                 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, order = $method.methods_conditional.methods_for_smoothing.order_of_filters)
+                msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
                 ## if selected replace negative intensities with zero
                 #if $method.methods_conditional.methods_for_smoothing.replace_negatives:
-                    spectra(msidata)[spectra(msidata)<0] = 0
+                    ## convert spectra into an R matrix so that negative intensities can be replaced
+                    spectra_df = as.matrix(spectra(msidata))
+                    spectra_df[spectra_df<0] = 0
+                    spectra(msidata) = spectra_df
                 #end if
 
             #elif str($method.methods_conditional.methods_for_smoothing.smoothing_method) == 'ma':
                 print('moving average smoothing')
 
                 msidata = smoothSignal(msidata, method="$method.methods_conditional.methods_for_smoothing.smoothing_method", window=$method.methods_conditional.window_smoothing, coef = $method.methods_conditional.methods_for_smoothing.coefficients_ma_filter)
+                msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
             #end if
-            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
 
             ############################### QC ###########################
 
@@ -170,7 +174,7 @@
     ############################### Mz alignment ###########################
 
         #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_alignment':
-            print('M/z alignment')
+            print('m/z alignment')
             ## M/z alignment
 
             #if str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_table':
@@ -183,7 +187,7 @@
 
             #elif str( $method.methods_conditional.mzalign_ref_type.align_reference_datatype) == 'align_noref':
 
-                msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", , quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span)
+                msidata = mzAlign(msidata,tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span)
 
             #end if
 
@@ -200,6 +204,35 @@
             vectorofactions = append(vectorofactions, "mz aligned")
             print(plot(msidata, pixel=random_spectra, col="black"))
             title("Spectra after m/z alignment", outer=TRUE, line=0)
+            
+          
+    ############################### Mz recalibration ###########################
+
+        #elif str( $method.methods_conditional.preprocessing_method ) == 'mz_recalibration':
+            print('m/z recalibration')
+            ## M/z recalibration
+
+            reference_mz = read.delim("$method.methods_conditional.mz_tabular", header = $method.methods_conditional.feature_header, stringsAsFactors = FALSE)
+            reference_mz = reference_mz[,$method.methods_conditional.feature_column]
+
+            msidata = mzAlign(msidata, ref=reference_mz, tolerance = $method.methods_conditional.alignment_tol, units = "$method.methods_conditional.alignment_units", quantile = $method.methods_conditional.quantile, span = $method.methods_conditional.span)
+
+            msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
+            
+            ## remove the reference peaks data to allow proper peak alignment afterwards
+            metadata(featureData(msidata))['reference peaks'] <- NULL
+
+            ############################### QC ###########################
+
+            maxfeatures =nrow(msidata)
+            pixelcount = ncol(msidata)
+            minmz = round(min(mz(msidata)), digits=2)
+            maxmz = round(max(mz(msidata)), digits=2)
+            mz_recal = c(minmz, maxmz,maxfeatures, pixelcount)
+            QC_numbers= cbind(QC_numbers, mz_recal)
+            vectorofactions = append(vectorofactions, "mz recalibrated")
+            print(plot(msidata, pixel=random_spectra, col="black"))
+            title("Spectra after m/z recalibration", outer=TRUE, line=0)      
 
 
     ############################### Peak picking ###########################
@@ -332,23 +365,23 @@
         #elif str( $method.methods_conditional.preprocessing_method) == 'Mass_binning':
             print('mass binning')
 
-            #if str( $method.methods_conditional.mz_range.features_filtering) == 'change_mz_range':
-
-                #if str($processed_cond.processed_file) == "processed":
+            #if str($method.methods_conditional.mz_range.features_filtering) == 'change_mz_range':
 
                 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, from=$method.methods_conditional.mz_range.min_mz, to=$method.methods_conditional.mz_range.max_mz, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")
-                
-                #else 
-                    ## continuous file cannot be binned from m/z to m/z, therefore first cut m/z range and then do mzbin:
-                    msidata = msidata[mz(msidata) >= $method.methods_conditional.mz_range.min_mz & mz(msidata) <= $method.methods_conditional.mz_range.max_mz,]
-                    msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun="$method.methods_conditional.bin_fun")
-                #end if
 
 		   
-	    #elif str( $method.methods_conditional.mz_range.features_filtering) == 'none':	    
+	    #elif str($method.methods_conditional.mz_range.features_filtering) == 'none':
 
                 msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun)
 
+	    #elif str($method.methods_conditional.mz_range.features_filtering) == 'bin_to_reference':
+
+	        bin_reference_mz = read.delim("$method.methods_conditional.mz_range.mz_tabular", header = $method.methods_conditional.mz_range.feature_header, stringsAsFactors = FALSE)
+                bin_reference_mz = bin_reference_mz[,$method.methods_conditional.mz_range.feature_column]
+
+	       msidata = mse_bin = mzBin(msidata,resolution=$method.methods_conditional.bin_width, units="$method.methods_conditional.bin_units", fun=$method.methods_conditional.bin_fun,
+	       ref=bin_reference_mz)
+
             #end if
              
             msidata <- process(msidata, BPPARAM=MulticoreParam(workers=number_cpu))
@@ -385,7 +418,7 @@
                 print('log2 transformation')
 
                 ## replace 0 with NA to prevent Inf
-                spectra_df = spectra(msidata) ## convert into R matrix
+                spectra_df = as.matrix(spectra(msidata)) ## convert into R matrix
                 spectra_df[spectra_df ==0] = NA
                 print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df))))
                 spectra(msidata) = spectra_df
@@ -422,25 +455,14 @@
     ################################################################################
 
     ## save msidata as imzML file, will only work if there is at least 1 m/z left
-    
-    #if str($imzml_output) == "cont_format":
-        #set $continuous_format = True
-    #end if
 
         if (nrow(msidata) > 0){
             ## make sure that coordinates are integers
             coord(msidata)\$y = as.integer(coord(msidata)\$y)
             coord(msidata)\$x = as.integer(coord(msidata)\$x)
-        #if $used_peak_picking:
-            #if $continuous_format:
-                msidata = as(msidata, "MSContinuousImagingExperiment")
-            #end if
-        #elif $used_peak_alignment
-            #if $continuous_format:
-                msidata = as(msidata, "MSContinuousImagingExperiment")
-            #end if
-        #end if
-        writeImzML(msidata, "out")
+            ## only continuous files can currently be exported
+            msidata = as(msidata, "MSContinuousImagingExperiment")
+            writeImzML(msidata, "out")
         }
 
     plot(0,type='n',axes=FALSE,ann=FALSE)
@@ -463,6 +485,7 @@
                     <option value="Baseline_reduction">Baseline Reduction</option>
                     <option value="Smoothing">Peak smoothing</option>
                     <option value="mz_alignment">m/z alignment</option>
+                    <option value="mz_recalibration">m/z recalibration</option>
                     <option value="Peak_picking">Peak picking</option>
                     <option value="Peak_alignment">Peak alignment</option>
                     <option value="Peak_filtering">Peak filtering</option>
@@ -541,6 +564,26 @@
                     <param name="span" type="float" value="0.75"
                         label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/>
                 </when>
+	        <when value="mz_recalibration">
+		    <param name="alignment_tol" type="text" value="NA"
+		           label="tolerance" help="The tolerance to be used when matching the peaks in the unaligned spectra to the reference spectrum. If this is NA, then automatically guess a tolerance from the data.">
+		       <sanitizer>
+		            <valid initial="string.digits">
+		                <add value="N" />
+		                <add value="A" />
+		            </valid>
+		        </sanitizer>
+		    </param>
+		    <param name="alignment_units" type="select" display="radio" optional="False" label="The units to use for the tolerance.">
+		            <option value="ppm" selected="True">ppm</option>
+		            <option value="mz">m/z</option>
+		    </param>
+		    <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features to use for alignment. Only the m/z values from the tabular file will be kept."/>
+		    <param name="quantile" type="float" value="0.2"
+		        label="quantile" help="The top quantile of reference points (peaks detected via local maxima) to use from the reference spectrum."/>
+		    <param name="span" type="float" value="0.75"
+		        label="span" help="The smoothing parameter for the local polynomial regression used to determine the warping function."/>
+		</when>
                 <when value="Peak_picking">
                     <param name="SNR_picking_method" type="float" value="6"
                         label="Signal to noise ratio"
@@ -620,15 +663,19 @@
                     </param>
                     <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" checked="True" help="Binning can introduce NAs, should they be replaced with 0"/>
                     <conditional name="mz_range">
-            		 <param name="features_filtering" type="select" label="Select m/z feature filtering option">
+                    	<param name="features_filtering" type="select" label="Select m/z options">
                             <option value="none" selected="True">none</option>
                             <option value="change_mz_range">change m/z range</option>
+                            <option value="bin_to_reference">bin m/z to reference</option>
                         </param>
 			    <when value="none"/>
 			    <when value="change_mz_range">
 				<param name="min_mz" type="float" value="1" label="Minimum value for m/z"/>
 				<param name="max_mz" type="float" value="10000" label="Maximum value for m/z"/>
 			    </when>
+			    <when value="bin_to_reference">
+		                <expand macro="reading_1_column_mz_tabular" label="Tabular file with m/z features as reference for binning. Only the m/z values from the tabular file will be kept."/>
+	                   </when>
                     </conditional>
                 </when>
                 <when value="Transformation">
@@ -645,7 +692,6 @@
                 </when>
             </conditional>
         </repeat>
-        <param name="imzml_output" type="boolean" label="imzML output in processed format" checked="True" truevalue="proc_format" falsevalue="cont_format"/>
     </inputs>
     <outputs>
         <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/>
@@ -666,10 +712,8 @@
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Smoothing"/>
                     <conditional name="methods_for_smoothing">
-                        <param name="smoothing_method" value="gaussian"/>
-                        <param name="sd_gaussian" value="4"/>
+                        <param name="smoothing_method" value="sgolay"/>
                     </conditional>
-                        <param name="window_smoothing" value="9"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
@@ -702,7 +746,6 @@
                         </conditional>
                 </conditional>
             </repeat>
-            <param name="imzml_output" value="cont_format"/>
             <output name="QC_overview" file="preprocessing_results1.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="preprocessing_results1.imzml.txt" compare="sim_size">
                 <extra_files type="file" file="preprocessing_results1.imzml" name="imzml" lines_diff="6"/>
@@ -727,7 +770,6 @@
                     <param name="preprocessing_method" value="Peak_alignment"/>
                 </conditional>
             </repeat>
-            <param name="imzml_output" value="cont_format"/>
             <output name="QC_overview" file="preprocessing_results2.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="preprocessing_results2.imzml.txt" compare="sim_size">
                 <extra_files type="file" file="preprocessing_results2.imzml" name="imzml" lines_diff="6"/>
@@ -753,13 +795,11 @@
                     <conditional name="methods_for_picking">
                         <param name="picking_method" value="mad"/>
                     </conditional>
-                <param name="imzml_output" value="proc_format"/>
                 </conditional>
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Peak_alignment"/>
-                    <param name="imzml_output" value="proc_format"/>
                 </conditional>
             </repeat>
             <output name="QC_overview" file="preprocessing_results3.pdf" compare="sim_size"/>
@@ -783,6 +823,16 @@
             </repeat>
             <repeat name="methods">
                 <conditional name="methods_conditional">
+                    <param name="preprocessing_method" value="mz_recalibration"/>                   
+                    <param name="alignment_tol" value="2"/>
+                    <param name="alignment_units" value="ppm"/>
+                    <param name="mz_tabular" value="inputcalibrantfile2.txt" ftype="tabular"/>
+                    <param name="feature_column" value="1"/>
+                    <param name="feature_header" value="TRUE"/>
+                </conditional>
+            </repeat>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
                     <param name="preprocessing_method" value="Mass_binning"/>
                         <param name="bin_width" value="0.1"/>
                         <param name="bin_units" value="mz"/>
@@ -850,7 +900,7 @@
 
 **Output**
 
-- MSI data as continuous or processed imzML file
+- MSI data as continuous imzML file
 - pdf with key values and four random mass spectra after each processing step
 
         ]]>