diff filtering.xml @ 6:97fdb0ce4dd3 draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit 15e24b1f0143679647906bc427654f66b417a45c"
author galaxyp
date Wed, 25 Mar 2020 09:24:18 +0000
parents b045ba419ac6
children 5aaea231da6b
line wrap: on
line diff
--- a/filtering.xml	Fri Dec 13 18:48:46 2019 +0000
+++ b/filtering.xml	Wed Mar 25 09:24:18 2020 +0000
@@ -1,12 +1,14 @@
-<tool id="cardinal_filtering" name="MSI filtering" version="@VERSION@.3">
+<tool id="cardinal_filtering" name="MSI filtering" version="2.4.0.0">
     <description>tool for filtering mass spectrometry imaging data</description>
     <macros>
         <import>macros.xml</import>
     </macros>
-    <expand macro="requirements">
+    <requirements>
+        <requirement type="package" version="2.4.0">bioconductor-cardinal</requirement>
+        <requirement type="package" version="3.6.1">r-base</requirement>
         <requirement type="package" version="2.3">r-gridextra</requirement>
-        <requirement type="package" version="3.0">r-ggplot2</requirement>
-    </expand>
+        <requirement type="package" version="3.2.1">r-ggplot2</requirement>
+    </requirements>
     <expand macro="print_version"/>
     <command detect_errors="exit_code">
     <![CDATA[
@@ -14,12 +16,9 @@
         @INPUT_LINKING@
         cat '${MSI_subsetting}' &&
         Rscript '${MSI_subsetting}' &&
-
-        #if str($imzml_output) == "imzml_format":
-            mkdir $outfile_imzml.files_path &&
-            mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
-            mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
-        #end if
+        mkdir $outfile_imzml.files_path &&
+        mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
+        mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
         echo "imzML file:" > $outfile_imzml &&
         ls -l "$outfile_imzml.files_path" >> $outfile_imzml
     ]]>
@@ -37,29 +36,51 @@
 library(ggplot2)
 library(gridExtra)
 
-@READING_MSIDATA@
+
+
+## function to read the single object stored in an RData file, independent of the name it was saved under
+loadRData <- function(fileName){
+    loaded_env = new.env(); load(fileName, envir = loaded_env) ## private environment: a stored object called "fileName" cannot overwrite the argument
+    get(ls(loaded_env), envir = loaded_env)
+}
+
+## read the input according to its datatype (imzml, analyze75, otherwise RData) into msidata
+#if $infile.ext == 'imzml'
+    #if str($processed_cond.processed_file) == "processed":
+        msidata <- readImzML('infile', resolution=$processed_cond.accuracy, units = "$processed_cond.units", attach.only=TRUE)
+    #else
+        msidata <- readImzML('infile', attach.only=TRUE)
+    #end if
+    centroided(msidata) = $centroids
+#elif $infile.ext == 'analyze75'
+    msidata = readAnalyze('infile', attach.only=TRUE)
+    centroided(msidata) = $centroids
+#else
+    msidata = loadRData('infile.RData')
+    msidata = as(msidata, "MSImagingExperiment")
+#end if
 
 
 ########################### QC numbers ########################
 
         ## Number of features (m/z)
-        maxfeatures = length(features(msidata))
+        maxfeatures = nrow(msidata)
         ## Range m/z
         minmz = round(min(mz(msidata)), digits=2)
         maxmz = round(max(mz(msidata)), digits=2)
         ## Number of spectra (pixels)
-        pixelcount = length(pixels(msidata))
+        pixelcount = ncol(msidata)
         ## Range x coordinates
-        minimumx = min(coord(msidata)[,1])
-        maximumx = max(coord(msidata)[,1])
+        minimumx = min(coord(msidata)\$x)
+        maximumx = max(coord(msidata)\$x)
         ## Range y coordinates
-        minimumy = min(coord(msidata)[,2])
-        maximumy = max(coord(msidata)[,2])
+        minimumy = min(coord(msidata)\$y)
+        maximumy = max(coord(msidata)\$y)
         ## Store features for QC plot
         featuresinfile = mz(msidata)
 
-        all_df = cbind(coord(msidata)[,1:2], rep("removed pixels", times=ncol(msidata)))
-        colnames(all_df)[3] = "annotation"
+        all_df = data.frame(coord(msidata)\$x, coord(msidata)\$y, rep("removed pixels", times=ncol(msidata)))
+        colnames(all_df) = c("x", "y", "annotation")
 
 ## Next steps will only run if there are more than 0 pixels/features in the file
 
@@ -92,7 +113,7 @@
                 error=function(cond) {
                 ## in case all coordinates were outside the dataset leading to zero pixels, tool is stopped to avoid continuing with wrong data
                     message("Error during pixel filtering")
-                    message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, columns with coordinates contain empty fields or letters, all coordinates were outside the range of the dataset - this can be checked with the 'MSI qualitycontrol' tool)")
+                    message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, columns with coordinates contain empty fields or letters, all coordinates were outside the range of the dataset - this can be checked with the 'MSI qualitycontrol' tool")
                     stop(call.=FALSE)
                 }
             )    
@@ -142,9 +163,9 @@
 
     ## dataframe for QC of pixel distribution
 
-    remaining_df = cbind(coord(msidata)[,1:2], rep("remaining pixels", times=ncol(msidata)))
-    colnames(remaining_df)[3] = "annotation"
-    position_df = rbind(all_df, remaining_df) 
+    remaining_df = data.frame(as.numeric(coord(msidata)\$x), as.numeric(coord(msidata)\$y), rep("remaining pixels", times=ncol(msidata)))
+    colnames(remaining_df) = c("x", "y", "annotation")
+    position_df = rbind(all_df, remaining_df)
     position_df[row.names(unique(position_df[,c("x", "y")])),]
     position_df\$annotation = factor(position_df\$annotation)
     gc()
@@ -205,7 +226,7 @@
                         ## in case all provided m/z values were outside the m/z range
                         ## tool is stopped to avoid continuing with wrong data
                             message("Error during m/z filtering")
-                            message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, column with m/z features contains empty fields or letters, all m/z features s were outside the range of the dataset  - this can be checked with the 'MSI qualitycontrol' tool) or did not match any m/z feature of the dataset (see help section for more information on that)")
+                            message("Possible problems: Forgot to set 'Tabular file contains a header line' = Yes, wrong columns selected, column with m/z features contains empty fields or letters, all m/z features s were outside the range of the dataset  (this can be checked with the 'MSI qualitycontrol' tool) or did not match any m/z feature of the dataset (see help section for more information on that)")
                             stop(call.=FALSE)
                         }
                     )    
@@ -262,8 +283,11 @@
                         #end if 
                         current_mass = which(c(mz(msidata) <= masses + plusminus & mz(msidata) >= masses - plusminus))
                         mass_to_remove = append(mass_to_remove, current_mass)}
-                    msidata= msidata[-mass_to_remove, ]
-                    validmz = numberfeatures - nrow(msidata)
+                        mass_to_keep = setdiff(1:nrow(msidata),mass_to_remove)
+
+                    msidata= msidata[mass_to_keep, ]
+                    validmz = maxfeatures - nrow(msidata)
+
                     ## does not throw error when processed file has no features left, therefore create error to avoid continuing with wrong data
                     if (nrow(msidata) == 0)
                         {
@@ -278,7 +302,6 @@
 
 
 
-
         ######################## No m/z filtering ##############################
 
         #elif str($features_cond.features_filtering) == "none":
@@ -304,18 +327,18 @@
 #################### QC numbers #######################
 
 ## Number of features (m/z)
-maxfeatures2 = length(features(msidata))
+maxfeatures2 = nrow(msidata)
 ## Range m/z
 minmz2 = round(min(mz(msidata)), digits=2)
 maxmz2 = round(max(mz(msidata)), digits=2)
 ## Number of spectra (pixels)
-pixelcount2 = length(pixels(msidata))
+pixelcount2 = ncol(msidata)
 ## Range x coordinates
-minimumx2 = min(coord(msidata)[,1])
-maximumx2 = max(coord(msidata)[,1])
+minimumx2 = min(coord(msidata)\$x)
+maximumx2 = max(coord(msidata)\$x)
 ## Range y coordinates
-minimumy2 = min(coord(msidata)[,2])
-maximumy2 = max(coord(msidata)[,2])
+minimumy2 = min(coord(msidata)\$y)
+maximumy2 = max(coord(msidata)\$y)
 
 properties = c("Number of m/z features",
                "Range of m/z values",
@@ -347,7 +370,7 @@
 
 pdf("filtertool_QC.pdf", fonts = "Times", pointsize = 12)
 plot(0,type='n',axes=FALSE,ann=FALSE)
-title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.display_name"))
+title(main=paste0("Qualitycontrol of filtering tool for file: \n\n", "$infile.element_identifier"))
 grid.table(property_df, rows= NULL)
 
 ## QC report only when pixels/features are left
@@ -362,35 +385,39 @@
            ggtitle("Spatial orientation of filtered pixels")+
            theme_bw()+
            theme(plot.title = element_text(hjust = 0.5))+
-           theme(legend.position="bottom",legend.direction="vertical")
+           theme(legend.position="bottom",legend.direction="vertical")+
+           guides(fill=guide_legend(ncol=5,byrow=TRUE))
     print(pixel_image)
 
-    ### plot features which are removed
-    hist(mz(msidata), xlab="m/z", main="Kept m/z values")
-    #if str($features_cond.features_filtering) == "none":
-        print("no difference histogram as no m/z filtering took place")
-    #else:
-        if (isTRUE(all.equal(featuresinfile, mz(msidata)))){
-        print("No difference in m/z values before and after filtering, no histogram drawn")
-        }else{
-        hist(setdiff(featuresinfile, mz(msidata)), xlab="m/z", main="Removed m/z values")}
-    #end if
+
+    ### visual mz feature control
+
+    ## m/z values that were present before filtering but are absent afterwards
+    mz_removed = setdiff(featuresinfile, mz(msidata))
+    removed_df = data.frame(mz = mz_removed, legend = rep("removed m/z", length(mz_removed)))
+    kept_df = data.frame(mz = mz(msidata), legend = rep("remaining m/z", nrow(msidata)))
+
+    ## removed rows come first so the row order matches the legend order used so far
+    histogram_df = rbind(removed_df, kept_df)
+
+    histogram_mz = ggplot(histogram_df, aes(x=mz, fill=legend)) +
+        geom_histogram() +
+        theme_bw() +
+        theme(plot.title = element_text(hjust = 0.5)) +
+        theme(legend.position="bottom", legend.direction="vertical") +
+        labs(title="Overview of filtered m/z", x="m/z", y = "count") + guides(fill=guide_legend(ncol=5, byrow=TRUE))
+    print(histogram_mz)
+
 
         dev.off()
 
     ## save msidata as imzML file, will only work if there is at least 1 m/z left
 
-    #if str($imzml_output) == "imzml_format":
         if (maxfeatures2 > 0){
             ## make sure that coordinates are integers
             coord(msidata)\$y = as.integer(coord(msidata)\$y)
             coord(msidata)\$x = as.integer(coord(msidata)\$x)
         writeImzML(msidata, "out")}
-    #elif str($imzml_output) == "rdata_format":
-        ## save msidata as Rfile
-        iData(msidata) = iData(msidata)[]
-        save(msidata, file="$outfile_rdata")
-    #end if
 
 
 }else{
@@ -448,20 +475,10 @@
                 </param>
             </when>
         </conditional>
-        <param name="imzml_output" type="select" display = "radio" optional = "False"
-               label="Output format" help= "Choose the output format">
-                <option value="imzml_format" >imzML</option>
-                <option value="rdata_format" selected="True" >RData</option>
-        </param>
     </inputs>
 
     <outputs>
-        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML">
-            <filter>imzml_output =='imzml_format'</filter>
-        </data>
-        <data format="rdata" name="outfile_rdata" label="${tool.name} on ${on_string}: RData">
-            <filter>imzml_output == 'rdata_format'</filter>
-        </data>
+        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"/>
         <data format="pdf" name="QC_overview" from_work_dir="filtertool_QC.pdf" label = "${tool.name} on ${on_string}: QC"/>
     </outputs>
     <tests>
@@ -475,10 +492,9 @@
             <param name="features_filtering" value="features_range"/>
             <param name="min_mz" value="350"/>
             <param name="max_mz" value="500"/>
-            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="imzml_filtered3.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="out3.imzml.txt" compare="sim_size">
-                <extra_files type="file" file="out3.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out3.imzml" name="imzml" lines_diff="6"/>
                 <extra_files type="file" file="out3.ibd" name="ibd" compare="sim_size"/>
             </output>
         </test>
@@ -488,10 +504,9 @@
             <param name="annotation_file" ftype="tabular" value="inputpixels_2column.tabular"/>
             <param name="column_x" value="1"/>
             <param name="column_y" value="3"/>
-            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="imzml_filtered4.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="out4.imzml.txt" compare="sim_size">
-                <extra_files type="file" file="out4.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out4.imzml" name="imzml" lines_diff="6"/>
                 <extra_files type="file" file="out4.ibd" name="ibd" compare="sim_size"/>
             </output>
         </test>
@@ -506,34 +521,51 @@
             <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/>
             <param name="feature_column" value="1"/>
             <param name="feature_header" value="0"/>
-            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="imzml_filtered5.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="out5.imzml.txt" compare="sim_size">
-                <extra_files type="file" file="out5.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out5.imzml" name="imzml" lines_diff="6"/>
                 <extra_files type="file" file="out5.ibd" name="ibd" compare="sim_size"/>
             </output>
         </test>
         <test>
             <expand macro="infile_analyze75"/>
-            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="analyze75_filtered2.pdf" compare="sim_size"/>
             <output name="outfile_imzml" ftype="imzml" file="out6.imzml.txt" compare="sim_size">
-                <extra_files type="file" file="out6.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out6.imzml" name="imzml" lines_diff="6"/>
                 <extra_files type="file" file="out6.ibd" name="ibd" compare="sim_size"/>
             </output>
         </test>
         <test>
             <param name="infile" value="preprocessed.RData" ftype="rdata"/>
-            <conditional name="outputs">
-                <param name="outputs_select" value="no_quality_control"/>
-            </conditional>
-            <param name="imzml_output" value="imzml_format"/>
             <output name="QC_overview" file="rdata_notfiltered.pdf" compare="sim_size" />
             <output name="outfile_imzml" ftype="imzml" file="out7.imzml.txt" compare="sim_size">
-                <extra_files type="file" file="out7.imzml" name="imzml" lines_diff="4"/>
+                <extra_files type="file" file="out7.imzml" name="imzml" lines_diff="6"/>
                 <extra_files type="file" file="out7.ibd" name="ibd" compare="sim_size"/>
             </output>
         </test>
+        <test>
+            <expand macro="processed_infile_imzml"/>
+            <conditional name="processed_cond">
+                <param name="processed_file" value="processed"/>
+                <param name="accuracy" value="100"/>
+                <param name="units" value="ppm"/>
+            </conditional>
+            <param name="pixel_filtering" value="two_columns"/>
+            <param name="annotation_file" ftype="tabular" value="inputpixels_2column.tabular"/>
+            <param name="column_x" value="1"/>
+            <param name="column_y" value="3"/>
+            <param name="features_filtering" value="remove_features"/>
+            <param name="mz_tabular" ftype="tabular" value = "featuresofinterest5.tabular"/>
+            <param name="feature_column" value="1"/>
+            <param name="feature_header" value="0"/>
+            <param name="removal_plusminus" value="100"/>
+            <param name="units_removal" value="ppm"/>
+            <output name="QC_overview" file="imzml_filtered8.pdf" compare="sim_size"/>
+            <output name="outfile_imzml" ftype="imzml" file="out8.imzml.txt" compare="sim_size">
+                <extra_files type="file" file="out8.imzml" name="imzml" lines_diff="6"/>
+                <extra_files type="file" file="out8.ibd" name="ibd" compare="sim_size"/>
+            </output>
+        </test>
     </tests>
     <help>
         <![CDATA[
@@ -581,7 +613,7 @@
 
 - m/z feature removing: 
 
-    - Perturbing m/z features such as matrix contaminants can be removed by specifying their m/z value in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed
+    - Perturbing m/z features such as matrix contaminants, tryptic peptides and internal calibrants can be removed by specifying their m/z value in a tabular file, optionally with a half window size in ppm or m/z for the window in which peaks should be removed
 
 
 **Tips**
@@ -598,7 +630,7 @@
 
 **Output**
 
-- MSI data as imzML file or .RData (can be read with the Cardinal package in R)
+- MSI data as (continuous) imzML file
 - PDF with a heatmap showing which pixels were removed and which were kept, as well as a histogram of the removed and remaining m/z features