diff flag-remove-peaks.xml @ 0:c9d911d33970 draft

planemo upload for repository https://github.com/computational-metabolomics/dma-tools-galaxy commit 6c48bd51987a28401de6cf5e49b1b30e5e73fe16-dirty
author tomnl
date Tue, 27 Mar 2018 06:51:52 -0400
parents
children 6701ecb1a3b0
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/flag-remove-peaks.xml	Tue Mar 27 06:51:52 2018 -0400
@@ -0,0 +1,258 @@
+<tool id="flag_remove_peaks" name="flag_remove_peaks" version="0.1.3">
+    <!-- Packages that Galaxy has to resolve before flag-remove-peaks.R can be run. -->
+    <requirements>
+        <requirement type="package">r</requirement>
+        <requirement type="package">r-optparse</requirement>
+        <requirement type="package" version="0.0.3">r-xcmswrapper</requirement>
+    </requirements>
+    <description>Tool to flag and remove XCMS grouped peaks from the xcmsSet object based on various thresholds
+        (e.g. RSD of intensity and retention time).
+    </description>
+
+    <!-- Exit codes of 1 and above are matched by this rule and reported as a job error. -->
+    <stdio>
+        <exit_code range="1:"/>
+    </stdio>
+    <!--
+        Cheetah template that builds the command line for flag-remove-peaks.R.
+        The script is located through $__tool_directory__ instead of the deprecated
+        "interpreter" attribute. Options of a conditional group are only emitted when the
+        user chose to update that group; otherwise nothing is passed for it and the script
+        falls back to whatever defaults it defines. Dataset paths and free-text values are
+        single-quoted so whitespace or shell metacharacters cannot split the command.
+    -->
+    <command><![CDATA[
+        Rscript '$__tool_directory__/flag-remove-peaks.R'
+                --xset_path='$xset_path'
+
+                #if $sample_flag.sample_flag=='update'
+                    --rsd_i_sample='$sample_flag.rsd_i_sample'
+                    --minfrac_sample=$sample_flag.minfrac_sample
+                    ## previously declared in the form but never forwarded to the script
+                    --rsd_rt_sample='$sample_flag.rsd_rt_sample'
+                    --ithres_sample='$sample_flag.ithres_sample'
+                #end if
+
+                #if $blank_flag.blank_flag=='update'
+                    --rsd_i_blank='$blank_flag.rsd_i_blank'
+                    --minfrac_blank=$blank_flag.minfrac_blank
+                    ## previously declared in the form but never forwarded to the script
+                    --rsd_rt_blank='$blank_flag.rsd_rt_blank'
+                    --ithres_blank='$blank_flag.ithres_blank'
+                    --s2b=$blank_flag.s2b
+                    --blank_class='$blank_flag.blank_class'
+                #end if
+
+                #if $peak_removal.peak_removal=='remove'
+                    --remove_spectra
+                    --minfrac_xcms=$peak_removal.minfrac_xcms
+                    --mzwid=$peak_removal.mzwid
+                    --bw=$peak_removal.bw
+                #end if
+
+                #if $advanced.advanced=='update'
+                    --egauss_thr='$advanced.egauss_thr'
+                    --polarity='$advanced.polarity'
+                    --grp_rm_ids='$advanced.grp_rm_ids'
+                    --xset_name='$advanced.xset_name'
+                    ## a boolean parameter renders as its truevalue/falsevalue, i.e. the flag or nothing
+                    $advanced.temp_save
+                #end if
+    ]]></command>
+    <inputs>
+        <!-- Input dataset holding the xcmsSet object; its path is handed to the script as the xset_path option. -->
+        <param name="xset_path" type="data" label="xcmsSet object (RData file)"
+                help="The path to the xcmsSet object saved as an RData file"/>
+        <!-- Flagging thresholds for the biological sample classes; the fields only exist when "update" is chosen. -->
+        <conditional name="sample_flag">
+            <param name="sample_flag" type="select" label="Change biological sample flag parameters?">
+                <option value="update">Update biological sample flag parameters</option>
+                <option value="" selected="true">Use default biological sample flag parameters</option>
+            </param>
+            <when value=""/>
+            <when value="update">
+                <param name="rsd_i_sample" type="text" value="NA" label="rsd_i_sample"
+                    help="Relative Standard Deviation threshold for the sample classes (--rsd_i_sample)"/>
+                <param name="minfrac_sample" type="float" value="0.5" min="0.0" max="1" label="minfrac_sample"
+                    help="minimum fraction of files for features needed for the sample classes (--minfrac_sample)"/>
+                <param name="rsd_rt_sample" type="text" value="NA" label="rsd_rt_sample"
+                    help="Relative standard Deviation threshold for the retention time of the sample
+                     classes (--rsd_rt_sample)"/>
+                <param name="ithres_sample" type="text" value="NA" label="ithres_sample"
+                    help="Intensity threshold for the sample (--ithres_sample)"/>
+            </when>
+        </conditional>
+        <!-- Flagging thresholds for the blank class; the fields only exist when "update" is chosen. -->
+        <conditional name="blank_flag">
+            <param name="blank_flag" type="select" label="Change blank flag parameters?">
+                <option value="update">Update blank flag parameters</option>
+                <option value="" selected="true">Use default blank flag parameters</option>
+            </param>
+            <when value=""/>
+            <when value="update">
+                <param name="blank_class" type="text" label="blank_class" value="blank"
+                    help="A string representing the class that will be used for the blank (--blank_class)"/>
+                <param name="rsd_i_blank" type="text" label="rsd_i_blank" value="NA"
+                    help="RSD threshold for the blank (--rsd_i_blank)"/>
+                <param name="minfrac_blank" type="float" label="minfrac_blank" value="0.5" min="0.0" max="1"
+                    help="minimum fraction of files for features needed for the blank (--minfrac_blank)"/>
+                <param name="rsd_rt_blank" type="text" label="rsd_rt_blank" value="NA"
+                    help="RSD threshold for the retention time of the blank (--rsd_rt_blank)"/>
+                <param name="ithres_blank" type="text" label="ithres_blank" value="NA"
+                    help="Intensity threshold for the blank (--ithres_blank)"/>
+                <!-- The worked example uses 1000 (sample) and 10 (blank) so that it matches the stated ratio of 100. -->
+                <param name="s2b" type="float" label="s2b" value="10"
+                    help="fold change (sample/blank) needed for sample peak to be allowed. e.g.
+                    if s2b set to 10 and the recorded sample 'intensity' value was 1000 and blank was 10.
+                    1000/10 = 100, so sample has fold change higher than the threshold and the peak
+                    is not considered a blank (--s2b)"/>
+            </when>
+        </conditional>
+        <!-- Optionally remove the flagged peaks and re-group; the xcms grouping settings only exist for "remove". -->
+        <conditional name="peak_removal">
+            <param name="peak_removal" type="select" label="Remove peaks from xcmsSet object?">
+                <option value="remove">Remove peaks and re-group</option>
+                <option value="" selected="true">Only flag peaks (do not remove and re-group)</option>
+            </param>
+            <when value=""/>
+            <when value="remove">
+                <param name="minfrac_xcms" type="float" label="minfrac_xcms" value="0.5" min="0.0" max="1"
+                    help="minfrac for xcms grouping (--minfrac_xcms)"/>
+                <param name="mzwid" type="float" label="mzwid" value="0.001"
+                    help="mzwid for xcms grouping (--mzwid)"/>
+                <param name="bw" type="float" label="bw" value="5"
+                    help="bw for xcms grouping (--bw)"/>
+            </when>
+        </conditional>
+        <!-- Advanced and testing options; the fields only exist when "update" is chosen. -->
+        <conditional name="advanced">
+            <param name="advanced" type="select" label="Advanced parameters">
+                <option value="update">Update advanced and testing parameters</option>
+                <option value="" selected="true">Use default advanced parameters</option>
+            </param>
+            <when value=""/>
+            <when value="update">
+                <param name="egauss_thr" type="text" label="egauss_thr" value="NA"
+                    help="Threshold for filtering out non gaussian shaped peaks. Note this only works
+                            if the 'verbose columns' and 'fit gauss' was used with xcms (--egauss_thr)"/>
+                <!-- Checked renders the flag itself on the command line, unchecked renders nothing. -->
+                <param name="temp_save" type="boolean" label="temp_save" checked="false" truevalue="--temp_save" falsevalue=""
+                    help="Save the files produced at each step (for testing purposes) (--temp_save)"/>
+                <param name="polarity" type="select" label="polarity"
+                    help="polarity (just used for naming purpose when files are saved) (--polarity)">
+                    <option value="positive">Positive</option>
+                    <option value="negative">Negative</option>
+                    <option value="NA" selected="true">NA</option>
+                </param>
+                <param name="grp_rm_ids" type="text" label="grp_rm_ids" value="NA"
+                    help="Comma separated list of grouped_xcms peak ids to remove (corresponds to the row from xcms::group output) e.g. '1,20,30,56' (--grp_rm_ids)"/>
+                <param name="xset_name" type="text" label="xset_name" value="xset"
+                    help="Name of the xcmsSet object within the RData file (--xset_name)"/>
+            </when>
+        </conditional>
+    </inputs>
+    <!-- Each dataset is collected from the job working directory by the file name given in from_work_dir. -->
+    <outputs>
+        <data name="peaklist_filtered" format="tabular" label="peaklist_filtered"
+              from_work_dir="peaklist_filtered.txt" visible="true"/>
+        <data name="removed_peaks" format="tabular" label="removed_peaks"
+              from_work_dir="removed_peaks.txt" visible="true"/>
+        <data name="xset_filtered" format="rdata" label="xset_filtered"
+              from_work_dir="xset_filtered.RData" visible="true"/>
+    </outputs>
+    <!--
+        Functional test: blank parameters are updated with "KO" as the blank class and peak
+        removal with re-grouping is switched on. The two tabular outputs are compared with
+        the reference files. Datatypes are given through "ftype", the attribute the test
+        schema defines, and each conditional is set once through its nested block.
+    -->
+    <tests>
+        <test>
+            <param name="xset_path" value="xset.RData" ftype="rdata"/>
+            <conditional name="blank_flag">
+                <param name="blank_flag" value="update"/>
+                <param name="blank_class" value="KO"/>
+            </conditional>
+            <conditional name="peak_removal">
+                <param name="peak_removal" value="remove"/>
+            </conditional>
+            <output name="peaklist_filtered" ftype="tabular" file="peaklist_filtered.tabular"/>
+            <output name="removed_peaks" ftype="tabular" file="removed_peaks.tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+=======================================
+Flag & remove peaks from xcmsSet object
+=======================================
+-----------
+Description
+-----------
+
+Tool to flag XCMS grouped peaks based on various criteria (e.g. RSD, intensity). The flagged grouped peaks can then be removed
+completely from the xcmsSet object (xset). This means removing the individual peaks associated with each file, which are located
+in the **xset@peaks** slot of the xcmsSet object.
+
+Additionally a list of ids of the xcms grouped peaks can be supplied; all peaks associated with these ids can then
+be removed.
+
+**Note**: grouped peak refers to a peak that has been grouped together by xcms::group function
+
+-----------------
+Updated peaklist
+-----------------
+The calculated columns for the updated peaklist dataframe include:
+
+* RSD of intensity for grouped peaks across each class
+* RSD of retention time for grouped peaks across each class
+* Coverage across all classes
+* mzmin_full & mzmax_full: the full mzrange of each grouped peak
+* rtmin_full & rtmax_full: the full rtrange of each grouped peak
+* flag for criteria for the blank class (if 1 it means the blank is valid and this grouped peak will be removed)
+* flag for criteria for the sample classes (if 1 it means that this grouped peak is valid for this class)
+* all_sample_valid: flag for all samples (if 1 it means that at least 1 sample class is valid and the blank is not valid) (we keep the grouped peak)
+
+This is in addition to the standard output from the xcmsSet peaklist
+
+**flag example**
+
+Dataset consists of 3 classes. Blank, cond1 and cond2. The classes cond1 and cond2 are biological sample classes.
+
+============= ============= ============= ================ ================
+blank_valid   cond1_valid   cond2_valid   all_sample_valid Keep peak?
+============= ============= ============= ================ ================
+0             0             1             1                Yes
+------------- ------------- ------------- ---------------- ----------------
+0             1             1             1                Yes
+------------- ------------- ------------- ---------------- ----------------
+1             0             1             0                No
+------------- ------------- ------------- ---------------- ----------------
+1             0             0             0                No
+------------- ------------- ------------- ---------------- ----------------
+0             0             0             0                No
+============= ============= ============= ================ ================
+
+
+
+-----------------
+Filters for flags
+-----------------
+
+The following filters can be used to determine if a grouped peak gets flagged to keep. If the column all_sample_valid is 0 then
+the grouped peak will be removed
+
+
+* RSD of intensity for each biological sample class
+* minfrac for each biological sample class
+* RSD of retention time for each biological sample class
+* intensity threshold for each biological sample class
+* Blank subtraction. If a blank peak is found where the intensity of any corresponding biological sample class is not greater than the s2b threshold (sample/blank), then this grouped peak will be flagged for removal
+
+The blank grouped peaks also have their own filters. If the blank peak passes these criteria and the s2b threshold detailed above
+then the peak will be removed.
+
+* RSD of intensity
+* minfrac
+* intensity threshold
+
+Additionally there is a filter for assessing how well the peaks fit the gaussian shape. Note that this can only be performed
+when XCMS has fit_gauss option and verbose columns set to TRUE. Also, these peaks are just removed and not flagged.
+
+
+
+-----------
+Regrouping
+-----------
+The resulting xcmsSet object, from which the flagged peaks have been removed, needs to be regrouped otherwise the individual peaks
+associated with each file will not be correctly linked to the grouped peaks.
+
+This tool will re-group the xcmsSet object and check the newly created re-grouped xcmsSet object to see if any peaks are
+still being flagged. If so the process will be repeated until the xcmsSet object only contains peaks that match
+the peak criteria.
+
+The output file is an xcmsSet.RData file.
+    ]]></help>
+</tool>
\ No newline at end of file