Mercurial > repos > recetox > october_recetox_xmsannotator_advanced

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Fri Oct 29 09:49:13 2021 +0000
@@ -0,0 +1,65 @@
+<macros>
+    <!-- Wrapper version; the tool file appends its own "+galaxyN" suffix to this token. -->
+    <token name="@TOOL_VERSION@">0.9.0</token>
+    <!-- Runtime environment: the tool is executed inside the Docker image named below. -->
+    <xml name="requirements">
+        <requirements>
+            <container type="docker">recetox/recetox-xmsannotator:october4</container>
+        </requirements>
+    </xml>
+    <!-- Attribution metadata: the organization credited as creator of the tool. -->
+    <xml name="creator">
+        <creator>
+            <organization name="RECETOX MUNI" url="https://www.recetox.muni.cz/"/>
+        </creator>
+    </xml>
+    <!-- Shared dataset inputs: peak table, compound database and the optional adduct database. -->
+    <xml name="inputs">
+        <param name="peak_table" type="data" format="csv,parquet" label="Peak table">
+            <help><![CDATA[
+                A peak-intensity table such as the one produced by apLCMS.
+                The file is required to contain the fields <em>mz</em> and <em>rt</em>.
+                Columns for feature intensity in a sample have to start with <em>intensity</em>.
+            ]]></help>
+        </param>
+        <param name="compound_table" type="data" format="csv,parquet" label="Compound database">
+            <help><![CDATA[
+                Database of compounds according to which the annotation is performed.
+                The database is required to contain the fields <em>compound_id</em>, <em>monoisotopic_mass</em>, and <em>molecular_formula</em>.
+            ]]></help>
+        </param>
+        <param name="adduct_table" type="data" format="csv,parquet" optional="true" label="Adduct database (optional)">
+            <help><![CDATA[
+                Database of adducts which is combined with the compound database to form molecule-adduct pairs.
+                The database is required to contain the fields <em>adduct</em>, <em>charge</em>, <em>mass</em>, and <em>n_molecules</em>.
+            ]]></help>
+        </param>
+    </xml>
+
+    <!-- Single output dataset of the tool, declared with the parquet datatype. -->
+    <xml name="outputs">
+        <data name="annotation_parquet" format="parquet"/>
+    </xml>
+
+    <!-- Mass tolerance parameter; the yield slot lets a caller append further parameters after it. -->
+    <xml name="tolerance">
+        <param name="mass_tolerance_ppm" type="integer" value="5" min="0"
+               label="Mass tolerance [ppm]"
+               help="Mass tolerance in ppm for database matching."/>
+        <yield/>
+    </xml>
+    <token name="@HELP@">
+        <![CDATA[
+        Annotate the peak intensity table (e.g. from an apLCMS run) with compounds from the compounds database
+        using advanced methods.
+
+        The annotation process generates all possible compound-adduct pairs and matches those pairs to the measured
+        peaks. A compound-adduct pair is considered a match to a certain peak when the difference of their masses is
+        within some tolerance.
+        ]]>
+    </token>
+    <!-- Literature reference (by DOI) that tools expanding this macro place in their citations block. -->
+    <xml name="citations">
+        <citation type="doi">10.1021/acs.analchem.6b01214</citation>
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/october_recetox_xmsannotator_advanced.xml	Fri Oct 29 09:49:13 2021 +0000
@@ -0,0 +1,172 @@
+<tool id="october_recetox_xmsannotator_advanced" name="OCTOBER RECETOX xMSannotator advanced" version="@TOOL_VERSION@+galaxy0">
+    <description>annotate peak intensity table including scores and confidence levels</description>
+    <!-- Pull in the shared definitions from macros.xml, then expand the metadata macros. -->
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="creator"/>
+    <expand macro="requirements"/>
+    <!--
+        Runs the generated R wrapper script. The first -e expression defines n_workers from the
+        GALAXY_SLOTS shell variable (falling back to 1); the dollar sign is escaped so that the
+        template engine leaves it for the shell. The second expression sources the config file.
+    -->
+    <command detect_errors="aggressive"><![CDATA[
+        Rscript -e "n_workers <- \${GALAXY_SLOTS:-1}" -e "source('${wrapper}')"
+    ]]></command>
+
+    <configfiles>
+        <configfile name="wrapper"><![CDATA[
+            ## R script sourced by the command block; the template engine fills in parameter values.
+            library(recetox.xmsannotator)
+
+            annotation <- advanced_annotation(
+            ## Each table input accepts csv or parquet, so the loader is chosen by datatype.
+            ## NOTE(review): the *_csv loaders are assumed to be exported by recetox.xmsannotator
+            ## next to the *_parquet ones; confirm against the container image.
+            #if $peak_table.is_of_type("parquet")
+                peak_table = load_peak_table_parquet("${peak_table}"),
+            #else
+                peak_table = load_peak_table_csv("${peak_table}"),
+            #end if
+            ## The adduct table is optional: leave the argument out when no dataset was selected
+            ## instead of handing the literal string "None" to the loader.
+            ## NOTE(review): relies on advanced_annotation having a default for adduct_table; confirm.
+            #if $adduct_table
+                #if $adduct_table.is_of_type("parquet")
+                adduct_table = load_adduct_table_parquet("${adduct_table}"),
+                #else
+                adduct_table = load_adduct_table_csv("${adduct_table}"),
+                #end if
+            #end if
+            #if $compound_table.is_of_type("parquet")
+                compound_table = load_compound_table_parquet("${compound_table}"),
+            #else
+                compound_table = load_compound_table_csv("${compound_table}"),
+            #end if
+            ## The form value is given in ppm; scale it to a relative tolerance.
+                mass_tolerance = 1e-6 * ${mass_tolerance_ppm},
+                time_tolerance = $time_tolerance,
+                correlation_threshold = as.double($clustering.correlation_threshold),
+                min_cluster_size = as.integer($clustering.min_cluster_size),
+                deep_split = as.integer($clustering.deep_split),
+                network_type = "$clustering.network_type",
+            ## #if $scoring.expected_adducts
+            ##     expected_adducts = load_expected_adducts_csv("${scoring.expected_adducts}"),
+            ## #end if
+            ## #if $scoring.boost_compounds
+            ##     boost_compounds = load_boost_compounds_csv("${scoring.boost_compounds}"),
+            ## #end if
+                redundancy_filtering = $scoring.redundancy_filtering,
+            ## n_workers is defined on the Rscript command line before this file is sourced.
+                n_workers = n_workers,
+            ## new params
+                intensity_deviation_tolerance = as.double($intensity_deviation_tolerance),
+                mass_defect_tolerance = as.double($mass_defect_tolerance),
+                mass_defect_precision = as.double($mass_defect_precision),
+                peak_rt_width = as.integer($peak_rt_width),
+                maximum_isotopes = as.integer($maximum_isotopes),
+                min_ions_per_chemical = as.integer($min_ions_per_chemical),
+                filter_by = "$filter_by"
+            )
+
+            save_parquet(data = annotation, file = "${annotation_parquet}")
+        ]]></configfile>
+    </configfiles>
+
+    <inputs>
+        <expand macro="inputs"/>
+        <!-- The nested parameter fills the yield slot of the tolerance macro. -->
+        <expand macro="tolerance">
+            <param name="time_tolerance" type="float" min="0" value="10"
+                   label="Retention time tolerance [s]">
+                <help>Retention time tolerance in seconds for finding peaks derived from the same parent compound.</help>
+            </param>
+        </expand>
+        <!--
+            Clustering options.
+            NOTE(review): the wrapper script forwards correlation_threshold, min_cluster_size,
+            deep_split and network_type, but not correlation_method. Confirm whether that is intended.
+        -->
+        <section name="clustering" title="Clustering">
+            <param name="correlation_method" type="select" display="radio" label="Correlation method">
+                <option value="pearson" selected="true"/>
+                <option value="spearman"/>
+            </param>
+            <param name="correlation_threshold" type="float" value="0.7"
+                   label="Correlation threshold"
+                   help="Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite."/>
+            <param name="min_cluster_size" type="integer" min="1" value="10"
+                   label="Minimum cluster size"
+                   help="The minimum number of nodes to be considered as a cluster."/>
+            <param name="deep_split" type="integer" min="0" max="4" value="2" label="Deep split">
+                <help>
+                    Deep split provides a rough control over sensitivity to cluster splitting. The higher the value,
+                    the more and smaller clusters will be produced (see WGCNA package documentation).
+                </help>
+            </param>
+            <param name="network_type" type="select" display="radio" label="Network type">
+                <help>
+                    Network type parameter affects how the network's adjacency matrix is created from the correlation
+                    matrix (see WGCNA package documentation).
+                </help>
+                <option value="signed"/>
+                <option value="unsigned" selected="true"/>
+            </param>
+        </section>
+        <!--
+            Scoring options.
+            NOTE(review): the wrapper script reads only redundancy_filtering from this section;
+            strict_boosting, min_isp and max_isp are collected but not forwarded. Confirm intent.
+        -->
+        <section name="scoring" title="Scoring" expanded="true">
+            <param name="strict_boosting" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
+                <label>Strict boosting</label>
+                <help>
+                    Boost the scores of metabolites that belong not only to the same pathway but also to the
+                    same cluster. Otherwise, do not account for cluster membership.
+                </help>
+            </param>
+            <!-- <param name="expected_adducts" type="data" format="csv" optional="true">
+                <label>Expected adducts (optional)</label>
+
+                <help>
+                    Require the presence of certain adducts for a high confidence match. By default, at least the
+                    presence of an M+H adduct is required for a high confidence match.
+                </help>
+            </param> -->
+            <!-- <param name="boost_compounds" type="data" format="csv" optional="true">
+                <label>Validated compounds score boosting (optional)</label>
+                <help>
+                    Table of previously validated compounds to boost their scores and confidence levels.
+                    The 1st column of the table must contain IDs of compounds.
+                    The optional 2nd and 3rd columns may contain mz values and retention times.
+                </help>
+            </param> -->
+            <param name="min_isp" type="integer" min="0" value="1">
+                <label>Minimum number of expected isotopes</label>
+                <help>
+                    Minimum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
+                </help>
+            </param>
+            <param name="max_isp" type="integer" min="0" value="5">
+                <label>Maximum number of expected isotopes</label>
+                <help>
+                    Maximum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
+                </help>
+            </param>
+            <param name="redundancy_filtering" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
+                <label>Redundancy filtering</label>
+                <help>Whether to filter out low-scored multiple matches or not.</help>
+            </param>
+        </section>
+        <!--
+            Additional numeric options that the wrapper script passes to advanced_annotation under
+            the same names. Labels are human-readable; parameter names, types and defaults are unchanged.
+        -->
+        <param name="intensity_deviation_tolerance" type="float" value="0.1" label="Intensity deviation tolerance"/>
+        <param name="mass_defect_tolerance" type="float" value="0.1" label="Mass defect tolerance"/>
+        <param name="mass_defect_precision" type="float" value="0.01" label="Mass defect precision"/>
+        <param name="peak_rt_width" type="integer" value="1" label="Peak retention time width"/>
+        <param name="maximum_isotopes" type="integer" value="10" label="Maximum number of isotopes"/>
+        <param name="min_ions_per_chemical" type="integer" value="2" label="Minimum number of ions per chemical"/>
+        <param name="filter_by" type="text" value="c('M-H', 'M+H')">
+            <!--
+                The value is pasted between double quotes in the R wrapper script, so a double quote
+                or a backslash could end the string early and inject R code. Those two characters are
+                therefore not accepted verbatim; every other printable ASCII character stays allowed,
+                which keeps the default value intact.
+            -->
+            <sanitizer>
+                <valid initial="string.printable">
+                    <remove value="&quot;"/>
+                    <remove value="\"/>
+                </valid>
+            </sanitizer>
+            <label>filter_by</label>
+        </param>
+    </inputs>
+
+    <!-- Output datasets are declared by the shared "outputs" macro. -->
+    <outputs>
+        <expand macro="outputs"/>
+    </outputs>
+
+    <help>
+        <![CDATA[
+        @HELP@
+        Then, a score and a confidence level are assigned to each match based on peak correlation
+        clustering, metabolite pathway associations, adduct expectations, and isotope conformations.
+        ]]>
+    </help>
+
+    <!-- Citation entries are supplied by the shared "citations" macro. -->
+    <citations>
+        <expand macro="citations"/>
+    </citations>
+</tool>