view october_recetox_xmsannotator_advanced.xml @ 1:8fcf5ca25f5d draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox-xmsannotator commit b26a7ce6459f8e456fd166ba1aaada4e7e66dfa6"
author recetox
date Fri, 29 Oct 2021 12:24:44 +0000
parents e2e2e455fcfb
children 116ec7c1c038
line wrap: on
line source

<tool id="october_recetox_xmsannotator_advanced" name="OCTOBER RECETOX xMSannotator advanced" version="@TOOL_VERSION@+galaxy0">
    <!--
        Galaxy tool wrapper around advanced_annotation() from the recetox.xmsannotator R package
        (see the "wrapper" configfile in this file).
        The @TOOL_VERSION@ token and the "creator" and "requirements" macros are not defined here;
        they are expected to be provided by the imported macros.xml.
    -->
    <description>annotate peak intensity table including scores and confidence levels</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="creator"/>
    <expand macro="requirements" />
    <!--
        Runs Rscript with two -e expressions, evaluated in order:
          1. defines n_workers from the GALAXY_SLOTS environment variable, falling back to 1 when
             the variable is unset or empty (shell default expansion);
          2. sources the generated "wrapper" configfile, which reads n_workers.
        The backslash in front of ${GALAXY_SLOTS:-1} stops the template engine from treating it as
        a template variable, so the expansion is left to the shell.
        NOTE(review): detect_errors="aggressive" is Galaxy's stricter error detection mode; see the
        Galaxy tool XML documentation for the exact rules.
    -->
    <command detect_errors="aggressive"><![CDATA[
        Rscript -e "n_workers <- \${GALAXY_SLOTS:-1}" -e "source('${wrapper}')"
    ]]></command>

    <!--
        The "wrapper" configfile is the R script that the command sources. It loads
        recetox.xmsannotator, calls advanced_annotation() with the tool parameters and writes the
        result with save_parquet() to ${annotation_parquet}. n_workers is not defined inside the
        script; it is set by the first Rscript -e expression before the script is sourced.
        Lines starting with "##" inside the script are template comments; the expected_adducts and
        boost_compounds arguments are currently disabled that way.

        NOTE(review): peak_table is only passed when the dataset is of type "parquet"; for any
        other datatype the argument is omitted entirely. Confirm that the input macro restricts
        the accepted format.
        NOTE(review): adduct_weights is passed as as.data.frame() of the dataset path, i.e. a data
        frame holding the path string rather than the file contents. Confirm whether a loader call
        was intended.
        NOTE(review): filter_by is interpolated inside double quotes, so the default value
        c('M-H', 'M+H') reaches R as one string instead of a character vector. Confirm the format
        advanced_annotation() expects.
        NOTE(review): clustering.correlation_method, scoring.strict_boosting, scoring.min_isp and
        scoring.max_isp are declared as inputs in this file but are not referenced by the script.
    -->
    <configfiles>
        <configfile name="wrapper"><![CDATA[
            library(recetox.xmsannotator)

            annotation <- advanced_annotation(
            #if $peak_table.is_of_type("parquet")
                peak_table = load_peak_table_parquet("${peak_table}"),
            #end if
                adduct_table = load_adduct_table_parquet("${adduct_table}"),
                adduct_weights = as.data.frame("${adduct_weights}"),
                compound_table = load_compound_table_parquet("${compound_table}"),
                mass_tolerance = 1e-6 * ${mass_tolerance_ppm},
                time_tolerance = $time_tolerance,
                correlation_threshold = as.double($clustering.correlation_threshold),
                min_cluster_size = as.integer($clustering.min_cluster_size),
                deep_split = as.integer($clustering.deep_split),
                network_type = "$clustering.network_type",
            ## #if $scoring.expected_adducts
            ##     expected_adducts = load_expected_adducts_csv("${scoring.expected_adducts}"),
            ## #end if
            ## #if $scoring.boost_compounds
            ##     boost_compounds = load_boost_compounds_csv("${scoring.boost_compounds}"),
            ## #end if
                redundancy_filtering = $scoring.redundancy_filtering,
                n_workers = n_workers,
            ## new params
                intensity_deviation_tolerance = as.double($intensity_deviation_tolerance),
                mass_defect_tolerance = as.double($mass_defect_tolerance),
                mass_defect_precision = as.double($mass_defect_precision),
                peak_rt_width = as.integer($peak_rt_width),
                maximum_isotopes = as.integer($maximum_isotopes),
                min_ions_per_chemical = as.integer($min_ions_per_chemical),
                filter_by = "$filter_by"
            )

            save_parquet(data = annotation, file = "${annotation_parquet}")
        ]]></configfile>
    </configfiles>

    <inputs>
        <!--
            Shared inputs are provided by the "inputs" macro. The "tolerance" macro is expanded
            with the time_tolerance parameter as its child element. The wrapper script also reads
            ${mass_tolerance_ppm}, which is not declared in this file and presumably comes from
            the "tolerance" macro (to be confirmed against macros.xml).
        -->
        <expand macro="inputs"/>
        <expand macro="tolerance">
            <param name="time_tolerance" type="float" value="10" min="0">
                <label>Retention time tolerance [s]</label>
                <help>
                    Retention time tolerance in seconds for finding peaks derived from the same parent compound.
                </help>
            </param>
        </expand>
        <!--
            Clustering parameters. The wrapper script forwards correlation_threshold,
            min_cluster_size, deep_split and network_type to advanced_annotation().
        -->
        <section name="clustering" title="Clustering">
            <!-- NOTE(review): correlation_method is not referenced by the wrapper script, so the selection currently has no effect there. -->
            <param name="correlation_method" type="select" display="radio" label="Correlation method">
                <option value="pearson" selected="true"/>
                <option value="spearman"/>
            </param>
            <param name="correlation_threshold" type="float" value="0.7">
                <label>Correlation threshold</label>
                <help>Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite.</help>
            </param>
            <param name="min_cluster_size" type="integer" value="10" min="1">
                <label>Minimum cluster size</label>
                <help>The minimum number of nodes to be considered as a cluster.</help>
            </param>
            <param name="deep_split" type="integer" value="2" min="0" max="4">
                <label>Deep split</label>
                <help>
                    Deep split provides a rough control over sensitivity to cluster splitting. The higher the value,
                    the more and smaller clusters will be produced (see WGCNA package documentation).
                </help>
            </param>
            <param name="network_type" type="select" display="radio">
                <label>Network type</label>
                <help>
                    Network type parameter affects how the network's adjacency matrix is created from the correlation
                    matrix (see WGCNA package documentation).
                </help>
                <option value="signed"/>
                <option value="unsigned" selected="true"/>
            </param>
        </section>
        <!--
            Scoring parameters. Of these, the wrapper script only forwards redundancy_filtering
            to advanced_annotation().
            NOTE(review): strict_boosting, min_isp and max_isp are not referenced by the wrapper
            script; the expected_adducts and boost_compounds inputs are commented out together
            with their counterparts in the script.
        -->
        <section name="scoring" title="Scoring" expanded="true">
            <param name="strict_boosting" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
                <label>Strict boosting</label>
                <help>
                    Boost the scores of metabolites that not only belong to the same pathway but also to the same
                    cluster. Otherwise, do not account for cluster membership.
                </help>
            </param>
            <!-- <param name="expected_adducts" type="data" format="csv" optional="true">
                <label>Expected adducts (optional)</label>

                <help>
                    Require the presence of certain adducts for a high confidence match. By default, at least the
                    presence of an M+H adduct is required for a high confidence match.
                </help>
            </param> -->
            <!-- <param name="boost_compounds" type="data" format="csv" optional="true">
                <label>Validated compounds score boosting (optional)</label>
                <help>
                    Table of previously validated compounds to boost their scores and confidence levels.
                    The 1st column of the table must contain IDs of compounds.
                    The optional 2nd and 3rd columns may contain mz values and retention times.
                </help>
            </param> -->
            <param name="min_isp" type="integer" min="0" value="1">
                <label>Minimum number of expected isotopes</label>
                <help>
                    Minimum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
                </help>
            </param>
            <param name="max_isp" type="integer" min="0" value="5">
                <label>Maximum number of expected isotopes</label>
                <help>
                    Maximum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
                </help>
            </param>
            <param name="redundancy_filtering" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
                <label>Redundancy filtering</label>
                <help>Whether to filter out low-scored multiple matches or not.</help>
            </param>
        </section>
        <!--
            Additional arguments forwarded to advanced_annotation() by the wrapper script. The
            numeric ones are coerced there with as.double() or as.integer().
        -->
        <param name="intensity_deviation_tolerance" type="float" value="0.1">
            <label>intensity_deviation_tolerance</label>
        </param>
        <param name="mass_defect_tolerance" type="float" value="0.1">
            <label>mass_defect_tolerance</label>
        </param>
        <param name="mass_defect_precision" type="float" value="0.01">
            <label>mass_defect_precision</label>
        </param>
        <param name="peak_rt_width" type="integer" value="1">
            <label>peak_rt_width</label>
        </param>
        <param name="maximum_isotopes" type="integer" value="10">
            <label>maximum_isotopes</label>
        </param>
        <param name="min_ions_per_chemical" type="integer" value="2">
            <label>min_ions_per_chemical</label>
        </param>
        <param name="filter_by" type="text" value="c('M-H', 'M+H')">
            <!--
                The value is interpolated into a double-quoted R string in the wrapper script, so
                it must never contain a double quote or a backslash; otherwise arbitrary R code
                could be injected. Instead of disabling sanitization, only the characters needed
                to spell adduct lists such as the default value are accepted (letters, digits,
                plus, minus, underscore, comma, single quote, parentheses and space). Every other
                character is dropped.
            -->
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="+"/>
                    <add value="-"/>
                    <add value="_"/>
                    <add value=","/>
                    <add value="&apos;"/>
                    <add value="("/>
                    <add value=")"/>
                    <add value=" "/>
                </valid>
            </sanitizer>
            <label>filter_by</label>
        </param>
    </inputs>

    <!--
        Output datasets are declared by the "outputs" macro, which is not defined in this file.
        The wrapper script writes its result to ${annotation_parquet}, so that macro is expected
        to declare an output with that name.
    -->
    <outputs>
        <expand macro="outputs"/>
    </outputs>

    <help>
        <![CDATA[
        @HELP@
        Then, a score and a confidence level are assigned to each match based on peak correlation
        clustering, metabolite pathway associations, adducts expectations, and isotope conformations.
        ]]>
    </help>

    <!-- Citation entries are supplied by the "citations" macro, which is not defined in this file. -->
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>