view october_recetox_xmsannotator_advanced.xml @ 14:51f8e69c9d47 draft

"planemo upload for repository https://github.com/RECETOX/galaxytools/tree/master/tools/recetox-xmsannotator commit c3ef30761f7b9641b0cea7464eb945a37ed9cb6f"
author recetox
date Wed, 03 Nov 2021 16:31:58 +0000
parents 29b04e82bc4e
children e72157bd26db
line wrap: on
line source

<tool id="october_recetox_xmsannotator_advanced" name="OCTOBER RECETOX xMSannotator advanced" version="@TOOL_VERSION@+galaxy4">
    <description>annotate peak intensity table including scores and confidence levels</description>
    <!-- Shared definitions are pulled in from macros.xml. No macro is defined locally in this
         file, so the "creator" and "requirements" macros expanded below (and the other macros
         and @TOKENS@ used further down) presumably come from that import. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="creator"/>
    <expand macro="requirements" />
    <!-- Runs the generated R wrapper. The first -e expression assigns n_workers from the
         GALAXY_SLOTS environment variable, falling back to 1 when it is unset; its dollar sign
         is backslash-escaped so Cheetah leaves the shell expansion alone. The second -e
         expression sources the "wrapper" configfile declared in this tool. -->
    <command detect_errors="aggressive"><![CDATA[
        Rscript -e "n_workers <- \${GALAXY_SLOTS:-1}" -e "source('${wrapper}')"
    ]]></command>

    <configfiles>
        <!-- R script rendered through Cheetah and then sourced by the command line.
             It reads the peak table, coerces an existing "peak" column to integer, calls
             advanced_annotation() with the tool parameters and writes the result as parquet.

             Notes for maintainers:
             * Every unescaped dollar sign in this template is a Cheetah placeholder, so R's
               dollar operator must not be used here; columns are accessed with [["name"]].
             * n_workers is not a tool parameter; it is defined on the Rscript command line
               before this file is sourced.
             * filter_by is inserted as raw R code (no quoting or coercion).
             * Of the "scoring" section only redundancy_filtering is forwarded; strict_boosting,
               min_isp and max_isp are not referenced by this script. -->
        <configfile name="wrapper"><![CDATA[
            library(recetox.xmsannotator)

            peak_table <- arrow::read_parquet("${peak_table}")

            ## Coerce the peak identifier column to integer when it exists with another type.
            ## Double-bracket indexing keeps the template free of R dollar-sign accessors,
            ## which Cheetah would otherwise try to resolve as template variables.
            if ("peak" %in% colnames(peak_table) && !is.integer(peak_table[["peak"]])) {
                peak_table[["peak"]] <- as.integer(peak_table[["peak"]])
            }

            annotation <- advanced_annotation(
                peak_table = peak_table,
                adduct_table = load_adduct_table_parquet("${adduct_table}"),
                adduct_weights = as.data.frame(read.csv("${adduct_weights}")),
                compound_table = load_compound_table_parquet("${compound_table}"),
                ## The tool parameter is given in ppm; scale it by 1e-6 for the R call.
                mass_tolerance = 1e-6 * ${mass_tolerance_ppm},
                time_tolerance = $time_tolerance,
                correlation_threshold = as.double($clustering.correlation_threshold),
                min_cluster_size = as.integer($clustering.min_cluster_size),
                deep_split = as.integer($clustering.deep_split),
                network_type = "$clustering.network_type",
                redundancy_filtering = $scoring.redundancy_filtering,
                n_workers = n_workers,
                intensity_deviation_tolerance = as.double($intensity_deviation_tolerance),
                mass_defect_tolerance = as.double($mass_defect_tolerance),
                mass_defect_precision = as.double($mass_defect_precision),
                peak_rt_width = as.integer($peak_rt_width),
                maximum_isotopes = as.integer($maximum_isotopes),
                min_ions_per_chemical = as.integer($min_ions_per_chemical),
                filter_by = $filter_by
            )
            save_parquet(data = annotation, file = "${annotation_parquet}")
        ]]></configfile>
    </configfiles>

    <inputs>
        <expand macro="inputs"/>
        <expand macro="tolerance">
            <!-- Extra parameter handed to the "tolerance" macro; the wrapper script passes it
                 to advanced_annotation() as time_tolerance. -->
            <param name="time_tolerance" type="float" value="10" min="0"
                   label="Retention time tolerance [s]"
                   help="Retention time tolerance in seconds for finding peaks derived from the same parent compound."/>
        </expand>
        <section name="clustering" title="Clustering">
            <!-- The wrapper script forwards these four values to advanced_annotation() as
                 correlation_threshold, min_cluster_size, deep_split and network_type. -->
            <param name="correlation_threshold" type="float" value="0.7"
                   label="Correlation threshold"
                   help="Correlation threshold between peaks to qualify as adducts/isotopes of the same metabolite."/>
            <param name="min_cluster_size" type="integer" value="10" min="1"
                   label="Minimum cluster size"
                   help="The minimum number of nodes to be considered as a cluster."/>
            <param name="deep_split" type="integer" value="2" min="0" max="4" label="Deep split">
                <help>
                    Deep split provides a rough control over sensitivity to cluster splitting. The higher the value,
                    the more and smaller clusters will be produced (see WGCNA package documentation).
                </help>
            </param>
            <param name="network_type" type="select" display="radio" label="Network type">
                <help>
                    Network type parameter affects how the network's adjacency matrix is created from the correlation
                    matrix (see WGCNA package documentation).
                </help>
                <option value="signed">Signed</option>
                <option value="unsigned" selected="true">Unsigned</option>
            </param>
        </section>
        <section name="scoring" title="Scoring" expanded="true">
            <!-- NOTE(review): the wrapper script in this tool only reads
                 scoring.redundancy_filtering. strict_boosting, min_isp and max_isp are not
                 referenced there, so they currently have no effect on the R call. Confirm
                 whether they should be forwarded to advanced_annotation() or removed. -->
            <param name="strict_boosting" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
                <label>Strict boosting</label>
                <help>
                    Boost the scores of metabolites that not only belong to the same pathway but also to the same
                    cluster. Otherwise, do not account for cluster membership.
                </help>
            </param>
            <param name="min_isp" type="integer" min="0" value="1">
                <label>Minimum number of expected isotopes</label>
                <help>
                    Minimum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
                </help>
            </param>
            <param name="max_isp" type="integer" min="0" value="5">
                <label>Maximum number of expected isotopes</label>
                <help>
                    Maximum number of adducts/isotopes to be present for a match to be considered as a high confidence match.
                </help>
            </param>
            <!-- Rendered as the R literal TRUE or FALSE and passed as redundancy_filtering. -->
            <param name="redundancy_filtering" type="boolean" checked="true" truevalue="TRUE" falsevalue="FALSE">
                <label>Redundancy filtering</label>
                <help>Whether to filter out low-scored multiple matches or not.</help>
            </param>
        </section>
        <!-- Numeric options that the wrapper script passes to advanced_annotation() under the
             same argument names, coerced with as.double() (float) or as.integer() (integer). -->
        <param name="intensity_deviation_tolerance" type="float" value="0.1"
               label="intensity_deviation_tolerance"/>
        <param name="mass_defect_tolerance" type="float" value="0.1"
               label="mass_defect_tolerance"/>
        <param name="mass_defect_precision" type="float" value="0.01"
               label="mass_defect_precision"/>
        <param name="peak_rt_width" type="integer" value="1"
               label="peak_rt_width"/>
        <param name="maximum_isotopes" type="integer" value="10"
               label="maximum_isotopes"/>
        <param name="min_ions_per_chemical" type="integer" value="2"
               label="min_ions_per_chemical"/>
        <param name="filter_by" type="text" value="c('M-H', 'M+H')">
            <!-- The value is pasted verbatim into the generated R script as the filter_by
                 argument, i.e. it is executed as R code. The sanitizer stays disabled so that
                 quotes and parentheses reach the script unchanged; the regex validator below
                 compensates by accepting only an R character vector literal of the form
                 c('A', 'B', ...) whose quoted items consist of letters, digits and + - . ( ) [ ].
                 TODO: replace the free-text R expression with a structured parameter. -->
            <sanitizer sanitize="false"></sanitizer>
            <validator type="regex" message="filter_by must be an R character vector literal such as c('M-H', 'M+H')">^\s*c\(\s*(?:'[A-Za-z0-9+.()\[\]-]+'|"[A-Za-z0-9+.()\[\]-]+")(?:\s*,\s*(?:'[A-Za-z0-9+.()\[\]-]+'|"[A-Za-z0-9+.()\[\]-]+"))*\s*\)\s*$</validator>
            <label>filter_by</label>
        </param>
    </inputs>

    <!-- Output datasets are declared by the "outputs" macro. The wrapper script writes its
         result to ${annotation_parquet}, so the macro presumably defines an output with that
         name (verify against macros.xml). -->
    <outputs>
        <expand macro="outputs"/>
    </outputs>

    <help>
        <![CDATA[
        @HELP@
        Then, a score and a confidence level are assigned to each match based on peak correlation
        clustering, metabolite pathway associations, adduct expectations, and isotope conformations.
        ]]>
    </help>

    <!-- Citation entries are supplied by the "citations" macro rather than listed here. -->
    <citations>
        <expand macro="citations"/>
    </citations>
</tool>