view kmindex_query.xml @ 1:5ff85ac22974 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/kmindex commit 6e165c3acaf1cda32e0c51a72b89eca059a93e3a
author iuc
date Wed, 11 Mar 2026 11:36:47 +0000
parents 58820a4096c1
children
line wrap: on
line source

<tool id="kmindex_query" name="kmindex query" version="@TOOL_VERSION@+galaxy1" profile="@PROFILE@">
    <description>query k-mer index with sequencing data</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the query dataset under a shell-safe file name, pick the index
        ## location, then run kmindex query2 writing into the query_output directory.
        #import re
        #set $identifier = str($fastx.element_identifier)
        ## Every character that is not a word character, '-' or '.' becomes '_'
        #set $safe_name = re.sub('[^\w\-\.]', '_', $identifier)
        ## Add extension only if filename doesn't already have appropriate extension
        ## (presumably kmindex relies on the extension to recognise the format; TODO confirm)
        #if $fastx.is_of_type('fasta.gz') and not ($safe_name.endswith('.fa.gz') or $safe_name.endswith('.fasta.gz'))
            #set $safe_name = $safe_name + '.fa.gz'
        #elif $fastx.is_of_type('fasta') and not ($safe_name.endswith('.fa') or $safe_name.endswith('.fasta'))
            #set $safe_name = $safe_name + '.fa'
        #elif $fastx.is_of_type('fastqsanger.gz', 'fastq.gz') and not ($safe_name.endswith('.fq.gz') or $safe_name.endswith('.fastq.gz'))
            #set $safe_name = $safe_name + '.fq.gz'
        #elif $fastx.is_of_type('fastqsanger', 'fastq') and not ($safe_name.endswith('.fq') or $safe_name.endswith('.fastq'))
            #set $safe_name = $safe_name + '.fq'
        #end if
        ## The sanitiser keeps '-', so a name may still start with it; anchor such a
        ## name to the working directory so it cannot be read as a command-line option.
        #if $safe_name.startswith('-')
            #set $safe_name = './' + $safe_name
        #end if
        ## Index location: the extra files directory of a history dataset, or the
        ## path field of the selected kmindex data table entry.
        #if $db_opts.db_opts_selector == "histdb"
            #set INDEX = $db_opts.histdb.extra_files_path
        #else
            #set INDEX = $db_opts.kmindex.fields.path
        #end if
        ln -s '$fastx' '$safe_name' &&
        kmindex query2
            --index '$INDEX'
            --fastx '$safe_name'
            --zvalue $zvalue
            --threshold $threshold
            --output query_output
            --format $format
            $fast
            --threads "\${GALAXY_SLOTS:-1}"
            --verbose '$verbose'
    ]]></command>
    <inputs>
        <!-- Index source: a kmindex dataset from the history, or an entry of the "kmindex" data table. -->
        <conditional name="db_opts">
            <param name="db_opts_selector" type="select" label="Kmindex source">
                <option value="histdb">From your history</option>
                <option value="db" selected="true">Locally installed kmindex indexes</option>
            </param>
            <when value="histdb">
                <param name="histdb" type="data" format="kmindex" label="Kmindex" />
            </when>
            <when value="db">
                <param name="kmindex" type="select" label="kmindex">
                    <options from_data_table="kmindex"/>
                </param>
            </when>
        </conditional>
        <!-- Query file: the format list only admits FASTA/FASTQ, plain or gzip-compressed, so the help text says the same. -->
        <param argument="--fastx" type="data" format="fasta,fastq,fasta.gz,fastq.gz" label="Query sequences" help="FASTA or FASTQ file to query (plain or gzip-compressed)"/>
        <param argument="--zvalue" type="integer" value="0" min="0" label="Z-value" help="Index s-mers and query (s+z)-mers (0 = standard k-mer query)"/>
        <param argument="--threshold" type="float" value="0.0" min="0.0" max="1.0" label="Shared k-mers threshold" help="Minimum proportion of shared k-mers (0.0-1.0)"/>
        <param argument="--format" type="select" label="Output format" help="Format of the output file">
            <option value="json" selected="true">JSON</option>
            <option value="matrix">Matrix</option>
        </param>
        <!-- Emits the bare flag when checked and nothing otherwise. -->
        <param argument="--fast" type="boolean" truevalue="--fast" falsevalue="" checked="false" label="Fast mode" help="Keep more pages in cache for faster queries"/>
        <!-- Shared parameters defined in macros.xml. -->
        <expand macro="common_params"/>
    </inputs>
    <outputs>
        <!-- The two filters are complementary, so exactly one collection is produced per run. -->
        <!-- Matrix mode: one tabular element for each .tsv file discovered in query_output. -->
        <collection name="output_matrix"
                    type="list"
                    label="${tool.name} on ${on_string}: results (matrix)">
            <filter>format == 'matrix'</filter>
            <discover_datasets directory="query_output"
                               pattern="(?P&lt;designation&gt;.*)\.tsv$"
                               format="tabular"/>
        </collection>
        <!-- Any other mode (JSON): one json element for each .json file discovered in query_output. -->
        <collection name="output"
                    type="list"
                    label="${tool.name} on ${on_string}: results (json)">
            <filter>format != 'matrix'</filter>
            <discover_datasets directory="query_output"
                               pattern="(?P&lt;designation&gt;.*)\.json$"
                               format="json"/>
        </collection>
    </outputs>
    <tests>
        <!-- Tests 1-3 use an index uploaded to the history; tests 4-7 pick an entry of the kmindex data table. -->
        <!-- Test 1: Basic JSON query -->
        <test expect_num_outputs="1">
            <conditional name="db_opts">
                <param name="db_opts_selector" value="histdb"/>
                <param name="histdb" ftype="kmindex" class="Directory" value="index1" />
            </conditional>
            <param name="fastx" value="query1.fasta"/>
            <param name="format" value="json"/>
            <output_collection name="output" type="list" count="1">
                <element name="abundance_test" ftype="json" value="expected_query_t1.json" />
            </output_collection>
        </test>
        <!-- Test 2: Matrix output format -->
        <test expect_num_outputs="1">
            <conditional name="db_opts">
                <param name="db_opts_selector" value="histdb"/>
                <param name="histdb" ftype="kmindex" class="Directory" value="index1" />
            </conditional>
            <param name="fastx" value="query1.fasta.gz"/>
            <param name="format" value="matrix"/>
            <output_collection name="output_matrix" type="list" count="1">
                <element name="abundance_test" ftype="tabular" value="expected_query2_index1.tsv" />
            </output_collection>
        </test>
        <!-- Test 3: Query with threshold and z-value -->
        <test expect_num_outputs="1">
            <conditional name="db_opts">
                <param name="db_opts_selector" value="histdb"/>
                <param name="histdb" ftype="kmindex" class="Directory" value="index1" />
            </conditional>
            <param name="fastx" value="query2.fastq.gz"/>
            <param name="threshold" value="0.5"/>
            <param name="zvalue" value="5"/>
            <param name="format" value="json"/>
            <output_collection name="output" type="list" count="1">
                <element name="abundance_test" ftype="json" value="expected_query_t4.json" />
            </output_collection>
        </test>
        <!-- Test 4: query pre-built configured index1 -->
        <test expect_num_outputs="1">
            <conditional name="db_opts">
                <param name="db_opts_selector" value="db"/>
                <param name="kmindex" value="index1" />
            </conditional>
            <param name="fastx" value="query1.fasta"/>
            <param name="format" value="json"/>
            <output_collection name="output" type="list" count="1">
                <element name="abundance_test" ftype="json" value="expected_query_t1.json" />
            </output_collection>
        </test>
        <!-- Test 5: query pre-built configured index2 -->
        <test expect_num_outputs="1">
            <conditional name="db_opts">
                <param name="db_opts_selector" value="db"/>
                <param name="kmindex" value="index2" />
            </conditional>
            <param name="fastx" value="query1.fasta"/>
            <param name="format" value="json"/>
            <output_collection name="output" type="list" count="1">
                <element name="test_index" ftype="json" value="expected_query_t6.json" />
            </output_collection>
        </test>
        <!-- Test 6: using register index, JSON output (one element per index in the register) -->
        <test expect_num_outputs="1">
            <conditional name="db_opts">
                <param name="db_opts_selector" value="db"/>
                <param name="kmindex" value="register" />
            </conditional>
            <param name="fastx" value="query1.fasta"/>
            <param name="format" value="json"/>
            <output_collection name="output" type="list" count="2">
                <element name="index1" ftype="json" value="expected_query2_index1.json" />
                <element name="index2" ftype="json" value="expected_query2_index2.json" />
            </output_collection>
        </test>
        <!-- Test 7: using register index, matrix output (one element per index in the register) -->
        <test expect_num_outputs="1">
            <conditional name="db_opts">
                <param name="db_opts_selector" value="db"/>
                <param name="kmindex" value="register" />
            </conditional>
            <param name="fastx" value="query1.fasta"/>
            <param name="format" value="matrix"/>
            <output_collection name="output_matrix" type="list" count="2">
                <element name="index1" ftype="tabular" value="expected_query2_index1_register.tsv" />
                <element name="index2" ftype="tabular" value="expected_query2_index2_register.tsv" />
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[
**What it does**

kmindex query2 searches a pre-built k-mer index to find the percentage of shared k-mers between query sequences and indexed samples.

**Input**

- A k-mer index created by kmindex build or kmindex register
- Query sequences in FASTA or FASTQ format (can be gzipped)

**Output**

The output format depends on your selection:

- **JSON**: Detailed results in JSON format
- **Matrix**: Tab-separated matrix of query-sample similarities

**Parameters**

- **Z-value**: Query with (k+z)-mers instead of k-mers to reduce false positives (Findere algorithm)
- **Threshold**: Filter results to show only matches above this similarity threshold (0.0-1.0)
- **Fast mode**: Keeps more data in cache for faster repeated queries

**More Information**

For more details, see the kmindex documentation at https://tlemane.github.io/kmindex/
    ]]></help>
    <expand macro="citations"/>
</tool>