<!-- Mercurial > repos > iuc > fur -->

<tool id="fur" name="Fur" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short description of what the tool does. -->
    <description>find unique genomic regions from target and neighbor genomes</description>
    <!-- Pulls in macros.xml. The "requirements" and "citations" macros expanded in this
         file, and the @TOOL_VERSION@ / @VERSION_SUFFIX@ / @PROFILE@ tokens used on the
         tool element, are presumably defined there (not visible here; confirm). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <!-- Cross-reference to the bio.tools entry with identifier "fur". -->
    <xrefs>
        <xref type="bio.tools">fur</xref>
    </xrefs>
    <expand macro="requirements"/>
    <!-- Command run to obtain the version string of the underlying program. -->
    <version_command>fur -v</version_command>
    <command><![CDATA[
## Working-directory layout used by both steps below.
## The two out/ paths must match the from_work_dir attributes in <outputs>.
#set $target_dir = 'work/targets'
#set $neighbor_dir = 'work/neighbors'
#set $fur_db = 'work/fur.db'
#set $summary_log = 'out/summary.txt'
#set $regions_fasta = 'out/unique_regions.fasta'

mkdir -p '${target_dir}' '${neighbor_dir}' 'out' &&

## Link every selected dataset into its directory under a numbered .fa name,
## because makeFurDb is pointed at directories rather than at individual files.
#for $idx, $genome in enumerate($t):
    ln -s '${genome}' '${target_dir}/target_${idx}.fa' &&
#end for

#for $idx, $genome in enumerate($n):
    ln -s '${genome}' '${neighbor_dir}/neighbor_${idx}.fa' &&
#end for

## Step 1: build the Fur database; stderr starts the summary file.
makeFurDb
    -t '${target_dir}'
    -n '${neighbor_dir}'
    -d '${fur_db}'
    -T "\${GALAXY_SLOTS:-1}"
    2> '${summary_log}' &&

## Step 2: run fur on that database; stdout is the FASTA result and stderr is
## appended to the same summary file. The three boolean parameters render as
## their flag when checked and as an empty string otherwise.
fur -d '${fur_db}'
    -w '${analysis.window_length}'
    -q '${analysis.quantile}'
    -e '${analysis.evalue}'
    -n '${analysis.min_region_length}'
    -t "\${GALAXY_SLOTS:-1}"
    ${analysis.exact_matches_only}
    ${analysis.megablast_only}
    ${analysis.mask_neighbors}
> '${regions_fasta}' 2>> '${summary_log}'
    ]]></command>
    <inputs>
        <!-- Genome sets that the command links into work/targets and work/neighbors.
             No explicit name is given, so the parameter names come from the argument
             (the command and tests refer to them as "t" and "n"). -->
        <param
            argument="-t"
            type="data"
            multiple="true"
            format="fasta"
            label="Target genome FASTA files"
            help="One or more target genomes used to discover shared unique regions."/>
        <param
            argument="-n"
            type="data"
            multiple="true"
            format="fasta"
            label="Neighbor genome FASTA files"
            help="One or more closely related neighbor genomes used for subtraction."/>

        <!-- Options forwarded to the fur call in the command. -->
        <section name="analysis" title="Analysis options" expanded="true">
            <!-- Numeric thresholds. -->
            <param
                argument="-w"
                name="window_length"
                type="integer"
                min="1"
                value="80"
                label="Sliding window length"
                help="Window size used in the first subtraction step."/>
            <param
                argument="-q"
                name="quantile"
                type="float"
                min="0"
                max="1"
                value="0.1"
                label="Quantile of match length distribution"
                help="Lower values are more stringent in the first subtraction step."/>
            <param
                argument="-e"
                name="evalue"
                type="float"
                min="0"
                value="1e-5"
                label="BLAST E-value"
                help="E-value threshold used in the final subtraction step."/>
            <param
                argument="-n"
                name="min_region_length"
                type="integer"
                min="1"
                value="100"
                label="Minimum region length"
                help="Only regions with at least this many nucleotides are reported."/>

            <!-- Switches: each emits its flag when checked and nothing otherwise. -->
            <param
                argument="-x"
                name="exact_matches_only"
                type="boolean"
                truevalue="-x"
                falsevalue=""
                checked="false"
                label="Keep only exact matches during target intersection"/>
            <param
                argument="-m"
                name="megablast_only"
                type="boolean"
                truevalue="-m"
                falsevalue=""
                checked="false"
                label="Use only megablast for the final subtraction"/>
            <param
                argument="-M"
                name="mask_neighbors"
                type="boolean"
                truevalue="-M"
                falsevalue=""
                checked="false"
                label="Enable masked BLAST database search"
                help="Recommended for mammalian genomes when masking information is available."/>
        </section>
    </inputs>
    <outputs>
        <!-- Both datasets are picked up from files the command writes under out/. -->
        <data
            name="unique_regions"
            format="fasta"
            from_work_dir="out/unique_regions.fasta"
            label="${tool.name} on ${on_string}: unique regions"/>
        <data
            name="summary"
            format="txt"
            from_work_dir="out/summary.txt"
            label="${tool.name} on ${on_string}: run summary"/>
    </outputs>
    <tests>
        <!-- Test 1: two targets and one neighbor with every analysis option left at its
             default. The FASTA output is compared against an expected file and the
             summary must contain the text "Subtraction_2". -->
        <test expect_num_outputs="2">
            <param name="t" value="target_0.fa,target_1.fa" ftype="fasta"/>
            <param name="n" value="neighbor_0.fa" ftype="fasta"/>
            <output name="unique_regions" file="exp_unique_regions_default.fasta"/>
            <output name="summary">
                <assert_contents>
                    <has_text text="Subtraction_2"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: same inputs with min_region_length raised to 150. The summary
             assertion is a literal text match, so the run of spaces between
             "Intersection" and "0" is part of what is checked. -->
        <test expect_num_outputs="2">
            <param name="t" value="target_0.fa,target_1.fa" ftype="fasta"/>
            <param name="n" value="neighbor_0.fa" ftype="fasta"/>
            <section name="analysis">
                <param name="min_region_length" value="150"/>
            </section>
            <output name="unique_regions" file="exp_unique_regions_n150.fasta"/>
            <output name="summary">
                <assert_contents>
                    <has_text text="Intersection           0"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help format="markdown"><![CDATA[
`Fur` discovers genomic regions that are shared by target genomes but absent from neighbor genomes.

This wrapper runs the full Fur workflow in one step:

1. Build a temporary Fur database with `makeFurDb` from the target and neighbor FASTA files.
2. Run `fur` on that database to report unique regions.

### Inputs

- **Target genome FASTA files**: one or more genomes that define the group of interest.
- **Neighbor genome FASTA files**: one or more related genomes that should not contain the marker.

### Outputs

- **Unique regions**: FASTA file with candidate marker regions.
- **Run summary**: textual report from `makeFurDb` and `fur`, including per-step counts.

### Notes

- The temporary Fur database is created inside the job working directory and is not retained as an output.
    ]]></help>
    <expand macro="citations"/>
</tool>