<!-- Mercurial > repos > iuc > evidencemodeler -->

<tool id="evidencemodeler" name="EVidenceModeler" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="21.05">
    <description>combines ab initio gene predictions, protein and transcript alignments into gene structures</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">EvidenceModeler</xref>
    </xrefs>
    <expand macro="requirements"/>

    <!-- Command template: links the input datasets into the working directory under
         fixed names and runs EVidenceModeler on them. Optional inputs and optional
         flags are only emitted when the user actually provided a value. -->
    <command detect_errors="exit_code"><![CDATA[

    ## Link the mandatory inputs under fixed local names.
    ln -s '$input_genome' ./input_genome.fasta &&
    ln -s '$input_predictions' ./input_predictions.gff &&
    ln -s '$input_weights' ./input_weights.txt &&
    ln -s '$input_proteins' ./input_proteins.gff &&

    ## The transcript alignments are optional: only create the link when a dataset
    ## was supplied, otherwise a dangling link to the literal string None is made.
    #if $input_transcript:
        ln -s '$input_transcript' ./input_transcript.gff &&
    #end if

    ## The sample id is the prefix of the result files; the outputs section collects
    ## galaxy.EVM.gff3 and galaxy.EVM.pep from the working directory.
    EVidenceModeler
    --sample_id galaxy
    --genome './input_genome.fasta'
    --gene_predictions './input_predictions.gff'
    --weights './input_weights.txt'
    --protein_alignments './input_proteins.gff'
    --segmentSize $segmentsize
    --overlapSize $overlapsize

    #if $input_transcript:
        --transcript_alignments './input_transcript.gff'
    #end if

    #if $opt.input_repeat:
        --repeats '$opt.input_repeat'
    #end if

    #if $opt.input_terminalexon:
        --terminalExons '$opt.input_terminalexon'
    #end if

    ## stop_codon is an optional multi-select: when nothing is selected the flag is
    ## left out so that the literal string None is never passed to the tool.
    #if $opt.stop_codon:
        --stop_codons '$opt.stop_codon'
    #end if

    --min_intron_length $opt.min_intron_length
    --search_long_introns $opt.search_long_introns
    --re_search_intergenic $opt.re_search_intergenic
    --terminal_intergenic_re_search $opt.terminal_intergenic_re_search

    ]]></command>

    <inputs>
        <!-- Mandatory inputs: genome, gene predictions, evidence weights and protein alignments. -->
        <param name="input_genome" type="data" format="fasta" label="Genome input"/>
        <param name="input_predictions" type="data" format="gff3" label="Gene predictions input"/>
        <!-- The weights file is a plain text table, not GFF3. "txt" is a parent of the
             tabular and gff3 datatypes, so previously accepted datasets remain valid. -->
        <param name="input_weights" type="data" format="txt" label="Weights for evidence types file" help="See documentation for formatting: 'Weights' section"/>
        <!-- This parameter is required and always passed on the command line,
             so the help text must not advertise it as optional. -->
        <param name="input_proteins" type="data" format="gff3" label="Protein alignments input" help="Protein alignments in GFF3 format"/>
        <param name="input_transcript" type="data" optional="true" format="gff3" label="Transcript alignments input" help="Optional but recommended"/>
        <!-- Genome partitioning: segment length and the overlap between neighbouring segments. -->
        <param argument="--segmentSize" name="segmentsize" value="100000" type="integer" label="Length of a single sequence" help="This value must be less than 1 MB" />
        <param argument="--overlapSize" name="overlapsize" value="10000" type="integer" label="Length of sequence overlap between segmented sequences" help="The length must be at least equivalent to one or two expected gene lengths" />
        <section name="opt" title="Advanced option" expanded="false">
            <param name="input_repeat" type="data" optional="true" format="gff3" label="Masked genome repeats"/>
            <param name="input_terminalexon" type="data" optional="true" format="gff3" label="Additional file of terminal exons to be taken into account" help="From long-orfs PASA"/>
            <!-- Optional multi-select: the selected values are joined with commas by Galaxy. -->
            <param name="stop_codon" argument="--stop_codons" type="select" multiple="true" optional="true" label="List of stop codon" help="For Tetrahymena, set TGA">
                <option value="TAA,TGA,TAG" selected="true">TAA,TGA,TAG</option>
                <option value="TAA">TAA</option>
                <option value="TGA">TGA</option>
                <option value="TAG">TAG</option>
            </param>
            <param argument="--min_intron_length" type="integer" value="20" label="Minimum length for an intron" help="Default 20 bp" />
            <param argument="--search_long_introns" type="select" label="Reexamine long introns" help="Can find nested genes, but also can result in false positives">
                <option value="0" selected="true">Off</option>
                <option value="1">On</option>
            </param>
            <param argument="--re_search_intergenic" type="select" label="Reexamines intergenic regions of minimum length">
                <option value="0" selected="true">Off</option>
                <option value="1">On</option>
            </param>
            <!-- Label corrected: it used to duplicate the label of the option above. -->
            <param argument="--terminal_intergenic_re_search" type="select" label="Reexamines genomic regions outside of the span of all predicted genes">
                <option value="0" selected="true">Off</option>
                <option value="1">On</option>
            </param>
        </section>
    </inputs>

    <!-- Result files are collected from the working directory; their names carry the
         "galaxy" prefix that the command passes as the sample id. -->
    <outputs>
        <!-- The collected file is GFF3, so it is declared as gff3 (a subtype of gff). -->
        <data name="evm_gff" format="gff3" label="${tool.name} on ${on_string}: GFF3" from_work_dir="galaxy.EVM.gff3"/>
        <data name="evm_pep" format="fasta" label="${tool.name} on ${on_string}: PEP" from_work_dir="galaxy.EVM.pep"/>
    </outputs>

    <tests>
        <!-- Single run with every evidence type supplied and the advanced options at their defaults. -->
        <test expect_num_outputs="2">
            <param name="input_genome" value="genome.fasta"/>
            <param name="input_predictions" value="gene_predictions.gff3"/>
            <param name="input_weights" value="weights.txt"/>
            <param name="input_proteins" value="protein_alignments.gff3"/>
            <param name="input_transcript" value="transcript_alignments.gff3"/>
            <param name="segmentsize" value="100000"/>
            <param name="overlapsize" value="10000"/>
            <!-- "opt" is declared as a section in the inputs (not a conditional) and has no "adv" parameter. -->
            <section name="opt">
                <param name="min_intron_length" value="20"/>
                <param name="search_long_introns" value="0"/>
                <param name="re_search_intergenic" value="0"/>
                <param name="terminal_intergenic_re_search" value="0"/>
            </section>
            <output name="evm_pep" ftype="fasta">
                <assert_contents>
                    <has_text text="evm.model.Contig1.3 evm.TU.Contig1.3  EVM prediction Contig1.3 Contig1:7611-9749(-)"/>
                    <has_text text="evm.model.Contig1.10 evm.TU.Contig1.10  EVM prediction Contig1.10 Contig1:57371-59941(+)"/>
                    <has_n_lines n="108" delta="0"/>
                    <has_n_columns n="1" delta="0"/>
                </assert_contents>
            </output>
            <output name="evm_gff" ftype="gff3">
                <assert_contents>
                    <has_text text="ID=evm.TU.Contig1.1;Name=EVM%20prediction%20Contig1.1"/>
                    <has_text text="ID=evm.TU.Contig1.4;Name=EVM%20prediction%20Contig1.4"/>
                    <has_n_lines n="191" delta="0"/>
                    <has_n_columns n="9" delta="0"/>
                    <!-- the sep=";" is used to count the gff properties -->
                    <has_n_columns n="2" delta="0" sep=";"/>
                </assert_contents>
            </output>
        </test>
    </tests>

    <help><![CDATA[
        EvidenceModeler_: EVidenceModeler (aka EVM) is a software package that combines ab initio
        gene predictions and protein and transcript alignments into weighted consensus gene structures.
        EVM provides a flexible and intuitive framework for combining various types of evidence into a
        single automated gene structure annotation system.

        .. _EvidenceModeler: https://github.com/EVidenceModeler/EVidenceModeler.github.io
    ]]></help>
    <expand macro="citation"></expand>
</tool>