view fatovcf.xml @ 3:18c9f51a37a1 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ucsc_tools/fatovcf commit cf03c3f6b7afb8e673cc5ced6178d4f6f96a9116
author iuc
date Tue, 03 Dec 2024 09:45:26 +0000
parents d49aa685f7f6
children
line wrap: on
line source

<tool id="fatovcf" name="faToVcf" version="@TOOL_VERSION@+galaxy0" profile="21.05" license="MIT">
    <!-- Short summary of what the tool does. -->
    <description>
        Convert a FASTA alignment file to Variant Call Format (VCF) single-nucleotide diffs
    </description>
    <!-- @TOOL_VERSION@ is reused by the tool's version attribute, the package
         requirement and the version command, so a version bump happens here only. -->
    <macros>
        <token name="@TOOL_VERSION@">473</token>
    </macros>
    <!-- Cross-reference to the bio.tools registry entry. -->
    <xrefs>
        <xref type="bio.tools">UCSC_Genome_Browser_Utilities</xref>
    </xrefs>
    <!-- The package that provides the faToVcf executable, pinned to @TOOL_VERSION@. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">ucsc-fatovcf</requirement>
    </requirements>
    <!-- The version is echoed from the token rather than queried from the binary. -->
    <version_command><![CDATA[ echo "@TOOL_VERSION@" ]]></version_command>
    <command detect_errors="exit_code"><![CDATA[
    ## Stage the alignment under a fixed local name. in_fasta is declared
    ## without optional="true", so no guard around the link is needed.
    ln -s '$in_fasta' in.fa &&
    faToVcf
        in.fa
        '$out'
        ## Pass -ref only when a custom reference sequence was selected.
        ## The value is free text, so it is single-quoted like every other
        ## text/path value in this command.
        #if $ref_seq.refSeq == "customRef"
            -ref='$ref_seq.ref'
        #end if
        ## Expands to an empty string, -ambiguousToN or -resolveAmbiguous.
        $ambiguous
        #if $excl_seq.excludeFile
            -excludeFile='$excl_seq.excludeFile'
        #end if
        -maxDiff=$excl_seq.maxDiff
        #if $mask_sites.maskSites
            -maskSites='$mask_sites.maskSites'
        #end if
        ## Window masking options are passed together, and only for a window size above 0.
        #if $mask_sites.windowSize > 0
            -windowSize=$mask_sites.windowSize
            -minAmbigInWindow=$mask_sites.minAmbigInWindow
        #end if
        ## Boolean parameters expand to their flag or to an empty string.
        $includeNoAltN
        -minAc=$minAc
        -minAf=$minAf
        #if $output.startOffset > 0
            -startOffset=$output.startOffset
        #end if
        $output.includeRef
        $output.noGenotypes
        #if $output.vcfChrom
            -vcfChrom='$output.vcfChrom'
        #end if
    ]]></command>
    <inputs>
        <!-- Required multi-sequence alignment; linked to in.fa by the command. -->
        <param name="in_fasta" format="fasta" type="data" label="FASTA Alignment" help="Must contain a series of sequences with different names and the same length. Both N and - are treated as missing information." />

        <!-- Reference selection: an empty value keeps the first sequence, customRef exposes the ref text field. -->
        <conditional name="ref_seq">
            <param name="refSeq" type="select" label="Determine reference sequence" help="Which sequence from the FASTA file should be used as the reference sequence.">
                <option value="" selected="true">Use the first sequence as reference</option>
                <option value="customRef">Use a different sequence as reference</option>
            </param>
            <when value="customRef">
                <param argument="-ref" type="text" label="Name of sequence that should be used as reference sequence:" help="Must be present in the FASTA file.">
                    <!-- An empty name would put a bare "-ref=" on the command line; reject it in the form instead. -->
                    <validator type="empty_field" />
                </param>
            </when>
            <when value="" />
        </conditional>

        <!-- The option values are the literal command line flags (or an empty string for no flag). -->
        <param name="ambiguous" type="select" label="Treat ambiguous bases" help="If 1: Treat ambiguous bases as N, ambiguous bases (N, R, V etc.) are treated as N (no call). If 2: Resolve ambiguous characters, if the character represents two bases and one is the reference base, convert it to the non-reference base. Otherwise convert it to N. Default: 0: Don't treat ambiguous bases">
            <option value="" selected="true">0: Don't treat ambiguous bases</option>
            <option value="-ambiguousToN">1: Treat ambiguous bases as N (no call)</option>
            <option value="-resolveAmbiguous">2: Resolve ambiguous characters (convert)</option>
        </param>

        <!-- Sequence exclusion, by name list and by mismatch count. -->
        <section name="excl_seq" title="Exclude sequences" expanded="true">
            <param argument="-excludeFile" format="txt" type="data" optional="true" label="Exclude sequences from text file" help="Exclude sequences named in file which has one sequence name per line." />
            <param argument="-maxDiff" type="integer" min="0" value="0" label="Maximum number of mismatches" help="Exclude sequences with more than N mismatches with the reference sequence. If -windowSize is used, sequences are masked accordingly before the mismatches are counted. Default: 0" />
        </section>

        <!-- Site masking, by VCF of positions and by ambiguity window. -->
        <section name="mask_sites" title="Mask sites" expanded="true">
            <param argument="-maskSites" format="vcf" type="data" optional="true" label="Mask sites at given positions (VCF file)" help="Exclude variants in positions recommended for masking in file. Typically https://github.com/W-L/ProblematicSites_SARS-CoV2/raw/master/problematic_sites_sarsCov2.vcf" />
            <param argument="-windowSize" type="integer" min="0" value="0" label="Window size to mask bases" help="Mask any base for which there are at least -minAmbigInWindow bases in a window of +-N bases around the base. Masking approach adapted from https://github.com/roblanf/sarscov2phylo/blob/master/scripts/mask_seq.py Use -windowSize=7 for same results. Default: 0" />
            <param argument="-minAmbigInWindow" type="integer" min="1" value="2" label="Minimum of ambiguous characters within the window given above" help="When -windowSize is provided, mask any base for which there are at least this many N, ambiguous or gap characters within the window. Default: 2" />
        </section>

        <!-- Allele filters applied to the emitted variants. -->
        <param argument="-includeNoAltN" type="boolean" truevalue="-includeNoAltN" falsevalue="" label="Include positions without defined ALT allele" help="Include base positions with no alternate alleles observed, but at least one N (missing base/no-call). Default: false" />
        <param argument="-minAc" type="integer" min="0" value="0" label="Minimum allele count" help="Ignore alternate alleles observed fewer than N times. Default: 0" />
        <param argument="-minAf" type="float" min="0.0" max="1.0" value="0.0" label="Minimum allele frequency" help="Ignore alternate alleles observed in less than F of non-N bases. Default: 0.0" />

        <!-- Options that shape the VCF itself. -->
        <section name="output" title="Output VCF options" expanded="true">
            <param argument="-startOffset" type="integer" min="0" value="0" label="Start offset" help="Add N bases to each position, e.g. for trimmed alignments. Default: 0" />
            <param argument="-includeRef" type="boolean" truevalue="-includeRef" falsevalue="" label="Include the reference in the genotype columns" help="Default: omitted as redundant (false)" />
            <param argument="-noGenotypes" type="boolean" truevalue="-noGenotypes" falsevalue="" label="Output 8-column VCF" help="VCF without the sample genotype columns. Default: false" />
            <param argument="-vcfChrom" type="text" optional="true" label="Use this sequence for the CHROM column in the VCF" help="Default: name of the reference sequence." />
        </section>
    </inputs>
    <!-- Single VCF dataset; the command passes it to faToVcf as '$out', the second positional argument. -->
    <outputs>
        <data name="out" format="vcf" />
    </outputs>
    <tests>
        <!-- Test 1: default parameters, only the alignment is supplied. -->
        <test expect_num_outputs="1">
            <param name="in_fasta" value="input.fa" />
            <output name="out" file="out1.vcf" ftype="vcf" lines_diff="4" />
        </test>

        <!-- Test 2: a value is set for every input parameter. -->
        <test expect_num_outputs="1">
            <param name="in_fasta" value="input.fa" />
            <!-- custom reference sequence -->
            <conditional name="ref_seq">
                <param name="refSeq" value="customRef" />
                <param name="ref" value="sample3" />
            </conditional>
            <param name="ambiguous" value="-ambiguousToN" />
            <!-- sequence exclusion -->
            <section name="excl_seq">
                <param name="excludeFile" value="excl.txt" />
                <param name="maxDiff" value="3" />
            </section>
            <!-- site masking -->
            <section name="mask_sites">
                <param name="maskSites" value="mask.vcf" />
                <param name="windowSize" value="7" />
                <param name="minAmbigInWindow" value="3" />
            </section>
            <!-- allele filters -->
            <param name="includeNoAltN" value="true" />
            <param name="minAc" value="1" />
            <param name="minAf" value="0.1" />
            <!-- VCF output options -->
            <section name="output">
                <param name="startOffset" value="1" />
                <param name="includeRef" value="true" />
                <param name="noGenotypes" value="true" />
                <param name="vcfChrom" value="sample1" />
            </section>
            <output name="out" file="out2.vcf" ftype="vcf" lines_diff="4" />
        </test>
    </tests>
    <!-- User-facing help in reStructuredText. The two ".. _name:" targets at the
         bottom resolve the `faToVcf`_ and `source code`_ references in the text. -->
    <help><![CDATA[
**What it does**

`faToVcf`_ is a tool to extract a VCF from a multi-sequence FASTA alignment.

For implementation details see faToVcf's `source code`_.

.. _faToVcf: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/FOOTER.txt
.. _source code: https://github.com/ucscGenomeBrowser/kent/blob/master/src/hg/utils/faToVcf/faToVcf.c

    ]]>    </help>
    <!-- Publication to cite for this tool, given as a DOI.
         NOTE(review): confirm this DOI is the intended reference for the UCSC utilities. -->
    <citations>
        <citation type="doi">10.1093/bib/bbs038</citation>
    </citations>
</tool>