Mercurial > repos > iuc > ucsc_fatovcf
view fatovcf.xml @ 3:18c9f51a37a1 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ucsc_tools/fatovcf commit cf03c3f6b7afb8e673cc5ced6178d4f6f96a9116
| author | iuc |
|---|---|
| date | Tue, 03 Dec 2024 09:45:26 +0000 |
| parents | d49aa685f7f6 |
| children |
line wrap: on
line source
<tool id="fatovcf" name="faToVcf" version="@TOOL_VERSION@+galaxy0" profile="21.05" license="MIT">
    <!--
        Galaxy wrapper around the UCSC faToVcf utility: takes a FASTA alignment
        and writes a VCF of single-nucleotide differences.
    -->
    <description>
        Convert a FASTA alignment file to Variant Call Format (VCF) single-nucleotide diffs
    </description>
    <macros>
        <!-- Single definition of the wrapped tool version; reused by the tool
             version attribute, the package requirement and the version command. -->
        <token name="@TOOL_VERSION@">473</token>
    </macros>
    <xrefs>
        <xref type="bio.tools">UCSC_Genome_Browser_Utilities</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">ucsc-fatovcf</requirement>
    </requirements>
    <!-- The version is echoed from the token rather than queried from the binary. -->
    <version_command><![CDATA[
        echo "@TOOL_VERSION@"
    ]]></version_command>
    <!--
        Command template (Cheetah).
        * The input dataset is linked to a fixed local name (in.fa); it is a
          required parameter, so the link is created unconditionally.
        * Every free-text or dataset value is single-quoted before it reaches
          the shell; integer, float and select/boolean values are emitted as is.
        * Optional flags are only emitted when the corresponding input is set
          (or, for windowSize and startOffset, greater than zero).
    -->
    <command detect_errors="exit_code"><![CDATA[
        ln -s '$in_fasta' in.fa &&
        faToVcf
            in.fa
            '$out'
            ## reference sequence: only passed when the user picked a custom one
            #if $ref_seq.refSeq == "customRef"
                -ref='$ref_seq.ref'
            #end if
            ## the select value is the literal flag (or empty for "don't treat")
            $ambiguous
            #if $excl_seq.excludeFile
                -excludeFile='$excl_seq.excludeFile'
            #end if
            -maxDiff=$excl_seq.maxDiff
            #if $mask_sites.maskSites
                -maskSites='$mask_sites.maskSites'
            #end if
            ## minAmbigInWindow is only meaningful together with a window size
            #if $mask_sites.windowSize > 0
                -windowSize=$mask_sites.windowSize
                -minAmbigInWindow=$mask_sites.minAmbigInWindow
            #end if
            $includeNoAltN
            -minAc=$minAc
            -minAf=$minAf
            #if $output.startOffset > 0
                -startOffset=$output.startOffset
            #end if
            $output.includeRef
            $output.noGenotypes
            #if $output.vcfChrom
                -vcfChrom='$output.vcfChrom'
            #end if
    ]]></command>
    <inputs>
        <param name="in_fasta" format="fasta" type="data" label="FASTA Alignment" help="Must contain a series of sequences with different names and the same length. Both N and - are treated as missing information." />
        <!-- Reference selection: the empty value keeps the tool default (first
             sequence); "customRef" reveals the sequence-name field. -->
        <conditional name="ref_seq">
            <param name="refSeq" type="select" label="Determine reference sequence" help="Which sequence from the FASTA file should be used as the reference sequence.">
                <option value="" selected="true">Use the first sequence as reference</option>
                <option value="customRef">Use a different sequence as reference</option>
            </param>
            <when value="customRef">
                <param argument="-ref" type="text" label="Name of sequence that should be used as reference sequence:" help="Must be present in the FASTA file.">
                    <!-- An empty name would otherwise be passed on as a bare "-ref=" option. -->
                    <validator type="empty_field" message="Please enter the name of the reference sequence." />
                </param>
            </when>
            <when value="" />
        </conditional>
        <!-- Option values are the command-line flags themselves. -->
        <param name="ambiguous" type="select" label="Treat ambiguous bases" help="If 1: Treat ambiguous bases as N, ambiguous bases (N, R, V etc.) are treated as N (no call). If 2: Resolve ambiguous characters, if the character represents two bases and one is the reference base, convert it to the non-reference base. Otherwise convert it to N. Default: 0: Don't treat ambiguous bases">
            <option value="" selected="true">0: Don't treat ambiguous bases</option>
            <option value="-ambiguousToN">1: Treat ambiguous bases as N (no call)</option>
            <option value="-resolveAmbiguous">2: Resolve ambiguous characters (convert)</option>
        </param>
        <section name="excl_seq" title="Exclude sequences" expanded="true">
            <param argument="-excludeFile" format="txt" type="data" optional="true" label="Exclude sequences from text file" help="Exclude sequences named in file which has one sequence name per line." />
            <param argument="-maxDiff" type="integer" min="0" value="0" label="Maximum number of mismatches" help="Exclude sequences with more than N mismatches with the reference sequence. If -windowSize is used, sequences are masked accordingly before the mismatches are counted. Default: 0" />
        </section>
        <section name="mask_sites" title="Mask sites" expanded="true">
            <param argument="-maskSites" format="vcf" type="data" optional="true" label="Mask sites at given positions (VCF file)" help="Exclude variants in positions recommended for masking in file. Typically https://github.com/W-L/ProblematicSites_SARS-CoV2/raw/master/problematic_sites_sarsCov2.vcf" />
            <param argument="-windowSize" type="integer" min="0" value="0" label="Window size to mask bases" help="Mask any base for which there are at least -minAmbigInWindow bases in a window of +-N bases around the base. Masking approach adapted from https://github.com/roblanf/sarscov2phylo/blob/master/scripts/mask_seq.py Use -windowSize=7 for same results. Default: 0" />
            <param argument="-minAmbigInWindow" type="integer" min="1" value="2" label="Minimum of ambiguous characters within the window given above" help="When -windowSize is provided, mask any base for which there are at least this many N, ambiguous or gap characters within the window. Default: 2" />
        </section>
        <param argument="-includeNoAltN" type="boolean" truevalue="-includeNoAltN" falsevalue="" label="Include positions without defined ALT allele" help="Include base positions with no alternate alleles observed, but at least one N (missing base/no-call). Default: false" />
        <param argument="-minAc" type="integer" min="0" value="0" label="Minimum allele count" help="Ignore alternate alleles observed fewer than N times. Default: 0" />
        <param argument="-minAf" type="float" min="0.0" max="1.0" value="0.0" label="Minimum allele frequency" help="Ignore alternate alleles observed in less than F of non-N bases. Default: 0.0" />
        <section name="output" title="Output VCF options" expanded="true">
            <param argument="-startOffset" type="integer" min="0" value="0" label="Start offset" help="Add N bases to each position, e.g. for trimmed alignments. Default: 0" />
            <param argument="-includeRef" type="boolean" truevalue="-includeRef" falsevalue="" label="Include the reference in the genotype columns" help="Default: omitted as redundant (false)" />
            <param argument="-noGenotypes" type="boolean" truevalue="-noGenotypes" falsevalue="" label="Output 8-column VCF" help="VCF without the sample genotype columns. Default: false" />
            <param argument="-vcfChrom" type="text" optional="true" label="Use this sequence for the CHROM column in the VCF" help="Default: name of the reference sequence." />
        </section>
    </inputs>
    <outputs>
        <data name="out" format="vcf" />
    </outputs>
    <tests>
        <!-- default params -->
        <test expect_num_outputs="1">
            <param name="in_fasta" value="input.fa" />
            <output name="out" ftype="vcf" file="out1.vcf" lines_diff="4" />
        </test>
        <!-- set a value for every input parameter -->
        <test expect_num_outputs="1">
            <param name="in_fasta" value="input.fa" />
            <conditional name="ref_seq">
                <param name="refSeq" value="customRef" />
                <param name="ref" value="sample3" />
            </conditional>
            <param name="ambiguous" value="-ambiguousToN" />
            <section name="excl_seq">
                <param name="excludeFile" value="excl.txt" />
                <param name="maxDiff" value="3" />
            </section>
            <section name="mask_sites">
                <param name="maskSites" value="mask.vcf" />
                <param name="windowSize" value="7" />
                <param name="minAmbigInWindow" value="3" />
            </section>
            <param name="includeNoAltN" value="true" />
            <param name="minAc" value="1" />
            <param name="minAf" value="0.1" />
            <section name="output">
                <param name="startOffset" value="1" />
                <param name="includeRef" value="true" />
                <param name="noGenotypes" value="true" />
                <param name="vcfChrom" value="sample1" />
            </section>
            <output name="out" ftype="vcf" file="out2.vcf" lines_diff="4" />
        </test>
    </tests>
    <help><![CDATA[
**What it does**

`faToVcf`_ is a tool to extract a VCF from a multi-sequence FASTA alignment.

For implementation details see faToVcf's `source code`_.

.. _faToVcf: http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/FOOTER.txt
.. _source code: https://github.com/ucscGenomeBrowser/kent/blob/master/src/hg/utils/faToVcf/faToVcf.c
    ]]></help>
    <citations>
        <citation type="doi">10.1093/bib/bbs038</citation>
    </citations>
</tool>
