Mercurial > repos > iuc > bcftools_csq

<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short summary of what the tool does. -->
    <description>Haplotype aware consequence predictor</description>
    <!-- The EXECUTABLE token defined here is substituted into the tool name and id,
         the bcftools command line and the help text. All other tokens and every
         expand macro used in this file are expected to come from macros.xml,
         which is not part of this file (confirm definitions there). -->
    <macros>
        <token name="@EXECUTABLE@">csq</token>
        <import>macros.xml</import>
    </macros>
    <!-- Shared boilerplate pulled in from the imported macros; the requirements
         macro is extended here with an extra samtools requirement. -->
    <expand macro="bio_tools"/>
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
    </expand>
    <expand macro="version_command"/>
    <command detect_errors="aggressive"><![CDATA[
## Staging: the input file, the reference FASTA and any targets/regions files.
## The section variable is re-pointed before each group of shared macros,
## which presumably read it (their definitions live in the imported macros file).
@PREPARE_INPUT_FILE@
#set $section = $sec_required.reference_source
@PREPARE_FASTA_REF@

#set $section = $sec_restrict
@PREPARE_TARGETS_FILE@
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

## Required references: reference FASTA and GFF3 annotation
#set $section = $sec_required.reference_source
@FASTA_REF@
--gff-annot '$sec_required.gff_annot'

## Consequence-calling options; each optional flag is emitted only when set
#if str($sec_csq_opts.ncsq):
    --ncsq $sec_csq_opts.ncsq
#end if
$sec_csq_opts.local_csq
#if $sec_csq_opts.phase:
    --phase $sec_csq_opts.phase
#end if
#if str($sec_csq_opts.custom_tag):
    --custom-tag '$sec_csq_opts.custom_tag'
#end if
#if str($sec_csq_opts.trim_protein_seq):
    --trim-protein-seq $sec_csq_opts.trim_protein_seq
#end if
#if $sec_csq_opts.genetic_code:
    --genetic-code '$sec_csq_opts.genetic_code'
#end if

## Sample subsetting
#set $section = $sec_subset
@SAMPLES@

## Record filtering and region/target restriction
#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@
@REGIONS@
@TARGETS@

@OUTPUT_TYPE@

## Input file last; annotated output is captured from stdout
@INPUT_FILE@
> '$output_file'

]]>
    </command>
    <inputs>
        <!-- Primary input dataset; the parameter itself is declared by the shared macro. -->
        <expand macro="macro_input"/>
        <!-- Mandatory references: the reference FASTA (via macro) and the GFF3 annotation. -->
        <section name="sec_required" expanded="true" title="Required References">
            <expand macro="macro_fasta_ref"/>
            <param name="gff_annot" type="data" format="gff3" label="GFF3 annotation file" help="From Ensembl:  ftp://ftp.ensembl.org/pub/current_gff3/"/>
        </section>
        <!-- Options specific to consequence calling. Optional parameters left unset
             are omitted from the command line. -->
        <section name="sec_csq_opts" expanded="true" title="CSQ Options">
            <param name="ncsq" type="integer" value="16" min="1" max="50" label="maximum number of consequences to consider per site" help="--ncsq INT (default: 16)"/>
            <param name="local_csq" type="boolean" truevalue="--local-csq" falsevalue="" checked="false" label="run localized predictions considering only one VCF record at a time" help="--local-csq switch off haplotype-aware calling, run localized predictions considering only one VCF record at a time"/>
            <param name="phase" type="select" optional="true" label="phase" help="how to construct haplotypes and how to deal with unphased data">
                <option value="a">take GTs as is, create haplotypes regardless of phase (0/1 -&gt; 0|1)</option>
                <option value="m">merge *all* GTs into a single haplotype (0/1 -&gt; 1, 1/2 -&gt; 1)</option>
                <option value="r">require phased GTs, throw an error on unphased het GTs</option>
                <option value="R">create non-reference haplotypes if possible (0/1 -&gt; 1|1, 1/2 -&gt; 1|2)</option>
                <option value="s">skip unphased GTs</option>
            </param>
            <!-- The value is placed on the command line, so it is restricted to word
                 characters; an empty value is accepted and means the option is not used. -->
            <param name="custom_tag" type="text" value="" optional="true" label="use this custom tag to store consequences rather than the default BCSQ tag" help="--custom-tag STRING; leave empty to keep the default BCSQ tag">
                <validator type="regex" message="The custom tag may only contain letters, digits and underscores">^(\w+)?$</validator>
            </param>
            <param argument="--trim-protein-seq" type="integer" min="0" value="" optional="true" label="Abbreviate protein-changing predictions" help="Abbreviate protein-changing predictions to max INT aminoacids"/>
            <param argument="--genetic-code" type="select" optional="true" label="Genetic code table" help="Choose a predefined genetic code table used to translate coding sequences. Leave unset for the default bcftools behaviour.">
                <option value="0">0: Standard simplified</option>
                <option value="1">1: Standard</option>
                <option value="2">2: Vertebrate mitochondrial</option>
                <option value="3">3: Yeast mitochondrial</option>
                <option value="4">4: Mold, Protozoan, Coelenterate mitochondrial; Mycoplasma; Spiroplasma</option>
                <option value="5">5: Invertebrate mitochondrial</option>
                <option value="6">6: Ciliate, Dasycladacean and Hexamita nuclear</option>
                <option value="9">9: Echinoderm and Flatworm mitochondrial</option>
                <option value="10">10: Euplotid nuclear</option>
                <option value="11">11: Bacterial, Archaeal and Plant plastid</option>
                <option value="12">12: Alternative yeast nuclear</option>
                <option value="13">13: Ascidian mitochondrial</option>
                <option value="14">14: Alternative flatworm mitochondrial</option>
                <option value="15">15: Blepharisma nuclear</option>
                <option value="16">16: Chlorophycean mitochondrial</option>
                <option value="21">21: Trematode mitochondrial</option>
                <option value="22">22: Scenedesmus obliquus mitochondrial</option>
                <option value="23">23: Thraustochytrium mitochondrial</option>
                <option value="24">24: Pterobranchia mitochondrial</option>
                <option value="25">25: Candidate Division SR1 and Gracilibacteria</option>
                <option value="26">26: Pachysolen tannophilus nuclear</option>
                <option value="27">27: Karyorelict nuclear</option>
                <option value="28">28: Condylostoma nuclear</option>
                <option value="29">29: Mesodinium nuclear</option>
                <option value="30">30: Peritrich nuclear</option>
                <option value="31">31: Blastocrithidia nuclear</option>
                <option value="33">33: Cephalodiscidae mitochondrial UAA-Tyr</option>
            </param>
        </section>
        <!-- Region/target restriction and include/exclude filters, all from shared macros. -->
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_region_restrict"/>
            <expand macro="macro_target_restrict"/>
            <expand macro="macro_include"/>
            <expand macro="macro_exclude"/>
        </section>
        <!-- Sample subsetting, from a shared macro. -->
        <section name="sec_subset" expanded="false" title="Subset Options">
            <expand macro="macro_samples"/>
        </section>
        <!-- Output format selector; the tests set it through the output_type parameter. -->
        <expand macro="macro_select_output_type"/>
    </inputs>
    <!-- The output dataset is declared by a shared macro. The command redirects
         stdout to output_file and the tests check an output of that name, so the
         macro presumably defines it (confirm in macros.xml). -->
    <outputs>
        <expand macro="macro_vcf_output"/>
    </outputs>
    <tests>
        <!-- Default options; the reference is given as csq.fa without select_from,
             so the macro's default reference source applies. -->
        <test>
            <param name="input_file" ftype="vcf" value="csq.vcf"/>
            <section name="sec_required">
                <expand macro="test_using_reference" ref="csq.fa"/>
                <param name="gff_annot" ftype="gff3" value="csq.gff3"/>
            </section>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="BCSQ"/>
                </assert_contents>
            </output>
        </test>
        <!-- Default options with the reference selected from the cached entries. -->
        <test>
            <param name="input_file" ftype="vcf" value="csq.vcf"/>
            <section name="sec_required">
                <expand macro="test_using_reference" select_from="cached" ref="csq"/>
                <param name="gff_annot" ftype="gff3" value="csq.gff3"/>
            </section>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="BCSQ"/>
                </assert_contents>
            </output>
        </test>
        <!-- Protein sequence trimming and genetic code selection: both flags must
             reach the command line together with the values chosen here. -->
        <test>
            <param name="input_file" ftype="vcf" value="csq.vcf"/>
            <section name="sec_required">
                <expand macro="test_using_reference" select_from="cached" ref="csq"/>
                <param name="gff_annot" ftype="gff3" value="csq.gff3"/>
            </section>
            <param name="output_type" value="v"/>
            <section name="sec_csq_opts">
                <param name="trim_protein_seq" value="10"/>
                <param name="genetic_code" value="1"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="BCSQ"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--trim-protein-seq 10"/>
                <has_text text="--genetic-code '1'"/>
            </assert_command>
        </test>
        <!-- Region restriction: selecting a region must add the regions overlap flag
             to the command line. -->
        <test>
            <param name="input_file" ftype="vcf" value="csq.vcf"/>
            <section name="sec_required">
                <expand macro="test_using_reference" select_from="cached" ref="csq"/>
                <param name="gff_annot" ftype="gff3" value="csq.gff3"/>
            </section>
            <param name="output_type" value="v"/>
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions"/>
                    <repeat name="region_specs">
                        <param name="chrom" value="1"/>
                    </repeat>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="BCSQ"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap"/>
            </assert_command>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================

Haplotype aware consequence predictor which correctly handles combined variants such as MNPs split over
multiple VCF records, SNPs separated by an intron (but adjacent in the spliced transcript) or nearby
frame-shifting indels which in combination in fact are not frame-shifting.

The output VCF is annotated with INFO/BCSQ and FORMAT/BCSQ tags (configurable with the -c option).
The latter is a bitmask of indexes to INFO/BCSQ, with interleaved haplotypes. See the usage examples
below for using the %TBCSQ converter in query for extracting a more human-readable form from this bitmask.
The construction of the bitmask limits the number of consequences that can be referenced in the FORMAT/BCSQ tags.
By default this is 16, but if more are required, see the --ncsq option.

The program requires as input a VCF/BCF file, the reference genome in fasta format (--fasta-ref)
and genomic features in the GFF3 format downloadable from the Ensembl website (--gff-annot),
and outputs an annotated VCF/BCF file. Currently, only Ensembl GFF3 files are supported.

By default, the input VCF should be phased. If phase is unknown, or only partially known,
the --phase option can be used to indicate how to handle unphased data. Alternatively,
haplotype aware calling can be turned off with the --local-csq option.

If conflicting (overlapping) variants within one haplotype are detected, a warning will
be emitted and predictions will be based on only the first variant in the analysis.

Symbolic alleles are not supported. They will remain unannotated in the output VCF and are
ignored for the prediction analysis.


@REGIONS_HELP@
@TARGETS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_HOWTOS@
]]>
    </help>
    <expand macro="citations"/>
</tool>
author	iuc
date	Tue, 02 Dec 2025 07:49:22 +0000
parents	10da4a86ee5c
children