<!--
view bcftools_consensus.xml @ 26:62ed6ee05b6f draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/bcftools commit f6efda26965eb73c9107d367fd5ffdf246ed0dbc
author iuc
date Tue, 02 Dec 2025 07:57:56 +0000
parents 5970245e8525
children
line wrap: on
line source
-->

<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Short description of what the tool does. -->
    <description>Create consensus sequence by applying VCF variants to a reference fasta file</description>
    <!-- @EXECUTABLE@ is substituted into the tool name/id, the command line and the help text of this file;
         all other tokens and expanded macros are presumably defined in the imported macros.xml. -->
    <macros>
        <token name="@EXECUTABLE@">consensus</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <!-- Besides the shared requirements, the command calls "samtools faidx" (region extraction) and
         awk with gensub() (optional FASTA ID renaming), hence the samtools and gawk requirements. -->
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
        <requirement type="package" version="5.3.0">gawk</requirement>
    </expand>
    <expand macro="version_command"/>
    <command detect_errors="aggressive"><![CDATA[
## Input and reference preparation; these tokens are expanded from macros defined outside this file
@PREPARE_INPUT_FILE@
#set $section = $reference_source
@PREPARE_FASTA_REF@

## When a region restriction is requested, extract only those regions from the
## reference with samtools faidx and pipe them to bcftools, which then reads the
## reference from stdin ("-")
#set $section = $sec_restrict
#if $section.regions.regions_src != '__none__':
  samtools faidx
  #if $section.regions.regions_src == 'regions':
    #set $intervals = $section.regions.region_specs
    @PARSE_INTERVALS@
    #set $ref_regions_spec = " ".join("'" + c + "'" for c in $components)
    '$input_fa_ref' $ref_regions_spec |
  #else if $section.regions.regions_src == 'regions_file':
    -r '$section.regions.regions_file' '$input_fa_ref' |
  #end if
  #set $input_fa_ref = "-"
#end if

bcftools @EXECUTABLE@

## regions_overlap lives inside the "regions" conditional (the tool tests nest it there),
## so it has to be addressed through $section.regions
#if $section.regions.regions_src != '__none__':
  --regions-overlap $section.regions.regions_overlap
#end if

#set $section = $reference_source
@FASTA_REF@

## The option values of the mode select are literal command line flags
$mode.select
#if not str($mode.select):
  $mode.specify_samples.how
  #if str($mode.specify_samples.how) in ["-s", "-S"]:
    '${mode.specify_samples.invert_samples}${mode.specify_samples.samples_spec}'
  #end if
#else if str($mode.select) == "-H":
  ## Only the "" and "pIu" rules carry an allele number; it is prepended to the rule suffix
  #if str($mode.haplotype.rule) in ["", "pIu"]:
    #set $haplotype_option = str($mode.haplotype.allele_n) + str($mode.haplotype.rule)
  #else:
    #set $haplotype_option = str($mode.haplotype.rule)
  #end if
  $haplotype_option
  #if $mode.sample:
    --sample '$mode.sample'
  #end if
#end if

## Collect the mask datasets that were actually provided
#set $masks = []
#for $m in $masking_options.mask:
  #if $m:
    #silent $masks.append($m)
  #end if
#end for

#if $masks:
  #if $masking_options.mask_with:
    #set $masking_instructions = []
    ## split into at most one instruction per mask dataset
    #for $i in str($masking_options.mask_with).split(",", maxsplit=len($masks)-1):
      #silent $masking_instructions.append("--mask-with '" + $i + "'")
    #end for
  #else:
    #set $masking_instructions = [""]
  #end if
  #if len($masking_instructions) == 1:
    ## use same masking instruction for all masks
    #set $masking_instructions = [""] * (len($masks) - 1) + $masking_instructions
  #else if len($masking_instructions) < len($masks):
    ## fill in empty masking instructions for missing ones, which will make bcftools complain about them
    #silent $masking_instructions.extend(["--mask-with ''"] * (len($masks) - len($masking_instructions)))
  #end if

  #for $m, $i in zip($masks, $masking_instructions):
    --mask '$m' $i
  #end for
#end if

#if $masking_options.absent:
--absent '$masking_options.absent'
#end if

#if $masking_options.mark_del:
  --mark-del '$masking_options.mark_del'
#end if

## The select value already contains the flag (plus uc/lc); only the
## bare flag needs the user-supplied custom character appended
$masking_options.insertions.mark_ins
#if str($masking_options.insertions.mark_ins) == "--mark-ins":
  '$masking_options.insertions.ins_custom'
#end if

$masking_options.snvs.mark_snv
#if str($masking_options.snvs.mark_snv) == "--mark-snv":
  '$masking_options.snvs.snv_custom'
#end if

#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@

#if $chain:
  --chain '$chain_file'
#end if

## Primary Input/Outputs
## Without renaming bcftools writes the output file itself; with renaming its
## stdout is piped through awk, which rewrites the FASTA header lines
#if str($rename) == "no":
  --output '$output_file'
#end if
@INPUT_FILE@
#if str($rename) == "yes":
  ## NOTE(review): the element identifier is interpolated unescaped into the awk program;
  ## identifiers containing quote characters would break it - confirm whether sanitizing is needed
  #set basename=$input_file.element_identifier
  | awk 'BEGIN {i=1} {if (match($0, /^>/)) {if (i==1) {name="${basename}"} else {name=sprintf("%s-%d","${basename}",i);} print(gensub(/>[^ ]+( ?.*)/, ">" name "\\1", 1)); i=i+1;} else {print}}' > '$output_file'
#end if
]]>
    </command>
    <inputs>
        <expand macro="macro_input"/>
        <expand macro="macro_fasta_ref"/>
        <!-- The option values of "select" are emitted verbatim on the command line,
             so they must stay valid bcftools consensus flags (empty = default mode). -->
        <conditional name="mode">
            <param name="select" type="select" label="Consensus building mode; at each variant site ...">
                <option value="-s -">ignore any sample genotypes; incorporate first allele from ALT column (-s -)</option>
                <option value="-I -s -">ignore any sample genotypes; incorporate IUPAC code representing all alleles from REF/ALT columns (-I -s -)</option>
                <option value="" selected="true">incorporate IUPAC code representing the genotypes of all selected samples (default)</option>
                <option value="-H">incorporate specific haplotype allele of one selected sample</option>
            </param>
            <when value="-s -" />
            <when value="-I -s -" />
            <when value="">
                <expand macro="macro_samples_enhanced" />
            </when>
            <when value="-H">
                <expand macro="macro_sample" help="The name of the single sample alleles of which should get used for the consensus sequence. This field is optional only if your input VCF dataset specifies exactly one sample."/>
                <!-- The rule value is appended to "-H"; only the first two rules take an allele number N,
                     which the command template prepends to the rule value. -->
                <conditional name="haplotype">
                    <param name="rule" type="select" label="From the selected sample's genotype use ...">
                        <option value="">the Nth allele (for both phased and unphased genotypes) (-H N)</option>
                        <option value="pIu">the Nth allele if the genotype is phased, the IUPAC code representing the genotype if it's unphased (-H NpIu)</option>
                        <option value="R">the ALT allele where the sample is homozygous, the REF allele otherwise (-H R)</option>
                        <option value="A">the ALT allele where the sample is homozygous or heterozygous (-H A)</option>
                        <option value="LR">the ALT allele where the sample is homozygous, the REF allele where it's heterozygous unless the ALT allele is longer (-H LR)</option>
                        <option value="LA">the ALT allele where the sample is homozygous, the ALT allele where it's heterozygous unless the REF allele is longer (-H LA)</option>
                        <option value="SR">the ALT allele where the sample is homozygous, the REF allele where it's heterozygous unless the ALT allele is shorter (-H SR)</option>
                        <option value="SA">the ALT allele where the sample is homozygous, the ALT allele where it's heterozygous unless the REF allele is shorter (-H SA)</option>
                    </param>
                    <when value="">
                        <param name="allele_n" type="integer" min="1" value="1" label="where N is" />
                    </when>
                    <when value="pIu">
                        <param name="allele_n" type="integer" min="1" value="1" label="where N is" />
                    </when>
                    <when value="R" />
                    <when value="A" />
                    <when value="LR" />
                    <when value="LA" />
                    <when value="SR" />
                    <when value="SA" />
                </conditional>
            </when>
        </conditional>
        <!-- Masking and marking parameters. For the two "mark" selects the option value is the complete
             command line fragment; only the bare flag variant needs the custom character from the
             nested text parameter. -->
        <section name="masking_options" expanded="false" title="Masking and marking options" help="The various options in this section are applied in the order they appear, i.e. 1) masking, 2) marking of absent sites, 3) SNV/indel marking.">
            <param argument="--mask" type="data" format="tabular" multiple="true" optional="true" label="Mask" help="Replace regions according to the next --mask-with option"/>
            <param argument="--mask-with" type="text" value="" optional="true" label="Mask with" help="Replace with CHAR (skips overlapping variants; default: N); use &quot;uc&quot; or &quot;lc&quot; to change to uppercase or lowercase, respectively. If you have provided more than one Mask dataset and you would like to apply a unique mask for the regions in each of them, then you can specify a comma-separated list of masking instructions (as many as mask datasets)." />
            <param argument="--absent" type="text" value="" optional="true" label="Mark absent" help="Replace reference bases at positions absent from the VCF input with a custom character.">
                <validator type="regex">^.$</validator>
            </param>
            <param argument="--mark-del" type="text" value="" optional="true" label="Mark deletions" help="Instead of removing the reference base at deleted positions, replace the base with a custom character.">
                <validator type="regex">^.$</validator>
            </param>
            <conditional name="insertions">
                <param argument="--mark-ins" type="select" label="Mark insertions" help="Highlight insertions in uppercase or lowercase, or by using a fixed character instead of inserted bases, leaving the rest as is">
                    <option value="">Do not mark insertions</option>
                    <option value="--mark-ins uc">Uppercase (uc)</option>
                    <option value="--mark-ins lc">Lowercase (lc)</option>
                    <option value="--mark-ins">Custom character</option>
                </param>
                <when value="" />
                <when value="--mark-ins uc" />
                <when value="--mark-ins lc" />
                <when value="--mark-ins">
                    <!-- Must be named ins_custom: the command template reads masking_options.insertions.ins_custom -->
                    <param name="ins_custom" type="text" optional="false" label="Character to use instead of an inserted base">
                        <validator type="empty_field"/>
                    </param>
                </when>
            </conditional>
            <conditional name="snvs">
                <param argument="--mark-snv" type="select" label="Mark substitutions" help="Highlight substitutions in uppercase or lowercase, or by using a fixed character instead of substituted bases, leaving the rest as is">
                    <option value="">Do not mark substitutions</option>
                    <option value="--mark-snv uc">Uppercase (uc)</option>
                    <option value="--mark-snv lc">Lowercase (lc)</option>
                    <option value="--mark-snv">Custom character</option>
                </param>
                <when value="" />
                <when value="--mark-snv uc" />
                <when value="--mark-snv lc" />
                <when value="--mark-snv">
                    <param name="snv_custom" type="text" optional="false" label="Character to use instead of a substituted base">
                        <validator type="empty_field"/>
                    </param>
                </when>
            </conditional>
        </section>
        <!-- Include/exclude expressions and the optional region restriction of the reference -->
        <section name="sec_restrict" title="Restrict to" expanded="false">
            <expand macro="macro_include"/>
            <expand macro="macro_exclude"/>
            <expand macro="macro_region_restrict"
                    label_select="Restrict consensus building to only specified regions of reference?"/>
        </section>
        <!-- "chain" adds a chain file output; "rename" switches the command to the awk
             post-processing step that rewrites the FASTA IDs -->
        <param name="chain"
               type="boolean"
               truevalue="yes"
               falsevalue="no"
               label="Write a chain file for liftover"/>
        <param name="rename"
               type="boolean"
               truevalue="yes"
               falsevalue="no"
               label="Set output FASTA ID from name of VCF"/>
    </inputs>
    <outputs>
        <!-- Consensus sequence; has no filter, so it is always declared -->
        <data name="output_file"
              format="fasta"
              label="${tool.name} on ${on_string}: consensus fasta"/>
        <!-- Liftover chain; filtered on the "chain" input parameter -->
        <data name="chain_file"
              format="txt"
              label="${tool.name} on ${on_string}: chain">
            <filter>chain</filter>
        </data>
    </outputs>
    <tests>
        <!-- Masking with one mask dataset plus chain file output; reference given as consensus.fa -->
        <test expect_num_outputs="2">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" value="consensus.vcf" ftype="vcf"/>
            <section name="masking_options">
                <param name="mask" value="consensus.tab" ftype="tabular"/>
            </section>
            <param name="chain" value="true"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="NNNNNNNNNNNNNNNNNNNNNNNNNN"/>
                </assert_contents>
            </output>
            <output name="chain_file">
                <assert_contents>
                    <has_text text="chain 497 1 501 + 1 501 1 502 + 1 502 1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Masking plus chain file output again, this time with a cached reference -->
        <test expect_num_outputs="2">
            <expand macro="test_using_reference" select_from="cached" ref="consensus"/>
            <param name="input_file" value="consensus.vcf" ftype="vcf" dbkey="?"/>
            <section name="masking_options">
                <param name="mask" value="consensus.tab" ftype="tabular"/>
            </section>
            <param name="chain" value="true"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="NNNNNNNNNNNNNNNNNNNNNNNNNN"/>
                </assert_contents>
            </output>
            <output name="chain_file">
                <assert_contents>
                    <has_text text="chain 497 1 501 + 1 501 1 502 + 1 502 1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Renaming of FASTA IDs: the first record gets the VCF name, later records get a numeric suffix.
             Both checks live in a single assert_contents element so that each of them is evaluated. -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" ftype="vcf" value="consensus.vcf"/>
            <section name="masking_options">
                <param name="mask" ftype="tabular" value="consensus.tab"/>
            </section>
            <param name="chain" value="false"/>
            <param name="rename" value="true"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="&gt;consensus.vcf"/>
                    <has_text text="&gt;consensus.vcf-2"/>
                </assert_contents>
            </output>
        </test>
        <!-- Mode "-s -" combined with an include expression selecting SNPs only -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" value="consensus.vcf" ftype="vcf"/>
            <conditional name="mode">
                <param name="select" value="-s -"/>
            </conditional>
            <section name="sec_restrict">
                <param name="include" value="TYPE=&quot;snp&quot;"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="TACAAAATATGACATATCAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGA"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test the absent option: positions without a VCF record are replaced by W -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" value="consensus.vcf" ftype="vcf"/>
            <conditional name="mode">
                <param name="select" value="-s -"/>
            </conditional>
            <section name="masking_options">
                <param name="absent" value="W"/>
            </section>
            <section name="sec_restrict">
                <param name="include" value="TYPE=&quot;snp&quot;"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="WWWAWAWWAWWWWWWWWCWWWWWWWW"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--absent"/>
            </assert_command>
        </test>
        <!-- mark_del, mark_ins and mark_snv together; checks that all three flags reach the command line -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" value="consensus.vcf" ftype="vcf"/>
            <conditional name="mode">
                <param name="select" value="-s -"/>
            </conditional>
            <section name="masking_options">
                <param name="mark_del" value="-"/>
                <conditional name="insertions">
                    <param name="mark_ins" value="--mark-ins uc"/>
                </conditional>
                <conditional name="snvs">
                    <param name="mark_snv" value="--mark-snv uc"/>
                </conditional>
            </section>
            <section name="sec_restrict">
                <param name="include" value="TYPE=&quot;snp&quot;"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="TACAAAATATGACATATCAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGA"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--mark-del"/>
                <has_text text="--mark-ins"/>
                <has_text text="--mark-snv"/>
            </assert_command>
        </test>
        <!-- Region restriction (1:161-190) with absent and mark_del characters;
             regions_overlap is left at its default -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="csq.fa"/>
            <param name="input_file" value="csq.vcf" ftype="vcf"/>
            <section name="masking_options">
                <param name="absent" value="."/>
                <param name="mark_del" value="-"/>
            </section>
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions"/>
                    <repeat name="region_specs">
                        <param name="chrom" value="1"/>
                        <param name="start" value="161"/>
                        <param name="stop" value="190"/>
                    </repeat>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_line line="&gt;1:161-190"/>
                    <has_line line="-............................Y"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same region restriction, but with regions_overlap set to 0: the first base of the
             region is then expected as absent (.) rather than as a marked deletion -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="csq.fa"/>
            <param name="input_file" value="csq.vcf" ftype="vcf"/>
            <section name="masking_options">
                <param name="absent" value="."/>
                <param name="mark_del" value="-"/>
            </section>
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions"/>
                    <repeat name="region_specs">
                        <param name="chrom" value="1"/>
                        <param name="start" value="161"/>
                        <param name="stop" value="190"/>
                    </repeat>
                    <param name="regions_overlap" value="0"/>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_line line="&gt;1:161-190"/>
                    <has_line line=".............................Y"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================


Create consensus sequence by applying VCF variants to a reference fasta file.

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_HOWTOS@

The option to set the new consensus' FASTA ID from the name of the VCF is provided by post-processing
the bcftools consensus output. It is primarily intended for use when the VCF is coming from a list
collection where the elements of the list are named meaningfully (e.g. named after sample names). This
is useful, for example, when the consensus sequences are being prepared for a multiple sequence
alignment that is then fed to a phylogeny program.
]]>
    </help>
    <!-- Citation entries come from the "citations" macro, which is defined outside this file. -->
    <expand macro="citations"/>
</tool>