Mercurial > repos > iuc > bcftools_consensus
view bcftools_consensus.xml @ 26:62ed6ee05b6f draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/bcftools commit f6efda26965eb73c9107d367fd5ffdf246ed0dbc
| author | iuc |
|---|---|
| date | Tue, 02 Dec 2025 07:57:56 +0000 |
| parents | 5970245e8525 |
| children |
line wrap: on
line source
<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Create consensus sequence by applying VCF variants to a reference fasta file</description>
    <macros>
        <token name="@EXECUTABLE@">consensus</token>
        <import>macros.xml</import>
    </macros>
    <expand macro="bio_tools"/>
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
        <!-- gawk provides gensub(), which the FASTA ID renaming step of the command uses -->
        <requirement type="package" version="5.3.0">gawk</requirement>
    </expand>
    <expand macro="version_command"/>
    <command detect_errors="aggressive"><![CDATA[
@PREPARE_INPUT_FILE@
#set $section = $reference_source
@PREPARE_FASTA_REF@

## When the run is restricted to regions, the reference is first cut down with
## samtools faidx and streamed to bcftools on stdin (reference path becomes "-").
#set $section = $sec_restrict
#if $section.regions.regions_src != '__none__':
    samtools faidx
    #if $section.regions.regions_src == 'regions':
        #set $intervals = $section.regions.region_specs
        @PARSE_INTERVALS@
        #set $ref_regions_spec = " ".join("'" + c + "'" for c in $components)
        '$input_fa_ref' $ref_regions_spec |
    #else if $section.regions.regions_src == 'regions_file':
        -r '$section.regions.regions_file' '$input_fa_ref' |
    #end if
    #set $input_fa_ref = "-"
#end if

bcftools @EXECUTABLE@

## regions_overlap is addressed through the "regions" conditional, which is
## where the tool tests set it.
#if $section.regions.regions_src != '__none__':
    --regions-overlap $section.regions.regions_overlap
#end if

#set $section = $reference_source
@FASTA_REF@

## Consensus building mode: the select value itself is the command line fragment.
$mode.select
#if not str($mode.select):
    $mode.specify_samples.how
    #if str($mode.specify_samples.how) in ["-s", "-S"]:
        '${mode.specify_samples.invert_samples}${mode.specify_samples.samples_spec}'
    #end if
#else if str($mode.select) == "-H":
    ## Only the "" and "pIu" rules take an allele number prefix (N or NpIu).
    #if str(mode.haplotype.rule) in ["", "pIu"]:
        #set haplotype_option = str(mode.haplotype.allele_n) + str(mode.haplotype.rule)
    #else:
        #set haplotype_option = str(mode.haplotype.rule)
    #end if
    $haplotype_option
    #if $mode.sample:
        --sample '$mode.sample'
    #end if
#end if

## Masking: collect the selected mask datasets, then pair each with a --mask-with instruction.
#set $masks = []
#for $m in $masking_options.mask:
    #if $m:
        #silent $masks.append($m)
    #end if
#end for
#if $masks:
    #if $masking_options.mask_with:
        #set $masking_instructions = []
        ## maxsplit caps the number of instructions at the number of masks
        #for $i in str($masking_options.mask_with).split(",", maxsplit=len($masks)-1):
            #silent $masking_instructions.append("--mask-with '" + $i + "'")
        #end for
    #else:
        #set $masking_instructions = [""]
    #end if
    #if len($masking_instructions) == 1:
        ## use same masking instruction for all masks
        #set $masking_instructions = [""] * (len($masks) - 1) + $masking_instructions
    #else if len($masking_instructions) < len($masks):
        ## fill in empty masking instructions for missing ones, which will make bcftools complain about them
        #silent $masking_instructions.extend(["--mask-with ''"] * (len($masks) - len($masking_instructions)))
    #end if
    #for $m, $i in zip($masks, $masking_instructions):
        --mask '$m' $i
    #end for
#end if

#if $masking_options.absent:
    --absent '$masking_options.absent'
#end if
#if $masking_options.mark_del:
    --mark-del '$masking_options.mark_del'
#end if

## The uc/lc choices carry their argument in the select value; only the bare
## option needs the custom character appended.
$masking_options.insertions.mark_ins
#if str($masking_options.insertions.mark_ins) == "--mark-ins":
    '$masking_options.insertions.ins_custom'
#end if
$masking_options.snvs.mark_snv
#if str($masking_options.snvs.mark_snv) == "--mark-snv":
    '$masking_options.snvs.snv_custom'
#end if

#set $section = $sec_restrict
@INCLUDE@
@EXCLUDE@

#if $chain:
    --chain '$chain_file'
#end if

## Primary Input/Outputs
## Without renaming bcftools writes the output itself; with renaming its stdout
## is piped through awk, which rewrites the FASTA IDs and writes the output.
#if str($rename) == "no":
    --output '$output_file'
#end if
@INPUT_FILE@
#if str($rename) == "yes":
    #set basename=$input_file.element_identifier
    ## First record gets the bare name, later records get a -<index> suffix.
    | awk 'BEGIN {i=1} {if (match($0, /^>/)) {if (i==1) {name="${basename}"} else {name=sprintf("%s-%d","${basename}",i);} print(gensub(/>[^ ]+( ?.*)/, ">" name "\\1", 1)); i=i+1;} else {print}}' > '$output_file'
#end if
    ]]></command>
    <inputs>
        <expand macro="macro_input"/>
        <expand macro="macro_fasta_ref"/>
        <!-- Option values of "select" are emitted verbatim on the command line -->
        <conditional name="mode">
            <param name="select" type="select" label="Consensus building mode; at each variant site ...">
                <option value="-s -">ignore any sample genotypes; incorporate first allele from ALT column (-s -)</option>
                <option value="-I -s -">ignore any sample genotypes; incorporate IUPAC code representing all alleles from REF/ALT columns (-I -s -)</option>
                <option value="" selected="true">incorporate IUPAC code representing the genotypes of all selected samples (default)</option>
                <option value="-H">incorporate specific haplotype allele of one selected sample</option>
            </param>
            <when value="-s -" />
            <when value="-I -s -" />
            <when value="">
                <expand macro="macro_samples_enhanced" />
            </when>
            <when value="-H">
                <expand macro="macro_sample" help="The name of the single sample alleles of which should get used for the consensus sequence. This field is optional only if your input VCF dataset specifies exactly one sample."/>
                <conditional name="haplotype">
                    <param name="rule" type="select" label="From the selected sample's genotype use ...">
                        <option value="">the Nth allele (for both phased and unphased genotypes) (-H N)</option>
                        <option value="pIu">the Nth allele if the genotype is phased, the IUPAC code representing the genotype if it's unphased (-H NpIu)</option>
                        <option value="R">the ALT allele where the sample is homozygous, the REF allele otherwise (-H R)</option>
                        <option value="A">the ALT allele where the sample is homozygous or heterozygous (-H A)</option>
                        <option value="LR">the ALT allele where the sample is homozygous, the REF allele where it's heterozygous unless the ALT allele is longer (-H LR)</option>
                        <option value="LA">the ALT allele where the sample is homozygous, the ALT allele where it's heterozygous unless the REF allele is longer (-H LA)</option>
                        <option value="SR">the ALT allele where the sample is homozygous, the REF allele where it's heterozygous unless the ALT allele is shorter (-H SR)</option>
                        <option value="SA">the ALT allele where the sample is homozygous, the ALT allele where it's heterozygous unless the REF allele is shorter (-H SA)</option>
                    </param>
                    <!-- Only the two rules that take an allele number expose allele_n -->
                    <when value="">
                        <param name="allele_n" type="integer" min="1" value="1" label="where N is" />
                    </when>
                    <when value="pIu">
                        <param name="allele_n" type="integer" min="1" value="1" label="where N is" />
                    </when>
                    <when value="R" />
                    <when value="A" />
                    <when value="LR" />
                    <when value="LA" />
                    <when value="SR" />
                    <when value="SA" />
                </conditional>
            </when>
        </conditional>
        <section name="masking_options" expanded="false" title="Masking and marking options" help="The various options in this section are applied in the order they appear, i.e. 1) masking, 2) marking of absent sites, 3) SNV/indel marking.">
            <param argument="--mask" type="data" format="tabular" multiple="true" optional="true" label="Mask" help="Replace regions according to the next --mask-with option"/>
            <param argument="--mask-with" type="text" value="" optional="true" label="Mask with" help="Replace with CHAR (skips overlapping variants; default: N); use &quot;uc&quot; or &quot;lc&quot; to change to uppercase or lowercase, respectively. If you have provided more than one Mask dataset and you would like to apply a unique mask for the regions in each of them, then you can specify a comma-separated list of masking instructions (as many as mask datasets)." />
            <param argument="--absent" type="text" value="" optional="true" label="Mark absent" help="Replace reference bases at positions absent from the VCF input with a custom character.">
                <!-- exactly one character -->
                <validator type="regex">^.$</validator>
            </param>
            <param argument="--mark-del" type="text" value="" optional="true" label="Mark deletions" help="Instead of removing the reference base at deleted positions, replace the base with a custom character.">
                <!-- exactly one character -->
                <validator type="regex">^.$</validator>
            </param>
            <conditional name="insertions">
                <param argument="--mark-ins" type="select" label="Mark insertions" help="Highlight insertions in uppercase or lowercase, or by using a fixed character instead of inserted bases, leaving the rest as is">
                    <option value="">Do not mark insertions</option>
                    <option value="--mark-ins uc">Uppercase (uc)</option>
                    <option value="--mark-ins lc">Lowercase (lc)</option>
                    <option value="--mark-ins">Custom character</option>
                </param>
                <when value="" />
                <when value="--mark-ins uc" />
                <when value="--mark-ins lc" />
                <when value="--mark-ins">
                    <!-- name must match the ins_custom reference in the command template -->
                    <param name="ins_custom" type="text" optional="false" label="Character to use instead of an inserted base">
                        <validator type="empty_field"/>
                    </param>
                </when>
            </conditional>
            <conditional name="snvs">
                <param argument="--mark-snv" type="select" label="Mark substitutions" help="Highlight substitutions in uppercase or lowercase, or by using a fixed character instead of substituted bases, leaving the rest as is">
                    <option value="">Do not mark substitutions</option>
                    <option value="--mark-snv uc">Uppercase (uc)</option>
                    <option value="--mark-snv lc">Lowercase (lc)</option>
                    <option value="--mark-snv">Custom character</option>
                </param>
                <when value="" />
                <when value="--mark-snv uc" />
                <when value="--mark-snv lc" />
                <when value="--mark-snv">
                    <param name="snv_custom" type="text" optional="false" label="Character to use instead of a substituted base">
                        <validator type="empty_field"/>
                    </param>
                </when>
            </conditional>
        </section>
        <section name="sec_restrict" expanded="false" title="Restrict to">
            <expand macro="macro_include"/>
            <expand macro="macro_exclude"/>
            <expand macro="macro_region_restrict" label_select="Restrict consensus building to only specified regions of reference?"/>
        </section>
        <param name="chain" type="boolean" truevalue="yes" falsevalue="no" label="Write a chain file for liftover"/>
        <param name="rename" type="boolean" truevalue="yes" falsevalue="no" label="Set output FASTA ID from name of VCF"/>
    </inputs>
    <outputs>
        <data name="output_file" format="fasta" label="${tool.name} on ${on_string}: consensus fasta"/>
        <!-- Only produced when the chain option is enabled -->
        <data name="chain_file" format="txt" label="${tool.name} on ${on_string}: chain">
            <filter>chain</filter>
        </data>
    </outputs>
    <tests>
        <!-- Masking plus chain output, reference from history -->
        <test expect_num_outputs="2">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" ftype="vcf" value="consensus.vcf"/>
            <section name="masking_options">
                <param name="mask" ftype="tabular" value="consensus.tab"/>
            </section>
            <param name="chain" value="true"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="NNNNNNNNNNNNNNNNNNNNNNNNNN"/>
                </assert_contents>
            </output>
            <output name="chain_file">
                <assert_contents>
                    <has_text text="chain 497 1 501 + 1 501 1 502 + 1 502 1"/>
                </assert_contents>
            </output>
        </test>
        <!-- Masking plus chain output, cached reference -->
        <test expect_num_outputs="2">
            <expand macro="test_using_reference" select_from="cached" ref="consensus"/>
            <param name="input_file" ftype="vcf" dbkey="?" value="consensus.vcf"/>
            <section name="masking_options">
                <param name="mask" ftype="tabular" value="consensus.tab"/>
            </section>
            <param name="chain" value="true"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="NNNNNNNNNNNNNNNNNNNNNNNNNN"/>
                </assert_contents>
            </output>
            <output name="chain_file">
                <assert_contents>
                    <has_text text="chain 497 1 501 + 1 501 1 502 + 1 502 1"/>
                </assert_contents>
            </output>
        </test>
        <!-- FASTA ID renaming; both assertions share one assert_contents so that each is evaluated -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" ftype="vcf" value="consensus.vcf"/>
            <section name="masking_options">
                <param name="mask" ftype="tabular" value="consensus.tab"/>
            </section>
            <param name="chain" value="false"/>
            <param name="rename" value="true"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="&gt;consensus.vcf"/>
                    <has_text text="&gt;consensus.vcf-2"/>
                </assert_contents>
            </output>
        </test>
        <!-- Include filter with genotype-independent mode -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" ftype="vcf" value="consensus.vcf"/>
            <conditional name="mode">
                <param name="select" value="-s -"/>
            </conditional>
            <section name="sec_restrict">
                <param name="include" value="TYPE=&quot;snp&quot;"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="TACAAAATATGACATATCAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGA"/>
                </assert_contents>
            </output>
        </test>
        <!--Test absent option-->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" ftype="vcf" value="consensus.vcf"/>
            <conditional name="mode">
                <param name="select" value="-s -"/>
            </conditional>
            <section name="masking_options">
                <param name="absent" value="W"/>
            </section>
            <section name="sec_restrict">
                <param name="include" value="TYPE=&quot;snp&quot;"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="WWWAWAWWAWWWWWWWWCWWWWWWWW"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--absent"/>
            </assert_command>
        </test>
        <!-- Deletion marking plus uppercase marking of insertions and substitutions -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" ftype="vcf" value="consensus.vcf"/>
            <conditional name="mode">
                <param name="select" value="-s -"/>
            </conditional>
            <section name="masking_options">
                <param name="mark_del" value="-"/>
                <conditional name="insertions">
                    <param name="mark_ins" value="--mark-ins uc"/>
                </conditional>
                <conditional name="snvs">
                    <param name="mark_snv" value="--mark-snv uc"/>
                </conditional>
            </section>
            <section name="sec_restrict">
                <param name="include" value="TYPE=&quot;snp&quot;"/>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="TACAAAATATGACATATCAAAAAGAACATAACCTACGTATCAACTAAAGTGGTTGTTTGA"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--mark-del"/>
                <has_text text="--mark-ins"/>
                <has_text text="--mark-snv"/>
            </assert_command>
        </test>
        <!-- Custom marking characters: exercises the ins_custom and snv_custom text params;
             only the generated command line is asserted -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="consensus.fa"/>
            <param name="input_file" ftype="vcf" value="consensus.vcf"/>
            <conditional name="mode">
                <param name="select" value="-s -"/>
            </conditional>
            <section name="masking_options">
                <conditional name="insertions">
                    <param name="mark_ins" value="--mark-ins"/>
                    <param name="ins_custom" value="X"/>
                </conditional>
                <conditional name="snvs">
                    <param name="mark_snv" value="--mark-snv"/>
                    <param name="snv_custom" value="x"/>
                </conditional>
            </section>
            <section name="sec_restrict">
                <param name="include" value="TYPE=&quot;snp&quot;"/>
            </section>
            <assert_command>
                <has_text_matching expression="--mark-ins\s+'X'"/>
                <has_text_matching expression="--mark-snv\s+'x'"/>
            </assert_command>
        </test>
        <!-- Region restriction without an explicit regions_overlap value -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="csq.fa"/>
            <param name="input_file" ftype="vcf" value="csq.vcf"/>
            <section name="masking_options">
                <param name="absent" value="."/>
                <param name="mark_del" value="-"/>
            </section>
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions"/>
                    <repeat name="region_specs">
                        <param name="chrom" value="1"/>
                        <param name="start" value="161"/>
                        <param name="stop" value="190"/>
                    </repeat>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_line line="&gt;1:161-190"/>
                    <has_line line="-............................Y"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same region with regions_overlap set to 0: the first position is no longer marked as deleted -->
        <test expect_num_outputs="1">
            <expand macro="test_using_reference" ref="csq.fa"/>
            <param name="input_file" ftype="vcf" value="csq.vcf"/>
            <section name="masking_options">
                <param name="absent" value="."/>
                <param name="mark_del" value="-"/>
            </section>
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions"/>
                    <repeat name="region_specs">
                        <param name="chrom" value="1"/>
                        <param name="start" value="161"/>
                        <param name="stop" value="190"/>
                    </repeat>
                    <param name="regions_overlap" value="0"/>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_line line="&gt;1:161-190"/>
                    <has_line line=".............................Y"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@ plugin
=====================================

Create consensus sequence by applying VCF variants to a reference fasta file.

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_HOWTOS@

The option to set the new consensus' FASTA ID from the name of the VCF is
provided by post-processing the bcftools consensus output. It is primarily
intended for use when the VCF is coming from a list collection where the
elements of the list are named meaningfully (e.g. named after sample names).
This is useful when consensus sequences are being prepared for, for example,
feeding a multiple sequence alignment to a phylogeny program.
    ]]></help>
    <expand macro="citations"/>
</tool>
