view bcftools_norm.xml @ 26:051e5060b390 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/bcftools commit f6efda26965eb73c9107d367fd5ffdf246ed0dbc
author iuc
date Tue, 02 Dec 2025 07:55:48 +0000
parents 36b301c20b95
children
line wrap: on
line source

<tool name="bcftools @EXECUTABLE@" id="bcftools_@EXECUTABLE@" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- One-line summary of the tool -->
    <description>Left-align and normalize indels; check if REF alleles match the reference; split multiallelic sites into multiple rows; recover multiallelics from multiple rows</description>
    <!-- The EXECUTABLE token is defined here; it is substituted into the tool name, the tool id, the command line and the help text. macros.xml is imported after the token definition. -->
    <macros>
        <token name="@EXECUTABLE@">norm</token>
        <import>macros.xml</import>
    </macros>
    <!-- The macros expanded below are not defined in this file (presumably they come from macros.xml) -->
    <expand macro="bio_tools"/>
    <!-- samtools_requirement is passed as a child of the requirements macro expansion -->
    <expand macro="requirements">
        <expand macro="samtools_requirement"/>
    </expand>
    <expand macro="version_command"/>
    <command detect_errors="aggressive"><![CDATA[
## Stage the input, the reference FASTA and any targets/regions files.
## The staging snippets are tokens that are not defined in this file
## (presumably they come from the imported macros.xml). The section
## variable is re-pointed before each group of snippets; presumably the
## snippets read their parameters from it.
@PREPARE_INPUT_FILE@
#set $section = $reference_source
@PREPARE_FASTA_REF@

#set $section = $sec_restrict
@PREPARE_TARGETS_FILE@
@PREPARE_REGIONS_FILE@

bcftools @EXECUTABLE@

#set $section = $reference_source
@FASTA_REF@
--check-ref $check_ref
## normalize_indels renders as an empty string when checked and as
## --do-not-normalize when unchecked.
$normalize_indels
## The first rm_dup option has an empty value, which skips the flag.
#if $rm_dup:
  --rm-dup '$rm_dup'
#end if
## atomization renders as an empty string or as complete pre-built flags.
$atomization
#if $old_rec_tag:
  --old-rec-tag '$old_rec_tag'
#end if
## mode is "", "-" or "+"; the variant type is appended directly to it.
## strict_filter only exists in the "+" (join) branch of the conditional.
#if $multiallelics.mode:
  --multiallelics '${multiallelics.mode}${multiallelics.multiallelic_types}'
  #if $multiallelics.mode == '+':
    ${multiallelics.strict_filter}
  #end if
#end if
## Compare the string form so that an explicit 0 is still passed on; only
## an empty (unset) optional value omits the flag.
#if str($sec_default.site_win) != '':
  --site-win ${sec_default.site_win}
#end if
--sort ${sec_default.sort}
#if str($sec_filter_norm.include):
  --include '${sec_filter_norm.include}'
#end if
#if str($sec_filter_norm.exclude):
  --exclude '${sec_filter_norm.exclude}'
#end if

#set $section = $sec_restrict
@REGIONS@
@TARGETS@

@OUTPUT_TYPE@
@THREADS@

## Primary Input/Outputs
@INPUT_FILE@
> '$output_file'
]]>
    </command>
    <inputs>
        <!-- Input file and reference genome selection; both macros are defined outside this file -->
        <expand macro="macro_input"/>
        <expand macro="macro_fasta_ref"/>
        <!-- The selected value is passed verbatim as the argument of the check-ref option in the command -->
        <param name="check_ref" type="select" display="radio" label="When any REF allele does not match the reference genome base" help="Warnings about REF mismatches will be emitted to the standard error (stderr) stream, and it is recommended to check there for problems if you choose not to exit with an error immediately upon encountering a mismatch.">
            <option value="w">ignore the problem (-w)</option>
            <option value="wx">exclude the variant record from the output (-wx)</option>
            <option value="ws">fix the variant record using the reference genome information (-ws)</option>
            <option value="e">exit with an error (-e)</option>
        </param>
        <!-- Option values are complete command-line fragments that the command template emits as-is -->
        <param name="atomization" type="select" display="radio" label="Atomize" help="Decompose complex variants (e.g. MNVs become consecutive SNVs)">
            <option value="">Do not atomize</option>
            <option value="--atomize">Atomize and use * ALT allele for proper genotype representation (--atomize)</option>
            <option value="--atomize --atom-overlaps .">Atomize and accept missing genotype information (legacy behavior; --atomize --atom-overlaps .)</option>
        </param>
        <!-- Inverted boolean: checked emits nothing, unchecked (the default) emits the do-not-normalize flag -->
        <param argument="--do-not-normalize" name="normalize_indels" type="boolean" truevalue="" falsevalue="--do-not-normalize" checked="false" label="Left-align and normalize indels?"/>
        <!-- Free text restricted to letters, digits, underscore and slash by both the sanitizer and the validator -->
        <param argument="--old-rec-tag" type="text" value="" optional="true" label="Annotate modified records with INFO/STR indicating the original variant">
            <sanitizer invalid_char="">
                <valid initial="string.letters,string.digits">
                    <add value="_"/>
                    <add value="/"/>
                </valid>
            </sanitizer>
            <validator type="regex">[0-9a-zA-Z_/]+</validator>
        </param>
        <!-- The empty first value makes the command template skip the rm-dup option altogether -->
        <param name="rm_dup" type="select" display="radio" label="Perform deduplication for the following types of variant records">
            <option value="">do not deduplicate any records</option>
            <option value="snps">snps</option>
            <option value="indels">indels</option>
            <option value="both">both</option>
            <option value="any">any</option>
        </param>
        <!-- The command concatenates mode and multiallelic_types into the single argument of the multiallelics option -->
        <conditional name="multiallelics">
            <param name="mode" type="select" label="Multiallelic sites (-m, --multiallelics)">
                <option value="">preserve multiallelic/biallelic sites</option>
                <option value="-">split multiallelic sites into biallelic records (-)</option>
                <option value="+">join biallelic sites into multiallelic records (+)</option>
            </param>
            <when value=""/>
            <when value="-">
                <param name="multiallelic_types" type="select" display="radio" label="split the following variant types">
                    <option value="snps">SNPs</option>
                    <option value="indels">indels</option>
                    <option value="both" selected="true">both</option>
                </param>
            </when>
            <when value="+">
                <param name="multiallelic_types" type="select" display="radio" label="Merge the following variant types">
                    <option value="snps">SNPs</option>
                    <option value="indels">indels</option>
                    <option value="both" selected="true">SNPs and indels, but keep variants of the two types separate (both)</option>
                    <option value="any">SNPs and indels, and merge variant records of different types (any)</option>
                </param>
                <!-- Only offered when joining; the command template reads it solely in the "+" case -->
                <param name="strict_filter" type="boolean" truevalue="--strict-filter" falsevalue="" label="Strict Filter" help="merged site is PASS only if all sites being merged PASS"/>
            </when>
        </conditional>
        <!-- Region and target restrictions; both macros are defined outside this file -->
        <section name="sec_restrict" expanded="false" title="Restrict all operations to">
            <expand macro="macro_region_restrict"/>
            <expand macro="macro_target_restrict"/>
        </section>
        <section name="sec_default" expanded="false" title="Other Options">
            <!-- Optional: when left empty the command template omits the site-win option -->
            <param name="site_win" type="integer" label="Site Window" value="1000" optional="true" help="(-w, --site-win) Buffer for sorting lines which changed position during realignment"/>
            <param argument="--sort" type="select" label="Sort normalized allele order" help="Select the sort order for output records after normalization.">
                <option value="pos" selected="true">Chromosome then position (pos)</option>
                <option value="lex">Lexicographical allele order (lex)</option>
            </param>
        </section>
        <!-- Filter expressions; each is passed to bcftools only when non-empty -->
        <section name="sec_filter_norm" expanded="false" title="Normalize only selected records">
            <param name="include" type="text" optional="true" label="Include (normalize only matching records)" help="Expression evaluated before normalization to select records to normalize">
                <expand macro="macro_include_exclude_validate_sanitize"/>
            </param>
            <param name="exclude" type="text" optional="true" label="Exclude (skip normalization for matching records)" help="Expression evaluated before normalization to skip records">
                <expand macro="macro_include_exclude_validate_sanitize"/>
            </param>
        </section>
        <expand macro="macro_select_output_type"/>
    </inputs>
    <!-- The output definition comes from a macro defined outside this file; the command redirects stdout to output_file and the tests refer to the output by that name -->
    <outputs>
        <expand macro="macro_vcf_output"/>
    </outputs>
    <tests>
        <!-- Normalization enabled, reference FASTA supplied as a test file -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.vcf"/>
            <expand macro="test_using_reference" ref="norm.fa"/>
            <param name="normalize_indels" value="true"/>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="T,TAACCCTA"/>
                    <not_has_text text="TAA,TAACCCTAAA"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same expectations as the first test, but the reference is selected from the cached genomes -->
        <test>
            <param name="input_file" ftype="vcf" dbkey="?" value="norm.vcf"/>
            <expand macro="test_using_reference" select_from="cached" ref="norm"/>
            <param name="normalize_indels" value="true"/>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="T,TAACCCTA"/>
                    <not_has_text text="TAA,TAACCCTAAA"/>
                </assert_contents>
            </output>
        </test>
        <!-- Split multiallelic sites with normalize_indels left at its default (unchecked) -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.split.vcf"/>
            <expand macro="test_using_reference" ref="norm.fa"/>
            <conditional name="multiallelics">
                <param name="mode" value="-"/>
            </conditional>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="TAA,TAACCCTAAA"/>
                    <has_text_matching expression="1\t105\t.\tTAAACCCTAAA\tTAA\t"/>
                    <has_text_matching expression="1\t105\t.\tTAAACCCTAAA\tTAACCCTAAA\t"/>
                </assert_contents>
            </output>
        </test>
        <!-- Split multiallelic sites with normalization enabled -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.split.vcf"/>
            <expand macro="test_using_reference" ref="norm.fa"/>
            <param name="normalize_indels" value="true"/>
            <conditional name="multiallelics">
                <param name="mode" value="-"/>
            </conditional>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="TAA,TAACCCTAAA"/>
                    <has_text_matching expression="1\t105\t.\tTAAACCCTA\tT\t"/>
                    <has_text_matching expression="1\t105\t.\tTA\tT\t"/>
                </assert_contents>
            </output>
        </test>
        <!-- Join biallelic records without the strict filter: the merged record is expected to carry FAIL -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.merge.vcf"/>
            <expand macro="test_using_reference" ref="norm.fa"/>
            <conditional name="multiallelics">
                <param name="mode" value="+"/>
            </conditional>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="TAA,TAACCCTAAA"/>
                    <has_text_matching expression="2\t114\t.\tTC\tTTCC,TTC\t999\tFAIL"/>
                </assert_contents>
            </output>
        </test>
        <!-- Join biallelic records with the strict filter enabled: the merged record is expected to carry PASS -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.merge.vcf"/>
            <expand macro="test_using_reference" ref="norm.fa"/>
            <conditional name="multiallelics">
                <param name="mode" value="+"/>
                <param name="strict_filter" value="true"/>
            </conditional>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="TAA,TAACCCTAAA"/>
                    <has_text_matching expression="2\t114\t.\tTC\tTTCC,TTC\t999\tPASS"/>
                </assert_contents>
            </output>
        </test>
        <!-- REF mismatch handling with the "ws" choice (fix the record from the reference) -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.setref.vcf"/>
            <expand macro="test_using_reference" ref="norm.fa"/>
            <param name="check_ref" value="ws"/>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <has_text_matching expression="2\t101\t.\tA\tc\t999\tPASS"/>
                    <has_text_matching expression="2\t105\t.\tT\t&lt;DEL&gt;\t999\tPASS"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test atomize option -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.split.vcf"/>
            <expand macro="test_using_reference" ref="norm.fa"/>
            <param name="normalize_indels" value="true"/>
            <!-- The mode selector only declares the values "", "-" and "+"; the variant type is the separate multiallelic_types parameter, left at its default ("both") here because the assertions below are the same as in the split-and-normalize test above -->
            <conditional name="multiallelics">
                <param name="mode" value="-"/>
            </conditional>
            <param name="atomization" value="--atomize --atom-overlaps ."/>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <not_has_text text="TAA,TAACCCTAAA"/>
                    <has_text_matching expression="1\t105\t.\tTAAACCCTA\tT\t"/>
                    <has_text_matching expression="1\t105\t.\tTA\tT\t"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--atomize --atom-overlaps ."/>
            </assert_command>
        </test>
        <!-- Test region overlap options -->
        <test>
            <param name="input_file" ftype="vcf" dbkey="?" value="norm.vcf"/>
            <expand macro="test_using_reference" select_from="cached" ref="norm"/>
            <param name="normalize_indels" value="true"/>
            <param name="output_type" value="v"/>
            <section name="sec_restrict">
                <conditional name="regions">
                    <param name="regions_src" value="regions" />
                    <repeat name="region_specs">
                        <param name="chrom" value="1" />
                    </repeat>
                </conditional>
            </section>
            <output name="output_file">
                <assert_contents>
                    <has_text text="T,TAACCCTA"/>
                    <not_has_text text="TAA,TAACCCTAAA"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--regions-overlap"/>
            </assert_command>
        </test>
        <!-- Test normalization filters and sort -->
        <test>
            <param name="input_file" ftype="vcf" value="norm.vcf"/>
            <expand macro="test_using_reference" ref="norm.fa"/>
            <param name="normalize_indels" value="true"/>
            <section name="sec_default">
                <param name="sort" value="lex"/>
            </section>
            <section name="sec_filter_norm">
                <param name="include" value="QUAL&gt;10"/>
            </section>
            <param name="output_type" value="v"/>
            <output name="output_file">
                <assert_contents>
                    <has_text text="T,TAACCCTA"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--sort lex"/>
                <has_text text="--include 'QUAL&gt;10'"/>
            </assert_command>
        </test>
    </tests>
    <!-- Help text in reStructuredText; apart from EXECUTABLE, the tokens used below are not defined in this file (presumably they come from macros.xml) -->
    <help><![CDATA[
=====================================
 bcftools @EXECUTABLE@
=====================================


Left-align and normalize indels; check if REF alleles match the reference; split multiallelic sites into multiple rows; recover multiallelics from multiple rows.

@REGIONS_HELP@
@TARGETS_HELP@

@BCFTOOLS_MANPAGE@#@EXECUTABLE@

@BCFTOOLS_HOWTOS@
]]>
    </help>
    <!-- Citation entries are supplied by a macro defined outside this file -->
    <expand macro="citations"/>
</tool>