view hal_halStats.xml @ 0:25dcde5bf94e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools commit 6244b9d15a5ad97ae20191e2f8fbafe2050c3cac
author iuc
date Fri, 06 Feb 2026 10:39:34 +0000
parents
children
line wrap: on
line source

<tool id="hal_halstats" name="halStats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>retrieves basic statistics from a HAL file</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/> 
    <expand macro="stdio"/>
    <command detect_errors="aggressive"><![CDATA[
        ## With pipefail, a failing halStats fails the whole pipeline even when a trailing tr succeeds.
        set -o pipefail;

        #set $selected = str($mode.option)
        ## Flags handed to halStats without an argument.
        #set $bare_flags = ('--allCoverage', '--branches', '--genomes', '--metaData', '--root', '--tree')
        ## Modes whose output gets its commas replaced with tabs.
        #set $comma_modes = ('--allCoverage', '--sequenceStats', '--percentID', '--coverage')

        ## Everything printed inside the subshell (optional header line plus halStats output) is redirected to the output dataset.
        (
        ## A column header line is written first for the two modes below.
        #if $selected == '--baseComp':
            echo -e 'fraction_of_As\tfraction_of_Gs\tfraction_of_Cs\tfraction_of_Ts';
        #elif $selected == '--numSegments':
            echo -e 'numTopSegments\tnumBottomSegments';
        #end if

        ## An empty selection (basic overview) adds no flag at all.
        halStats
        #if $selected in $bare_flags:
            $selected
        #elif $selected == '--baseComp':
            $selected '$mode.baseComp'
        #elif $selected == '--bedSequences':
            $selected '$mode.bedSequences'
        #elif $selected == '--bottomSegments':
            $selected '$mode.bottomSegments'
        #elif $selected == '--branchLength':
            $selected '$mode.branchLength'
        #elif $selected == '--children':
            $selected '$mode.children'
        #elif $selected == '--chromSizes':
            $selected '$mode.chromSizes'
        #elif $selected == '--coverage':
            $selected '$mode.coverage'
        #elif $selected == '--genomeMetaData':
            $selected '$mode.genomeMetaData'
        #elif $selected == '--numSegments':
            $selected '$mode.numSegments'
        #elif $selected == '--parent':
            $selected '$mode.parent'
        #elif $selected == '--percentID':
            $selected '$mode.percentID'
        #elif $selected == '--sequenceStats':
            $selected '$mode.sequenceStats'
        #elif $selected == '--sequences':
            $selected '$mode.sequences'
        #elif $selected == '--span':
            $selected '$mode.span'
        #elif $selected == '--spanRoot':
            $selected '$mode.spanRoot'
        #elif $selected == '--topSegments':
            $selected '$mode.topSegments'
        #end if
            '$input_hal'

        ## Output is mostly numerical, and Genome names contain no commas (this would invalidate the HAL Newick tree), so replacing every comma is safe.
        #if $selected in $comma_modes:
            | tr ',' '\t'
        #elif $selected == '--numSegments':
            ## Replace spaces with tabs.
            | tr ' ' '\t'
        #end if
        ) > '$out_file'
    ]]></command>
    <inputs>
        <expand macro="input_hal"/>
        <!-- The conditional selects which statistic to compute. Every non-empty option value is
             spelled exactly like the halStats flag it stands for; the empty value is the basic overview.
             Modes with an empty "when" element need no further input. -->
        <conditional name="mode">
            <param name="option" type="select" label="Select the type of statistics you are interested in">
                <option value="" selected="true">Basic overview</option>
                <option value="--genomes">List of genomes in alignment (--genomes)</option>
                <option value="--sequences">List of sequences in a given genome (--sequences)</option>
                <option value="--bedSequences">List of sequences in a given genome (in BED format) (--bedSequences)</option>
                <option value="--sequenceStats">Stats for each sequence in a given genome (--sequenceStats)</option>
                <option value="--tree">Newick tree (--tree)</option>
                <option value="--branches">List of branches specified by the child genome (--branches)</option>
                <option value="--span">Branches on path (or spanning tree) between given list of genomes (--span)</option>
                <option value="--spanRoot">Genomes on path (or spanning tree) with spanning tree root between given list of genomes (--spanRoot)</option>
                <option value="--children">Names of children of a given genome (--children)</option>
                <option value="--root">Root genome name (--root)</option>
                <option value="--parent">Parent name of a given genome (--parent)</option>
                <option value="--branchLength">Branch length between a given genome and its parent (--branchLength)</option>
                <option value="--numSegments">Number of top and of bottom segments of a given genome (--numSegments)</option>
                <option value="--topSegments">Coordinates of all top segments of a given genome (in BED format) (--topSegments)</option>
                <option value="--bottomSegments">Coordinates of all bottom segments of a given genome (in BED format) (--bottomSegments)</option>
                <option value="--baseComp">Base composition by sampling every step bases (--baseComp)</option>
                <option value="--genomeMetaData">Metadata for a given genome (--genomeMetaData)</option>
                <option value="--metaData">Metadata for the entire alignment (--metaData)</option>
                <option value="--chromSizes">Name and length of each sequence in a given genome (in format used by wigToBigWig) (--chromSizes)</option>
                <option value="--percentID">Percent ID of a given genome with all other genomes (--percentID)</option>
                <option value="--coverage">Histogram of coverage of a given genome with all genomes (--coverage)</option>
                <option value="--allCoverage">Histogram of coverage from all genomes to all genomes (--allCoverage)</option>
            </param>
            <when value=""/>
            <when value="--allCoverage"/>
            <when value="--baseComp">
                <param name="baseComp" type="text" value="" label="Genome and step" help="Parameter value is of the form genome,step where step is a positive integer. Ex: human,1000">
                    <expand macro="sanitizer_default"/>
                    <!-- Genome part: no commas and no leading/trailing whitespace.
                         Step part: digits only with at least one non-zero digit, so a step of 0 is
                         rejected in the form instead of being passed on to halStats. -->
                    <validator type="regex" message="Please enter as genome,step (step being a positive integer) without leading or trailing spaces">^[^\s,](?:[^,]*[^\s,])?,0*[1-9][0-9]*$</validator>
                </param>
            </when>
            <!-- Modes taking a single genome name -->
            <when value="--bedSequences">
                <param name="bedSequences" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--topSegments">
                <param name="topSegments" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--bottomSegments">
                <param name="bottomSegments" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--branchLength">
                <param name="branchLength" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--branches"/>
            <when value="--children">
                <param name="children" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--chromSizes">
                <param name="chromSizes" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--coverage">
                <param name="coverage" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--genomeMetaData">
                <param name="genomeMetaData" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--genomes"/>
            <when value="--metaData"/>
            <when value="--numSegments">
                <param name="numSegments" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--parent">
                <param name="parent" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--percentID">
                <param name="percentID" type="text" value="" label="Genome name" help="Only non-duplicated and unambiguous sites are considered">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--root"/>
            <when value="--sequenceStats">
                <param name="sequenceStats" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--sequences">
                <param name="sequences" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <!-- Modes taking a comma-separated list of genome names -->
            <when value="--span">
                <param name="span" type="text" value="" label="List of genomes" help="Enter a comma-separated (no spaces) list of genomes">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_comma_list"/>
                </param>
            </when>
            <when value="--spanRoot">
                <param name="spanRoot" type="text" value="" label="List of genomes" help="Enter a comma-separated (no spaces) list of genomes">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_comma_list"/>
                </param>
            </when>
            <when value="--tree"/>
        </conditional>
    </inputs>
    <outputs>
        <!-- Single result dataset: plain text unless the selected statistic is listed below. -->
        <data name="out_file" format="txt" label="${tool.name} on ${on_string}: Stats">
            <change_format>
                <!-- Statistics stored as BED -->
                <when input="mode.option" value="--bedSequences" format="bed"/>
                <when input="mode.option" value="--topSegments" format="bed"/>
                <when input="mode.option" value="--bottomSegments" format="bed"/>
                <!-- Statistics stored as tabular -->
                <when input="mode.option" value="--allCoverage" format="tabular"/>
                <when input="mode.option" value="--coverage" format="tabular"/>
                <when input="mode.option" value="--percentID" format="tabular"/>
                <when input="mode.option" value="--sequenceStats" format="tabular"/>
                <when input="mode.option" value="--numSegments" format="tabular"/>
                <when input="mode.option" value="--baseComp" format="tabular"/>
                <when input="mode.option" value="--chromSizes" format="tabular"/>
                <when input="mode.option" value="--metaData" format="tabular"/>
                <when input="mode.option" value="--genomeMetaData" format="tabular"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Every test reads halTest.hal and checks selected lines plus the total line count. -->
        <!-- Basic overview: no mode selected -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="(Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>
                    <has_line line="GenomeName, NumChildren, Length, NumSequences, NumTopSegments, NumBottomSegments"/>
                    <has_line line="Genome_0, 3, 1758, 1, 0, 8"/>
                    <has_n_lines n="10"/>
                </assert_contents>
            </output>
        </test>
        <!-- allCoverage mode: tabular; the expected fields keep a space after each tab -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--allCoverage"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="FromGenome&#009; ToGenome&#009; sitesCovered1Times&#009; sitesCovered2Times&#009; sitesCovered3Times&#009; sitesCovered4Times&#009; sitesCovered5Times"/>
                    <has_line line="Genome_1&#009; Genome_1&#009; 5472&#009; 4688&#009; 3516&#009; 2637&#009; 1465"/>
                    <has_n_lines n="10"/>
                </assert_contents>
            </output>
        </test>
        <!-- bedSequences mode: BED output -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--bedSequences"/>
                <param name="bedSequences" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="bed">
                <assert_contents>
                    <has_line line="Genome_0_seq&#009;0&#009;1758"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- topSegments mode: BED output -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--topSegments"/>
                <param name="topSegments" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="bed">
                <assert_contents>
                    <has_line line="Genome_1_seq&#009;0&#009;293"/>
                    <has_line line="Genome_1_seq&#009;3223&#009;3399"/>
                    <has_line line="Genome_1_seq&#009;5274&#009;5472"/>
                    <has_n_lines n="28"/>
                </assert_contents>
            </output>
        </test>
        <!-- bottomSegments mode: BED output -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--bottomSegments"/>
                <param name="bottomSegments" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="bed">
                <assert_contents>
                    <has_line line="Genome_0_seq&#009;0&#009;293"/>
                    <has_line line="Genome_0_seq&#009;1033&#009;1172"/>
                    <has_line line="Genome_0_seq&#009;1465&#009;1758"/>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
        </test>
        <!-- tree mode -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--tree"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="(Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- spanRoot mode: comma-separated genome list as input -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--spanRoot"/>
                <param name="spanRoot" value="Genome_0,Genome_1"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_text text="Genome_0 Genome_1"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- sequences mode -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--sequences"/>
                <param name="sequences" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_0_seq"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- sequenceStats mode: tabular; the expected fields keep a space after each tab -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--sequenceStats"/>
                <param name="sequenceStats" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="SequenceName&#009; Length&#009; NumTopSegments&#009; NumBottomSegments"/>
                    <has_line line="Genome_0_seq&#009; 1758&#009; 0&#009; 8"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
        </test>
        <!-- root mode -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--root"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_0"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- parent mode -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--parent"/>
                <param name="parent" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_0"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- percentID mode: tabular; the expected fields keep a space after each tab -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--percentID"/>
                <param name="percentID" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome&#009; % ID&#009; numID&#009; numSites"/>
                    <has_line line="Genome_0&#009; 1&#009; 1758&#009; 1758"/>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
        </test>
        <!-- numSegments mode: header line followed by the two tab-separated counts -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--numSegments"/>
                <param name="numSegments" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="numTopSegments&#009;numBottomSegments"/>
                    <has_line line="28&#009;0"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
        </test>
        <!-- genomes mode -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--genomes"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_0 Genome_1 Genome_2 Genome_3"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- children mode -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--children"/>
                <param name="children" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_1 Genome_2 Genome_3"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- chromSizes mode: tabular output -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--chromSizes"/>
                <param name="chromSizes" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome_1_seq&#009;5472"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- branches mode -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--branches"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_1 Genome_2 Genome_3"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- branchLength mode -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--branchLength"/>
                <param name="branchLength" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="1"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- baseComp mode: header line followed by the four tab-separated fractions -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--baseComp"/>
                <param name="baseComp" value="Genome_0,1000"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="fraction_of_As&#009;fraction_of_Gs&#009;fraction_of_Cs&#009;fraction_of_Ts"/>
                    <has_line line="0.5&#009;0&#009;0.5&#009;0"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
halStats prints structural and summary information from a HAL file, which must be provided as input.
It can list genomes, sequences, sizes, and relationships; provide sequence-level statistics such as coverage histograms and percent ID; and export sequence or segment information in BED format.

It is useful for quick inspection of a HAL file and for extracting per-genome or per-sequence summaries.

-----

**Output**

The tool generates different output formats based on the selected type of statistic:

- **Tabular** for --coverage, --allCoverage, --sequenceStats, --percentID, --baseComp, --chromSizes, --metaData, --numSegments, or --genomeMetaData
- **BED** for --bedSequences, --topSegments, or --bottomSegments
- Plain **text** for all other types of statistics

    ]]></help>
    <expand macro="citation"/>
    <expand macro="creator"/>
</tool>