diff hal_halStats.xml @ 0:25dcde5bf94e draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools commit 6244b9d15a5ad97ae20191e2f8fbafe2050c3cac
author iuc
date Fri, 06 Feb 2026 10:39:34 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hal_halStats.xml	Fri Feb 06 10:39:34 2026 +0000
@@ -0,0 +1,492 @@
+<tool id="hal_halstats" name="halStats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>retrieves basic statistics from a HAL file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/> 
+    <expand macro="stdio"/>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Selected statistic as a plain string. An empty value is the basic overview, which passes no switch to halStats.
+        #set $stat = str($mode.option)
+        ## Switches that stand alone, without a genome argument.
+        #set $bare_switches = ['--allCoverage', '--branches', '--genomes', '--metaData', '--root', '--tree']
+        ## Modes whose comma-delimited output gets its commas turned into tabs.
+        #set $comma_modes = ['--allCoverage', '--sequenceStats', '--percentID', '--coverage']
+        ## With pipefail, a halStats failure is not masked by the exit status of the trailing tr.
+        set -o pipefail;
+        ## The optional header line and the halStats output share one subshell, so both are redirected into the same output file.
+        (
+        #if $stat == '--numSegments':
+            echo -e 'numTopSegments\tnumBottomSegments';
+        #else if $stat == '--baseComp':
+            echo -e 'fraction_of_As\tfraction_of_Gs\tfraction_of_Cs\tfraction_of_Ts';
+        #end if
+        halStats
+            #if $stat in $bare_switches:
+                $stat
+            #else if $stat == '--sequences':
+                --sequences '$mode.sequences'
+            #else if $stat == '--bedSequences':
+                --bedSequences '$mode.bedSequences'
+            #else if $stat == '--sequenceStats':
+                --sequenceStats '$mode.sequenceStats'
+            #else if $stat == '--span':
+                --span '$mode.span'
+            #else if $stat == '--spanRoot':
+                --spanRoot '$mode.spanRoot'
+            #else if $stat == '--children':
+                --children '$mode.children'
+            #else if $stat == '--parent':
+                --parent '$mode.parent'
+            #else if $stat == '--branchLength':
+                --branchLength '$mode.branchLength'
+            #else if $stat == '--numSegments':
+                --numSegments '$mode.numSegments'
+            #else if $stat == '--topSegments':
+                --topSegments '$mode.topSegments'
+            #else if $stat == '--bottomSegments':
+                --bottomSegments '$mode.bottomSegments'
+            #else if $stat == '--baseComp':
+                --baseComp '$mode.baseComp'
+            #else if $stat == '--genomeMetaData':
+                --genomeMetaData '$mode.genomeMetaData'
+            #else if $stat == '--chromSizes':
+                --chromSizes '$mode.chromSizes'
+            #else if $stat == '--percentID':
+                --percentID '$mode.percentID'
+            #else if $stat == '--coverage':
+                --coverage '$mode.coverage'
+            #end if
+            '$input_hal'
+        ## Post-process the delimiters of the table-like modes. Genome names contain no commas, as this would invalidate the HAL Newick tree.
+        ## NOTE(review): the expected lines in the tests show a space after each tab, so tr leaves a leading space in every column after the first - confirm this is intended.
+        #if $stat in $comma_modes:
+            | tr ',' '\t'
+        #else if $stat == '--numSegments':
+            | tr ' ' '\t'
+        #end if
+        ) > '$out_file'
+    ]]></command>
+    <inputs>
+        <!-- HAL alignment to inspect; the parameter itself is defined by a macro imported from macros.xml. -->
+        <expand macro="input_hal"/>
+        <!-- One statistic per run. The selected option value is the halStats switch; the empty value runs halStats without a switch. -->
+        <conditional name="mode">
+            <param name="option" type="select" label="Select the type of statistics you are interested in">
+                <option value="" selected="true">Basic overview</option>
+                <option value="--genomes">List of genomes in alignment (--genomes)</option>
+                <option value="--sequences">List of sequences in a given genome (--sequences)</option>
+                <option value="--bedSequences">List of sequences in a given genome (in BED format) (--bedSequences)</option>
+                <option value="--sequenceStats">Stats for each sequence in a given genome (--sequenceStats)</option>
+                <option value="--tree">Newick tree (--tree)</option>
+                <option value="--branches">List of branches specified by the child genome (--branches)</option>
+                <option value="--span">Branches on path (or spanning tree) between given list of genomes (--span)</option>
+                <option value="--spanRoot">Genomes on path (or spanning tree) with spanning tree root between given list of genomes (--spanRoot)</option>
+                <option value="--children">Names of children of a given genome (--children)</option>
+                <option value="--root">Root genome name (--root)</option>
+                <option value="--parent">Parent name of a given genome (--parent)</option>
+                <option value="--branchLength">Branch length between a given genome and its parent (--branchLength)</option>
+                <option value="--numSegments">Number of top and of bottom segments of a given genome (--numSegments)</option>
+                <option value="--topSegments">Coordinates of all top segments of a given genome (in BED format) (--topSegments)</option>
+                <option value="--bottomSegments">Coordinates of all bottom segments of a given genome (in BED format) (--bottomSegments)</option>
+                <option value="--baseComp">Base composition by sampling every step bases (--baseComp)</option>
+                <option value="--genomeMetaData">Metadata for a given genome (--genomeMetaData)</option>
+                <option value="--metaData">Metadata for the entire alignment (--metaData)</option>
+                <option value="--chromSizes">Name and length of each sequence in a given genome (in format used by wigToBigWig) (--chromSizes)</option>
+                <option value="--percentID">Percent ID of a given genome with all other genomes (--percentID)</option>
+                <option value="--coverage">Histogram of coverage of a given genome with all genomes (--coverage)</option>
+                <option value="--allCoverage">Histogram of coverage from all genomes to all genomes (--allCoverage)</option>
+            </param>
+            <!-- Cases without a nested parameter are bare switches. Every other case has one text parameter named after the switch, which the command section reads as mode.NAME. -->
+            <when value=""/>
+            <when value="--allCoverage"/>
+            <when value="--baseComp">
+                <param name="baseComp" type="text" value="" label="Genome and step" help="Parameter value is of the form genome,step where step is a positive integer. Ex: human,1000">
+                    <expand macro="sanitizer_default"/>
+                    <!-- Genome part: no commas, no leading or trailing whitespace. Step part: digits with at least one non-zero digit, so a sampling step of 0 is rejected on the form. -->
+                    <validator type="regex" message="Please enter as genome,step (step must be a positive integer) without leading or trailing spaces">^[^\s,](?:[^,]*[^\s,])?,0*[1-9][0-9]*$</validator>
+                </param>
+            </when>
+            <when value="--bedSequences">
+                <param name="bedSequences" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--topSegments">
+                <param name="topSegments" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--bottomSegments">
+                <param name="bottomSegments" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--branchLength">
+                <param name="branchLength" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--branches"/>
+            <when value="--children">
+                <param name="children" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--chromSizes">
+                <param name="chromSizes" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--coverage">
+                <param name="coverage" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--genomeMetaData">
+                <param name="genomeMetaData" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--genomes"/>
+            <when value="--metaData"/>
+            <when value="--numSegments">
+                <param name="numSegments" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--parent">
+                <param name="parent" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--percentID">
+                <param name="percentID" type="text" value="" label="Genome name" help="Only non-duplicated and unambiguous sites are considered">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--root"/>
+            <when value="--sequenceStats">
+                <param name="sequenceStats" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <when value="--sequences">
+                <param name="sequences" type="text" value="" label="Genome name">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_trim"/>
+                </param>
+            </when>
+            <!-- The two span modes take a comma-separated list of genomes instead of a single genome name. -->
+            <when value="--span">
+                <param name="span" type="text" value="" label="List of genomes" help="Enter a comma-separated (no spaces) list of genomes">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_comma_list"/>
+                </param>
+            </when>
+            <when value="--spanRoot">
+                <param name="spanRoot" type="text" value="" label="List of genomes" help="Enter a comma-separated (no spaces) list of genomes">
+                    <expand macro="sanitizer_default"/>
+                    <expand macro="validator_comma_list"/>
+                </param>
+            </when>
+            <when value="--tree"/>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single result dataset. It is plain text unless the selected statistic matches one of the cases below. -->
+        <data name="out_file" format="txt" label="${tool.name} on ${on_string}: Stats">
+            <change_format>
+                <!-- Modes that emit BED records -->
+                <when input="mode.option" value="--bedSequences" format="bed"/>
+                <when input="mode.option" value="--topSegments" format="bed"/>
+                <when input="mode.option" value="--bottomSegments" format="bed"/>
+                <!-- Modes that emit tab-separated tables -->
+                <when input="mode.option" value="--coverage" format="tabular"/>
+                <when input="mode.option" value="--allCoverage" format="tabular"/>
+                <when input="mode.option" value="--sequenceStats" format="tabular"/>
+                <when input="mode.option" value="--percentID" format="tabular"/>
+                <when input="mode.option" value="--baseComp" format="tabular"/>
+                <when input="mode.option" value="--chromSizes" format="tabular"/>
+                <when input="mode.option" value="--metaData" format="tabular"/>
+                <when input="mode.option" value="--numSegments" format="tabular"/>
+                <when input="mode.option" value="--genomeMetaData" format="tabular"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Default mode: basic overview (tree plus per-genome summary table) -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="(Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>
+                    <has_line line="GenomeName, NumChildren, Length, NumSequences, NumTopSegments, NumBottomSegments"/>
+                    <has_line line="Genome_0, 3, 1758, 1, 0, 8"/>
+                    <has_n_lines n="10"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- allCoverage: coverage histogram between all genome pairs, tabular -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--allCoverage"/>
+            </conditional>
+            <output name="out_file" ftype="tabular">
+                <assert_contents>
+                    <has_line line="FromGenome&#009; ToGenome&#009; sitesCovered1Times&#009; sitesCovered2Times&#009; sitesCovered3Times&#009; sitesCovered4Times&#009; sitesCovered5Times"/>
+                    <has_line line="Genome_1&#009; Genome_1&#009; 5472&#009; 4688&#009; 3516&#009; 2637&#009; 1465"/>
+                    <has_n_lines n="10"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- bedSequences: sequences of one genome as BED -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--bedSequences"/>
+                <param name="bedSequences" value="Genome_0"/>
+            </conditional>
+            <output name="out_file" ftype="bed">
+                <assert_contents>
+                    <has_line line="Genome_0_seq&#009;0&#009;1758"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- topSegments: top segment coordinates of one genome as BED -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--topSegments"/>
+                <param name="topSegments" value="Genome_1"/>
+            </conditional>
+            <output name="out_file" ftype="bed">
+                <assert_contents>
+                    <has_line line="Genome_1_seq&#009;0&#009;293"/>
+                    <has_line line="Genome_1_seq&#009;3223&#009;3399"/>
+                    <has_line line="Genome_1_seq&#009;5274&#009;5472"/>
+                    <has_n_lines n="28"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- bottomSegments: bottom segment coordinates of one genome as BED -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--bottomSegments"/>
+                <param name="bottomSegments" value="Genome_0"/>
+            </conditional>
+            <output name="out_file" ftype="bed">
+                <assert_contents>
+                    <has_line line="Genome_0_seq&#009;0&#009;293"/>
+                    <has_line line="Genome_0_seq&#009;1033&#009;1172"/>
+                    <has_line line="Genome_0_seq&#009;1465&#009;1758"/>
+                    <has_n_lines n="8"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- tree: Newick tree of the alignment -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--tree"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="(Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- spanRoot: genomes on the spanning tree of a comma-separated genome list -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--spanRoot"/>
+                <param name="spanRoot" value="Genome_0,Genome_1"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_text text="Genome_0 Genome_1"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- sequences: sequence names of one genome -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--sequences"/>
+                <param name="sequences" value="Genome_0"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="Genome_0_seq"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- sequenceStats: per-sequence statistics of one genome, tabular -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--sequenceStats"/>
+                <param name="sequenceStats" value="Genome_0"/>
+            </conditional>
+            <output name="out_file" ftype="tabular">
+                <assert_contents>
+                    <has_line line="SequenceName&#009; Length&#009; NumTopSegments&#009; NumBottomSegments"/>
+                    <has_line line="Genome_0_seq&#009; 1758&#009; 0&#009; 8"/>
+                    <has_n_lines n="3"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- root: name of the root genome -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--root"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="Genome_0"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- parent: parent of one genome -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--parent"/>
+                <param name="parent" value="Genome_1"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="Genome_0"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- percentID: percent identity of one genome against all genomes, tabular -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--percentID"/>
+                <param name="percentID" value="Genome_0"/>
+            </conditional>
+            <output name="out_file" ftype="tabular">
+                <assert_contents>
+                    <has_line line="Genome&#009; % ID&#009; numID&#009; numSites"/>
+                    <has_line line="Genome_0&#009; 1&#009; 1758&#009; 1758"/>
+                    <has_n_lines n="5"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- numSegments: header line added by the wrapper plus one row of counts -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--numSegments"/>
+                <param name="numSegments" value="Genome_1"/>
+            </conditional>
+            <output name="out_file" ftype="tabular">
+                <assert_contents>
+                    <has_line line="numTopSegments&#009;numBottomSegments"/>
+                    <has_line line="28&#009;0"/>
+                    <has_n_lines n="2"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- genomes: all genome names in the alignment -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--genomes"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="Genome_0 Genome_1 Genome_2 Genome_3"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- children: children of one genome -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--children"/>
+                <param name="children" value="Genome_0"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="Genome_1 Genome_2 Genome_3"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- chromSizes: sequence name and length of one genome, tabular -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--chromSizes"/>
+                <param name="chromSizes" value="Genome_1"/>
+            </conditional>
+            <output name="out_file" ftype="tabular">
+                <assert_contents>
+                    <has_line line="Genome_1_seq&#009;5472"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- branches: branches listed by child genome -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--branches"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="Genome_1 Genome_2 Genome_3"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- branchLength: branch length between one genome and its parent -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--branchLength"/>
+                <param name="branchLength" value="Genome_1"/>
+            </conditional>
+            <output name="out_file" ftype="txt">
+                <assert_contents>
+                    <has_line line="1"/>
+                    <has_n_lines n="1"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- baseComp: header line added by the wrapper plus one row of base fractions -->
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="mode">
+                <param name="option" value="--baseComp"/>
+                <param name="baseComp" value="Genome_0,1000"/>
+            </conditional>
+            <output name="out_file" ftype="tabular">
+                <assert_contents>
+                    <has_line line="fraction_of_As&#009;fraction_of_Gs&#009;fraction_of_Cs&#009;fraction_of_Ts"/>
+                    <has_line line="0.5&#009;0&#009;0.5&#009;0"/>
+                    <has_n_lines n="2"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+halStats prints structural and summary information from a HAL file, which must be provided as input.
+It can list genomes, sequences, sizes, and relationships; provide sequence-level statistics such as coverage histograms and percent ID; and export sequence or segment information in BED format.
+
+It is useful for quick inspection of a HAL file and for extracting per genome or per sequence summaries.
+
+-----
+
+**Output**
+
+The tool generates different output formats based on the selected type of statistic:
+
+- **Tabular** for --coverage, --allCoverage, --sequenceStats, --percentID, --baseComp, --chromSizes, --metaData, --numSegments, or --genomeMetaData
+- **BED** for --bedSequences, --topSegments, or --bottomSegments
+- Plain **text** for all other types of statistics
+
+    ]]></help>
+    <expand macro="citation"/>
+    <expand macro="creator"/>
+</tool>
\ No newline at end of file