Mercurial > repos > iuc > hal_halstats
diff hal_halStats.xml @ 0:25dcde5bf94e draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools commit 6244b9d15a5ad97ae20191e2f8fbafe2050c3cac
| author | iuc |
|---|---|
| date | Fri, 06 Feb 2026 10:39:34 +0000 |
| parents | |
| children |
line wrap: on
line diff
<!--
    Galaxy tool wrapper for halStats.

    Runs halStats on one HAL alignment in exactly one reporting mode, chosen
    through the "mode" conditional, and writes the report to a single output
    dataset. Tokens such as @TOOL_VERSION@ and the expanded macros
    (requirements, stdio, input_hal, sanitizer_default, validator_trim,
    validator_comma_list, citation, creator) are defined in macros.xml.
-->
<tool id="hal_halstats" name="halStats" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>retrieves basic statistics from a HAL file</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="stdio"/>
    <command detect_errors="aggressive"><![CDATA[
        ## Make a failing halStats decide the exit code of the pipelines below,
        ## even when the trailing tr/sed succeeds.
        set -o pipefail;
        (
        ## halStats prints no header for these two modes, so emit one first.
        #if $mode.option == '--baseComp':
            echo -e 'fraction_of_As\tfraction_of_Gs\tfraction_of_Cs\tfraction_of_Ts';
        #else if $mode.option == '--numSegments':
            echo -e 'numTopSegments\tnumBottomSegments';
        #end if
        halStats
        ## Modes without an argument. The empty option value (basic overview) adds no flag.
        #if $mode.option == '--allCoverage':
            --allCoverage
        #else if $mode.option == '--branches':
            --branches
        #else if $mode.option == '--genomes':
            --genomes
        #else if $mode.option == '--metaData':
            --metaData
        #else if $mode.option == '--root':
            --root
        #else if $mode.option == '--tree':
            --tree
        ## Modes taking one argument; each argument is a text param named after its flag.
        #else if $mode.option == '--baseComp':
            --baseComp '$mode.baseComp'
        #else if $mode.option == '--bedSequences':
            --bedSequences '$mode.bedSequences'
        #else if $mode.option == '--bottomSegments':
            --bottomSegments '$mode.bottomSegments'
        #else if $mode.option == '--branchLength':
            --branchLength '$mode.branchLength'
        #else if $mode.option == '--children':
            --children '$mode.children'
        #else if $mode.option == '--chromSizes':
            --chromSizes '$mode.chromSizes'
        #else if $mode.option == '--coverage':
            --coverage '$mode.coverage'
        #else if $mode.option == '--genomeMetaData':
            --genomeMetaData '$mode.genomeMetaData'
        #else if $mode.option == '--numSegments':
            --numSegments '$mode.numSegments'
        #else if $mode.option == '--parent':
            --parent '$mode.parent'
        #else if $mode.option == '--percentID':
            --percentID '$mode.percentID'
        #else if $mode.option == '--sequenceStats':
            --sequenceStats '$mode.sequenceStats'
        #else if $mode.option == '--sequences':
            --sequences '$mode.sequences'
        #else if $mode.option == '--span':
            --span '$mode.span'
        #else if $mode.option == '--spanRoot':
            --spanRoot '$mode.spanRoot'
        #else if $mode.option == '--topSegments':
            --topSegments '$mode.topSegments'
        #end if
        '$input_hal'
        ## These modes print "a, b, c" rows. Turn each comma AND the spaces after it
        ## into one tab, so tabular cells carry no leading blank (a plain
        ## tr ',' '\t' would leave "\t b"). Output is mostly numerical, and genome
        ## names contain no commas, as this would invalidate the HAL Newick tree.
        ## NOTE: "\t" in the replacement is understood by GNU and busybox sed.
        #if $mode.option == '--allCoverage' or $mode.option == '--sequenceStats' or $mode.option == '--percentID' or $mode.option == '--coverage':
            | sed 's/, */\t/g'
        #else if $mode.option == '--numSegments':
            ## The two counts are separated by a space; make them two columns.
            | tr ' ' '\t'
        #end if
        ) > '$out_file'
    ]]></command>
    <inputs>
        <expand macro="input_hal"/>
        <!-- One reporting mode per run. Modes that need a genome (or genome list)
             expose a single text parameter named after the halStats flag. -->
        <conditional name="mode">
            <param name="option" type="select" label="Select the type of statistics you are interested in">
                <option value="" selected="true">Basic overview</option>
                <option value="--genomes">List of genomes in alignment (--genomes)</option>
                <option value="--sequences">List of sequences in a given genome (--sequences)</option>
                <option value="--bedSequences">List of sequences in a given genome (in BED format) (--bedSequences)</option>
                <option value="--sequenceStats">Stats for each sequence in a given genome (--sequenceStats)</option>
                <option value="--tree">Newick tree (--tree)</option>
                <option value="--branches">List of branches specified by the child genome (--branches)</option>
                <option value="--span">Branches on path (or spanning tree) between given list of genomes (--span)</option>
                <option value="--spanRoot">Genomes on path (or spanning tree) with spanning tree root between given list of genomes (--spanRoot)</option>
                <option value="--children">Names of children of a given genome (--children)</option>
                <option value="--root">Root genome name (--root)</option>
                <option value="--parent">Parent name of a given genome (--parent)</option>
                <option value="--branchLength">Branch length between a given genome and its parent (--branchLength)</option>
                <option value="--numSegments">Number of top and of bottom segments of a given genome (--numSegments)</option>
                <option value="--topSegments">Coordinates of all top segments of a given genome (in BED format) (--topSegments)</option>
                <option value="--bottomSegments">Coordinates of all bottom segments of a given genome (in BED format) (--bottomSegments)</option>
                <option value="--baseComp">Base composition by sampling every step bases (--baseComp)</option>
                <option value="--genomeMetaData">Metadata for a given genome (--genomeMetaData)</option>
                <option value="--metaData">Metadata for the entire alignment (--metaData)</option>
                <option value="--chromSizes">Name and length of each sequence in a given genome (in format used by wigToBigWig) (--chromSizes)</option>
                <option value="--percentID">Percent ID of a given genome with all other genomes (--percentID)</option>
                <option value="--coverage">Histogram of coverage of a given genome with all genomes (--coverage)</option>
                <option value="--allCoverage">Histogram of coverage from all genomes to all genomes (--allCoverage)</option>
            </param>
            <when value=""/>
            <when value="--allCoverage"/>
            <when value="--baseComp">
                <param name="baseComp" type="text" value="" label="Genome and step" help="Parameter value is of the form genome,step. Ex: human,1000">
                    <expand macro="sanitizer_default"/>
                    <!-- Genome part: no commas, no leading/trailing whitespace; step part: digits only. -->
                    <validator type="regex" message="Please enter as genome,step without leading or trailing spaces">^[^\s,](?:[^,]*[^\s,])?,[0-9]+$</validator>
                </param>
            </when>
            <when value="--bedSequences">
                <param name="bedSequences" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--topSegments">
                <param name="topSegments" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--bottomSegments">
                <param name="bottomSegments" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--branchLength">
                <param name="branchLength" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--branches"/>
            <when value="--children">
                <param name="children" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--chromSizes">
                <param name="chromSizes" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--coverage">
                <param name="coverage" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--genomeMetaData">
                <param name="genomeMetaData" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--genomes"/>
            <when value="--metaData"/>
            <when value="--numSegments">
                <param name="numSegments" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--parent">
                <param name="parent" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--percentID">
                <param name="percentID" type="text" value="" label="Genome name" help="Only non-duplicated and unambiguous sites are considered">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--root"/>
            <when value="--sequenceStats">
                <param name="sequenceStats" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--sequences">
                <param name="sequences" type="text" value="" label="Genome name">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_trim"/>
                </param>
            </when>
            <when value="--span">
                <param name="span" type="text" value="" label="List of genomes" help="Enter a comma-separated (no spaces) list of genomes">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_comma_list"/>
                </param>
            </when>
            <when value="--spanRoot">
                <param name="spanRoot" type="text" value="" label="List of genomes" help="Enter a comma-separated (no spaces) list of genomes">
                    <expand macro="sanitizer_default"/>
                    <expand macro="validator_comma_list"/>
                </param>
            </when>
            <when value="--tree"/>
        </conditional>
    </inputs>
    <outputs>
        <!-- Plain text by default; the datatype is switched per mode to match
             what the command section actually writes. -->
        <data name="out_file" format="txt" label="${tool.name} on ${on_string}: Stats">
            <change_format>
                <when input="mode.option" value="--numSegments" format="tabular"/>
                <when input="mode.option" value="--allCoverage" format="tabular"/>
                <when input="mode.option" value="--sequenceStats" format="tabular"/>
                <when input="mode.option" value="--percentID" format="tabular"/>
                <when input="mode.option" value="--coverage" format="tabular"/>
                <when input="mode.option" value="--chromSizes" format="tabular"/>
                <when input="mode.option" value="--baseComp" format="tabular"/>
                <when input="mode.option" value="--metaData" format="tabular"/>
                <when input="mode.option" value="--genomeMetaData" format="tabular"/>
                <when input="mode.option" value="--bedSequences" format="bed"/>
                <when input="mode.option" value="--topSegments" format="bed"/>
                <when input="mode.option" value="--bottomSegments" format="bed"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Tabs in expected lines are written as &#9; because a literal tab inside
             an attribute value is normalized to a space by the XML parser. -->
        <!-- Basic overview (no flag) -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="(Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>
                    <has_line line="GenomeName, NumChildren, Length, NumSequences, NumTopSegments, NumBottomSegments"/>
                    <has_line line="Genome_0, 3, 1758, 1, 0, 8"/>
                    <has_n_lines n="10"/>
                </assert_contents>
            </output>
        </test>
        <!-- allCoverage: comma-separated rows become clean tab-separated columns -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--allCoverage"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="FromGenome&#9;ToGenome&#9;sitesCovered1Times&#9;sitesCovered2Times&#9;sitesCovered3Times&#9;sitesCovered4Times&#9;sitesCovered5Times"/>
                    <has_line line="Genome_1&#9;Genome_1&#9;5472&#9;4688&#9;3516&#9;2637&#9;1465"/>
                    <has_n_lines n="10"/>
                </assert_contents>
            </output>
        </test>
        <!-- bedSequences -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--bedSequences"/>
                <param name="bedSequences" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="bed">
                <assert_contents>
                    <has_line line="Genome_0_seq&#9;0&#9;1758"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- topSegments -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--topSegments"/>
                <param name="topSegments" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="bed">
                <assert_contents>
                    <has_line line="Genome_1_seq&#9;0&#9;293"/>
                    <has_line line="Genome_1_seq&#9;3223&#9;3399"/>
                    <has_line line="Genome_1_seq&#9;5274&#9;5472"/>
                    <has_n_lines n="28"/>
                </assert_contents>
            </output>
        </test>
        <!-- bottomSegments -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--bottomSegments"/>
                <param name="bottomSegments" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="bed">
                <assert_contents>
                    <has_line line="Genome_0_seq&#9;0&#9;293"/>
                    <has_line line="Genome_0_seq&#9;1033&#9;1172"/>
                    <has_line line="Genome_0_seq&#9;1465&#9;1758"/>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
        </test>
        <!-- tree -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--tree"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="(Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- spanRoot -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--spanRoot"/>
                <param name="spanRoot" value="Genome_0,Genome_1"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_text text="Genome_0 Genome_1"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- sequences -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--sequences"/>
                <param name="sequences" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_0_seq"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- sequenceStats: comma-separated rows become clean tab-separated columns -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--sequenceStats"/>
                <param name="sequenceStats" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="SequenceName&#9;Length&#9;NumTopSegments&#9;NumBottomSegments"/>
                    <has_line line="Genome_0_seq&#9;1758&#9;0&#9;8"/>
                    <has_n_lines n="3"/>
                </assert_contents>
            </output>
        </test>
        <!-- root -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--root"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_0"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- parent -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--parent"/>
                <param name="parent" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_0"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- percentID: comma-separated rows become clean tab-separated columns -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--percentID"/>
                <param name="percentID" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome&#9;% ID&#9;numID&#9;numSites"/>
                    <has_line line="Genome_0&#9;1&#9;1758&#9;1758"/>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
        </test>
        <!-- numSegments: header line is added by the wrapper -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--numSegments"/>
                <param name="numSegments" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="numTopSegments&#9;numBottomSegments"/>
                    <has_line line="28&#9;0"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
        </test>
        <!-- genomes -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--genomes"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_0 Genome_1 Genome_2 Genome_3"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- children -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--children"/>
                <param name="children" value="Genome_0"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_1 Genome_2 Genome_3"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- chromSizes -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--chromSizes"/>
                <param name="chromSizes" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="Genome_1_seq&#9;5472"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- branches -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--branches"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="Genome_1 Genome_2 Genome_3"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- branchLength -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--branchLength"/>
                <param name="branchLength" value="Genome_1"/>
            </conditional>
            <output name="out_file" ftype="txt">
                <assert_contents>
                    <has_line line="1"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
        </test>
        <!-- baseComp: header line is added by the wrapper -->
        <test expect_num_outputs="1">
            <param name="input_hal" value="halTest.hal"/>
            <conditional name="mode">
                <param name="option" value="--baseComp"/>
                <param name="baseComp" value="Genome_0,1000"/>
            </conditional>
            <output name="out_file" ftype="tabular">
                <assert_contents>
                    <has_line line="fraction_of_As&#9;fraction_of_Gs&#9;fraction_of_Cs&#9;fraction_of_Ts"/>
                    <has_line line="0.5&#9;0&#9;0.5&#9;0"/>
                    <has_n_lines n="2"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
halStats prints structural and summary information from a HAL file, which must be provided as input.
It can list genomes, sequences, sizes, and relationships, provides sequence level statistics such as coverage histograms and percent ID, and can export sequence or segment information in BED format.

It is useful for quick inspection of a HAL file and for extracting per genome or per sequence summaries.

-----

**Output**

The tool generates different output formats based on the selected type of statistic:

- **Tabular** for --coverage, --allCoverage, --sequenceStats, --percentID, --baseComp, --chromSizes, --metaData, --numSegments, or --genomeMetaData
- **BED** for --bedSequences, --topSegments, or --bottomSegments
- Plain **text** for all other types of statistics

    ]]></help>
    <expand macro="citation"/>
    <expand macro="creator"/>
</tool>
