diff hal_hal2maf.xml @ 0:a3ff8225d554 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/haltools commit 6244b9d15a5ad97ae20191e2f8fbafe2050c3cac
author iuc
date Fri, 06 Feb 2026 10:38:29 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hal_hal2maf.xml	Fri Feb 06 10:38:29 2026 +0000
@@ -0,0 +1,333 @@
+<tool id="hal_hal2maf" name="hal2maf" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>converts HAL to MAF</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/> 
+    <expand macro="stdio"/>
+    <command detect_errors="aggressive"><![CDATA[
+        ## Append mode: hal2maf is pointed at a working copy so that the
+        ## input MAF dataset itself is never modified.
+        #if $file.append == 'true':
+            cp '$file.input_maf' writable_maf.maf &&
+        #end if
+        hal2maf
+            #if $file.append == 'true':
+                --append
+            #end if
+            ## Reference genome options are only passed for a custom reference.
+            #if $genome.reference == 'custom':
+                --refGenome '$genome.refGenome'
+                $genome.noAncestors
+            #end if
+            ## Optional genome filter; the empty default adds no flag.
+            #if $filter.option == '--targetGenomes':
+                --targetGenomes '$filter.targetGenomes'
+            #else if $filter.option == '--rootGenome':
+                --rootGenome '$filter.rootGenome'
+            #end if
+            ## Optional region selection; the empty default adds no flag.
+            #if $convert.mode == 'sequence':
+                --refSequence '$convert.sequence'
+                --start $convert.start
+                --length $convert.length
+            #else if $convert.mode == 'target':
+                --refTargets '$convert.refTargets'
+            #else if $convert.mode == 'global':
+                --global
+            #end if
+            --maxRefGap $maxRefGap
+            --maxBlockLen $maxBlockLen
+            ## Optional switches.
+            $noDupes
+            $onlySequenceNames
+            $unique
+            $printTree
+            $onlyOrthologs
+            $keepEmptyRefBlocks
+            ## Positional arguments: input HAL file, then the working MAF file.
+            '$input_hal' writable_maf.maf
+            ## Publish the working file as the output dataset, compressed on request.
+            #if $compression.type == 'gz':
+                && gzip -c writable_maf.maf > '$output_file'
+            #else if $compression.type == 'bz2':
+                && bzip2 -c writable_maf.maf > '$output_file'
+            #else:
+                && mv writable_maf.maf '$output_file'
+            #end if
+    ]]></command>
+    <inputs>
+        <expand macro="input_hal"/>
+        <expand macro="params_conditional_refGenome"/>
+        <conditional name="filter"> <!-- Chooses which genomes appear in the output. The non-empty option values are the exact strings the command template compares against; the empty default adds no filter argument. -->
+            <param name="option" type="select" label="Genomes to include in the output">
+                <option value="" selected="true">All genomes (default)</option>
+                <option value="--targetGenomes">Specific target genomes (--targetGenomes)</option>
+                <option value="--rootGenome">Only genomes in the subtree to a root (--rootGenome)</option>
+            </param>
+            <when value=""/>
+            <when value="--targetGenomes">
+                <expand macro="params_targetGenomes"/>
+            </when>
+            <when value="--rootGenome">
+                <expand macro="params_rootGenome"/>
+            </when>
+        </conditional>
+        <conditional name="convert"> <!-- Chooses how much of the alignment is converted: "sequence" adds the sequence/start/length inputs from macros, "target" asks for a BED dataset, while "global" and the empty default need no extra inputs. -->
+            <param name="mode" type="select" label="Convert options">
+                <option value="" selected="true">All sequences (default)</option>
+                <option value="sequence">A reference sequence (--refSequence)</option>
+                <option value="target">Using reference intervals from a BED file (--refTargets)</option>
+                <option value="global">All columns in alignment, ignoring reference-based filters (--global)</option>
+            </param>
+            <when value=""/>
+            <when value="sequence">
+                <expand macro="params_sequence"/>
+                <expand macro="params_start"/>
+                <expand macro="params_length"/>
+            </when>
+            <when value="target">
+                <param name="refTargets" type="data" format="bed" label="BED file" help="BED file with reference genome intervals to convert. Only alignment columns overlapping these intervals are included"/>
+            </when>
+            <when value="global"/>
+        </conditional>
+        <conditional name="file"> <!-- Optional append mode: choosing "true" requests an existing MAF dataset, which the command template copies to a working file before hal2maf appends to it. -->
+            <param name="append" type="select" label="Append new alignment to an existing MAF file">
+                <option value="false" selected="true">No, create a new MAF file (default)</option>
+                <option value="true">Yes, append to an existing MAF file (--append)</option>
+            </param>
+            <when value="true">
+                <param name="input_maf" type="data" format="maf" label="MAF file"/>
+            </when>
+            <when value="false"/>
+        </conditional>
+        <param argument="--maxRefGap" type="integer" min="0" value="0" label="Maximum gap length" help="Maximum gap length in reference genome"/>
+        <param argument="--maxBlockLen" type="integer" min="0" value="1000" label="Maximum block length" help="Maximum length of MAF block in output"/>
+        <param argument="--noDupes" type="boolean" truevalue="--noDupes" falsevalue="" checked="false" label="Ignore paralogy edges" help="Ignore paralogy edges"/>
+        <expand macro="params_onlySequenceNames"/>
+        <param argument="--unique" type="boolean" truevalue="--unique" falsevalue="" checked="false" label="Unique" help="Only write column whose left-most reference coordinate is in the specified range. This is used to ensure that the same column isn't sampled twice (due to duplications) by MAFs generated on distinct ranges"/>
+        <param argument="--printTree" type="boolean" truevalue="--printTree" falsevalue="" checked="false" label="Print Tree" help="Print a gene tree for every block"/>
+        <param argument="--onlyOrthologs" type="boolean" truevalue="--onlyOrthologs" falsevalue="" checked="false" label="Only Orthologs" help="Make only orthologs to the reference appear in the MAF blocks"/>
+        <!-- truevalue is the text placed on the command line, so it must be the keepEmptyRefBlocks flag itself; the "global" mode is a separate choice made in the "convert" conditional. -->
+        <param argument="--keepEmptyRefBlocks" type="boolean" truevalue="--keepEmptyRefBlocks" falsevalue="" checked="false" label="Keep empty reference blocks" help="Keep blocks that contain no reference sequence"/>
+        <expand macro="params_conditional_compression"/>
+    </inputs>
+    <outputs> <!-- Single output dataset. Its datatype defaults to maf and switches to maf.gz or maf.bz2 according to compression.type, matching the gzip / bzip2 / mv branches of the command template. -->
+        <data name="output_file" format="maf" label="${tool.name} on ${on_string}: MAF file">
+            <change_format>
+                <when input="compression.type" value="gz" format="maf.gz"/>
+                <when input="compression.type" value="bz2" format="maf.bz2"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="filter">
+                <param name="option" value=""/>
+            </conditional>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>    
+                    <has_line line="a"/>    
+                    <has_text text="s&#009;Genome_0.Genome_0_seq&#009;293&#009;293&#009;+&#009;1758"/>
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;4571&#009;117&#009;+&#009;5472"/>
+                    <has_n_lines n="90"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="filter">
+                <param name="option" value=""/>
+            </conditional>
+             <conditional name="compression">
+                <param name="type" value="gz"/>
+            </conditional>
+            <output name="output_file" ftype="maf.gz" file="hal2maf_output.maf.gz"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="filter">
+                <param name="option" value=""/>
+            </conditional>
+             <conditional name="compression">
+                <param name="type" value="bz2"/>
+            </conditional>
+            <output name="output_file" ftype="maf.bz2" file="hal2maf_output.maf.bz2"/>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="filter">
+                <param name="option" value="--rootGenome"/>
+                <param name="rootGenome" value="Genome_1"/>
+            </conditional>
+            <param name="printTree" value="true"/>
+            <param name="onlySequenceNames" value="true"/>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>   
+                    <has_text text="a tree="/>  
+                    <has_text text="Genome_1_seq,Genome_1_seq,Genome_2,Genome_3,Genome_3)Genome_0_seq;"/>    
+                    <has_text text="Genome_0_seq&#009;293&#009;293&#009;+&#009;1758"/>
+                    <has_text text="s&#009;Genome_1_seq&#009;4864&#009;117&#009;+&#009;5472"/>
+                    <has_n_lines n="52"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <param name="maxBlockLen" value="200"/>
+            <param name="noDupes" value="true"/>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>   
+                    <has_line line="a"/>    
+                    <has_text text="s&#009;Genome_3.Genome_3_seq&#009;4102&#009;200&#009;+&#009;6139"/>    
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;3130&#009;93&#009;+&#009;5472"/>
+                    <has_text text="s&#009;Genome_0.Genome_0_seq&#009;1748&#009;10&#009;+&#009;1758&#009;CGCGTCGGCG"/>
+                    <has_n_lines n="69"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="genome">
+                <param name="reference" value="custom"/>
+                <param name="refGenome" value="Genome_1"/>
+                <param name="noAncestors" value="true"/>
+            </conditional>
+            <param name="onlyOrthologs" value="true"/>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>   
+                    <has_line line="a"/>    
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;5135&#009;337&#009;+&#009;5472"/>    
+                    <has_text text="s&#009;Genome_2.Genome_2_seq&#009;2498&#009;139&#009;+&#009;4270"/>
+                    <has_text text="s&#009;Genome_3.Genome_3_seq&#009;4571&#009;117&#009;+&#009;6139"/>
+                    <has_n_lines n="230"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="genome">
+                <param name="reference" value="custom"/>
+                <param name="refGenome" value="Genome_1"/>
+            </conditional>
+            <param name="unique" value="true"/>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>   
+                    <has_line line="a"/>    
+                    <has_text text="s&#009;Genome_3.Genome_3_seq&#009;1033&#009;139&#009;+&#009;6139"/>    
+                    <has_text text="s&#009;Genome_0.Genome_0_seq&#009;1348&#009;117&#009;+&#009;1758"/>
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;5274&#009;198&#009;+&#009;5472&#009;NNNNNNNNNNNNNNNN"/>
+                    <has_n_lines n="96"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="genome">
+                <param name="reference" value="custom"/>
+                <param name="refGenome" value="Genome_1"/>
+            </conditional>
+            <param name="maxRefGap" value="2000"/>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>  
+                    <has_line line="a"/>    
+                    <has_text text="s&#009;Genome_2.Genome_2_seq&#009;2344&#009;154&#009;+&#009;4270"/>    
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;1465&#009;293&#009;+&#009;5472&#009;----------------"/>
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;3516&#009;293&#009;+&#009;5472&#009;NNNNNNNNNNNNNNNN"/>
+                    <has_n_lines n="101"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="convert">
+                <param name="mode" value="sequence"/>
+                <param name="sequence" value="Genome_0_seq"/>
+                <param name="start" value="4"/>
+                <param name="length" value="10"/>
+            </conditional>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>  
+                    <has_line line="a"/>    
+                    <has_line line="s&#009;Genome_0.Genome_0_seq&#009;4&#009;10&#009;+&#009;1758&#009;TCGGGGGGGA"/>    
+                    <has_line line="s&#009;Genome_2.Genome_2_seq&#009;4&#009;10&#009;+&#009;4270&#009;TCGGGGGGGA"/>    
+                    <has_line line="s&#009;Genome_3.Genome_3_seq&#009;4&#009;10&#009;+&#009;6139&#009;TCGGGGGGGA"/>    
+                    <has_n_lines n="11"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="file">
+                <param name="append" value="true"/>
+                <param name="input_maf" value="hal2maf_append.maf"/>
+            </conditional>
+            <conditional name="filter">
+                <param name="option" value="--targetGenomes"/>
+                <param name="targetGenomes" value="Genome_2,Genome_3"/>
+            </conditional>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>  
+                    <has_line line="a"/>    
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;0&#009;293&#009;+&#009;5472"/>    
+                    <has_text text="s&#009;Genome_3.Genome_3_seq&#009;4395&#009;176&#009;+&#009;6139"/>    
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;3223&#009;176&#009;+&#009;5472"/>    
+                    <has_n_lines n="152"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="convert">
+                <param name="mode" value="global"/>
+            </conditional>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>  
+                    <has_line line="a"/>    
+                    <has_text text="s&#009;Genome_1.Genome_1_seq&#009;3516&#009;293&#009;+&#009;5472&#009;NNNNNNNNNNNN"/>    
+                    <has_text text="s&#009;Genome_2.Genome_2_seq&#009;1465&#009;293&#009;+&#009;4270&#009;------------"/>    
+                    <has_text text="s&#009;Genome_3.Genome_3_seq&#009;4571&#009;117&#009;+&#009;6139"/>    
+                    <has_n_lines n="113"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="1">
+            <param name="input_hal" value="halTest.hal"/>
+            <conditional name="convert">
+                <param name="mode" value="target"/>
+                <param name="refTargets" value="hal2maf.bed"/>
+            </conditional>
+            <output name="output_file" ftype="maf">
+                <assert_contents>
+                    <has_line line="##maf version=1 scoring=N/A"/>   
+                    <has_line line="# hal (Genome_1:1,Genome_2:1,Genome_3:1)Genome_0;"/>  
+                    <has_line line="a"/>    
+                    <has_text text="s&#009;Genome_0.Genome_0_seq&#009;0&#009;293&#009;+&#009;1758"/>    
+                    <has_text text="s&#009;Genome_2.Genome_2_seq&#009;3516&#009;293&#009;+&#009;4270"/>    
+                    <has_text text="s&#009;Genome_3.Genome_3_seq&#009;586&#009;293&#009;+&#009;6139"/>    
+                    <has_n_lines n="24"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+hal2maf converts an input HAL alignment into a MAF file as output. 
+The tool supports converting the full alignment or restricting output to a selected reference genome, reference sequence, a set of target genomes, or only genomes belonging to the subtree rooted at a root genome.
+
+Additional controls allow filtering for orthologs, excluding ancestral sequences, keeping empty reference blocks, limiting block size, and controlling reference gaps. 
+MAF blocks can also include gene tree annotations if requested.
+    ]]></help>
+    <expand macro="citation"/>
+    <expand macro="creator"/>
+</tool>
\ No newline at end of file