diff amas_split.xml @ 0:b7e4e1487fc6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
author iuc
date Tue, 02 Dec 2025 09:26:16 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/amas_split.xml	Tue Dec 02 09:26:16 2025 +0000
@@ -0,0 +1,112 @@
+<tool id="amas_split" name="AMAS split" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>split multiple alignments</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <xrefs>
+        <xref type="bio.tools">amas</xref>
+    </xrefs>
+
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        set -eu;
+
+        ## The Galaxy datatype extension selects the input format; 'nex' is renamed to 'nexus' before use
+        #set $in_format = $input_file.ext
+        #if $in_format == 'nex'
+            #set $in_format = 'nexus'
+        #end if
+
+        ## Check if the input is interleaved; the script's stdout is captured and passed to --in-format
+        IN_FORMAT=\$(python '$__tool_directory__/check_interleaved.py'
+            '${input_file}'
+            --format '${in_format}') &&
+
+        ## Symlink the dataset under a sanitised copy of its element identifier: the input
+        ##  file name is used as a string variable, so tests need a stable, predictable name
+        #set $safename_input = re.sub('[^\w\-_\.]', '_', $input_file.element_identifier)
+        ln -s '${input_file}' '${safename_input}';
+        ## Paths and file names are single-quoted; $remove_empty is '' when unchecked, so it stays unquoted
+        python -m amas.AMAS
+        split
+        --split-by '$split_by'
+        $remove_empty
+        --out-format $out_format
+        --in-files '$safename_input'
+        --in-format "\${IN_FORMAT}"
+        --data-type $data_type
+        --cores "\${GALAXY_SLOTS:-1}"
+        $check_align
+    ]]></command>
+
+    <inputs>
+        <param name="input_file" type="data" format="fasta,phylip,nex" label="Sequence to split" multiple="false" help="Provide pre-aligned FASTA/PHYLIP/NEXUS file (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
+        <expand macro="output_format" label="Select output format for split alignments" />
+        <!-- Partitions file: if AMAS split is updated to handle NEXUS-format partitions, add nex to the format list of split_by below -->
+        <param name="split_by" type="data" format="txt" label="Partitions file for splitting. Note: needs to be a partitions file in the Unspecified format (See help section for more information)"
+               help="A file defining how to split the concatenated alignment into separate gene/locus regions. Each line specifies a partition name and its position range (e.g., 'gene1 = 1-500' for unspecified format). See the help section for more information about partitions." />
+        <param argument="--remove-empty" type="boolean" label="Remove taxa that are entirely missing within a partition" checked="false" truevalue="--remove-empty" falsevalue="" />
+        <expand macro="data_type" />
+        <expand macro="check_align" />
+    </inputs>
+
+    <outputs>
+        <expand macro="collection_outputs" name="split_alignments" /> <!-- defined in macros.xml; the test below expects a collection named split_alignments_fasta, so the macro presumably appends the output format (confirm) -->
+    </outputs>
+
+    <tests>
+        <test expect_num_outputs="1"> <!-- PHYLIP concatenation split with an Unspecified-format partitions file into one FASTA element per partition -->
+            <param name="input_file" value="inputs/concat_result.phylip" />
+            <param name="split_by" value="inputs/partitions_concat_unspecified.txt" />
+            <param name="out_format" value="fasta" />
+            <param name="data_type" value="dna" />
+            <param name="remove_empty" value="true" />
+            <param name="check_align" value="false" />
+            <output_collection type="list" name="split_alignments_fasta">
+                <element ftype="fasta" name="concat_result_p1_concat_1-out.fas" file="outputs/expected_split_partition1.fas" />
+                <element ftype="fasta" name="concat_result_p2_concat_2-out.fas" file="outputs/expected_split_partition2.fas" />
+            </output_collection>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        **What it does**
+
+        AMAS Split divides a concatenated alignment back into separate gene/locus files using a partitions file. This is the reverse operation of AMAS Concat.
+
+        **Inputs**
+
+        - **Concatenated alignment**: A single alignment file containing multiple genes/loci joined end-to-end
+        - **Partitions file**: Defines the boundaries of each gene/locus (a .txt file in the Unspecified partition format)
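+        - **Input format**: Detected automatically from the datatype of your concatenated alignment dataset (FASTA, PHYLIP or NEXUS); make sure the dataset has the correct datatype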
+        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
+        - **Output format**: Select the desired format for the split alignment files
+        - **Remove empty sequences**: Optionally exclude taxa with only gaps/missing data in a partition
+
+        **Outputs**
+
+        A collection of alignment files, one per partition/gene defined in your partitions file.
+
+        @PARTITIONS_HELP@
+
+        **IMPORTANT**: Partitions files in RAxML or NEXUS format will not work, even if they were produced by AMAS Concat. This applies to .txt files containing RAxML or NEXUS formatting as well as to .nex files containing NEXUS formatting.
+
+        **Tip:** An example partitions file for your data can be generated using the AMAS concat tool.
+
+        **Use cases**
+
+        - Extract individual gene alignments from a concatenated dataset
+        - Analyze genes separately after joint phylogenetic analysis
+        - Apply gene-specific filtering or trimming
+        - Recover original locus alignments from published concatenated datasets
+
+        @AMAS_SHARED_HELP@
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
\ No newline at end of file