Mercurial > repos > iuc > amas_summary

diff amas_summary.xml @ 0:5e15238e9e55 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/amas commit 158ec0e635067d354c425baf14b95cb616fd93c4
author: iuc
date: Tue, 02 Dec 2025 09:26:31 +0000
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/amas_summary.xml	Tue Dec 02 09:26:31 2025 +0000
@@ -0,0 +1,105 @@
+<tool id="amas_summary" name="AMAS summary" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>summarise multiple alignments</description>
+    
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+
+    <xrefs>
+        <xref type="bio.tools">amas</xref>
+    </xrefs>
+
+    <expand macro="requirements" />
+    <expand macro="version_command" />
+
+    <!--
+        Command template: Cheetah rendered into a shell command line.
+        The @...@ tokens are expanded from macros.xml, which is not visible in this file.
+        Presumably @SNIFF_INPUT_FORMAT@ sets the IN_FORMAT shell variable and
+        @SYMLINK_INPUTS@ / @INPUT_FILENAMES@ stage and list the inputs (confirm in macros.xml).
+        The summary file name is passed explicitly so it always matches
+        from_work_dir="summary.txt" on the summary_out output, rather than relying on
+        the AMAS default file name.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        set -eu;
+
+        @SNIFF_INPUT_FORMAT@
+
+        @CHECK_INTERLEAVED@
+
+        @SYMLINK_INPUTS@
+
+        python -m amas.AMAS
+        summary
+        $by_taxon
+        --in-files
+            @INPUT_FILENAMES@
+        --in-format "\${IN_FORMAT}"
+        --data-type '$data_type'
+        --cores "\${GALAXY_SLOTS:-1}"
+        --summary-out 'summary.txt'
+        $check_align
+    ]]></command>
+
+    <inputs>
+        <!-- Alignment datasets to summarise; several may be selected at once. -->
+        <param name="input_files"
+               type="data"
+               format="fasta,phylip,nex"
+               multiple="true"
+               label="Sequence(s) to summarise"
+               help="Provide pre-aligned FASTA/PHYLIP/NEXUS files (DNA or protein); mixes of unaligned reads or contigs will produce meaningless results." />
+        <!-- Optional flag: rendered as its truevalue when checked, as an empty string otherwise. -->
+        <param argument="--by-taxon"
+               type="boolean"
+               checked="false"
+               truevalue="--by-taxon"
+               falsevalue=""
+               label="Also emit per-taxon summaries" />
+        <!-- Shared parameters defined in macros.xml. -->
+        <expand macro="data_type" />
+        <expand macro="check_align" />
+    </inputs>
+
+    <outputs>
+        <!-- Overall alignment summary table, collected from summary.txt in the job working directory. -->
+        <data name="summary_out" from_work_dir="summary.txt" format="txt" label="${tool.name} on ${on_string}: Alignment summary" />
+
+        <!-- Per-taxon summaries are only requested when by_taxon is enabled. The filter keeps
+             an always-empty collection out of the history when the option is off. Files are
+             discovered by their "-seq-summary.txt" suffix; the part before ".txt" becomes the
+             element identifier. -->
+        <collection name="taxon_summaries" type="list" label="${tool.name} on ${on_string}: Per-taxon summaries">
+            <discover_datasets pattern="(?P&lt;name&gt;.+-seq-summary)\.txt" format="txt" />
+            <filter>by_taxon</filter>
+        </collection>
+    </outputs>
+
+    <tests>
+        <!-- Single FASTA input with per-taxon summaries enabled: checks the summary table and
+             a one-element per-taxon collection, hence two expected outputs. -->
+        <test expect_num_outputs="2">
+            <param name="input_files" value="inputs/fasta1.fas" />
+            <param name="by_taxon" value="true" />
+            <param name="data_type" value="dna" />
+            <param name="check_align" value="false" />
+            <output name="summary_out" file="outputs/expected_summary.txt" />
+            <output_collection name="taxon_summaries" type="list">
+                <element name="fasta1.fas-seq-summary" file="outputs/expected_taxa_summary.txt" ftype="txt" />
+            </output_collection>
+        </test>
+    </tests>
+
+    <help><![CDATA[
+        **What it does**
+
+        AMAS Summary calculates comprehensive statistics for sequence alignments, providing quality control metrics essential for phylogenomic analyses.
+
+        **Inputs**
+
+        - **Alignment files**: One or more pre-aligned sequence files (FASTA, PHYLIP, or NEXUS format)
+        - **Input format**: Determined automatically by the tool, so there is no format option to set; all files in one run must share the same format
+        - **Data type**: Choose DNA for nucleotide sequences or Protein for amino acid sequences
+        - **Also emit per-taxon summaries**: Optionally create detailed statistics for each sequence
+
+        **Outputs**
+
+        1. **Summary table** - Overall statistics for each alignment including:
+            - Number of taxa and alignment length
+            - Total matrix cells and proportion of missing data
+            - Variable sites and parsimony-informative sites
+            - GC content (DNA) or amino acid composition (protein)
+
+        2. **Per-taxon summaries** (optional): Individual statistics for each sequence showing taxon-specific missing data and character frequencies
+
+        **Statistics explained**
+
+        - **Variable sites**: Positions with more than one character state (measures sequence diversity)
+        - **Parsimony-informative sites**: Positions useful for phylogenetic inference (at least 2 taxa share each of 2+ states)
+        - **Missing data**: Proportion of gaps, N's (DNA), or X's (protein)
+        - **Matrix completeness**: Percentage of positions with actual sequence data
+
+        **Use cases**
+
+        - **Quality control**: Identify alignments with excessive missing data
+        - **Alignment comparison**: Compare statistics across multiple genes/loci
+        - **Taxon filtering**: Find sequences with poor coverage
+        - **Publication reporting**: Generate standardized alignment statistics for methods sections
+
+        @AMAS_SHARED_HELP@
+    ]]></help>
+
+    <expand macro="citations" />
+</tool>
\ No newline at end of file