panta: panta.xml comparison

comparison panta.xml @ 0:72296762b4f1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/panta/ commit 9b05e32c37a0825eb503df9daaf39b9c48e07c5b

author	iuc
date	Mon, 15 Sep 2025 11:40:14 +0000
parents
children	b50893534705

comparison

equal deleted inserted replaced

--1:000000000000
+:72296762b4f1
+<tool id="panta" name="PanTA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
+<!-- Short summary of what the tool does. -->
+<description>Efficient inference of large prokaryotic pangenomes with PanTA</description>
+<!-- macros.xml supplies the "requirements" macro expanded below; the @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens used on the tool element are presumably defined there as well (file not shown here; confirm). -->
+<macros>
+<import>macros.xml</import>
+</macros>
+<expand macro="requirements"/>
+<command detect_errors="exit_code"><![CDATA[
+## Stage the selected inputs under ./input_directory with shell-safe link names,
+## run the chosen PanTA sub-command ("main" or "add") and leave the result files
+## in ./out, which is where the data outputs of this tool are read from.
+#import re
+#set $input_directory = 'input_directory'
+mkdir out &&
+mkdir $input_directory &&
+#if $input_type.input_type_selector == "gff":
+#for $gff in $input_type.input_gff
+## Anything that is not whitespace, a word character, '-' or '.' becomes '_',
+## so the name can be embedded in the single-quoted shell argument below.
+#set $link_name = re.sub('[^\s\w\-\\.]','_',str($gff.element_identifier))
+## The -g glob further down only matches "*.gff". Element identifiers are
+## chosen by the user and need not end in that suffix, so normalise it here:
+## "x.gff3" becomes "x.gff", any other name gets ".gff" appended.
+#if $link_name.endswith('.gff3')
+#set $link_name = $link_name[:-1]
+#elif not $link_name.endswith('.gff')
+#set $link_name = $link_name + '.gff'
+#end if
+ln -fs '$gff' '$input_directory/$link_name' &&
+#end for
+#elif $input_type.input_type_selector == "tsv":
+#set $link_name = re.sub('[^\s\w\-\\.]','_',str($input_type.input_tsv.element_identifier))
+## Same reasoning as for GFF: the -f glob only matches "*.tsv".
+#if not $link_name.endswith('.tsv')
+#set $link_name = $link_name + '.tsv'
+#end if
+ln -fs '$input_type.input_tsv' '$input_directory/$link_name' &&
+#end if
+#if $mode.select_mode == "add":
+## "add" works on an existing collection: unpack the supplied archive first.
+mkdir -p extracted_dir &&
+ln -s '$mode.collection_dir' collection_dir.tar.gz &&
+## The parameter accepts both tar and tar.gz; only ask tar to gunzip when the
+## dataset is not a plain tar.
+#set $tar_flags = '-xf' if $mode.collection_dir.ext == 'tar' else '-xzf'
+## --strip-components=1 matches archives whose members are stored as "./name"
+## (the layout produced by the tar call in "main" mode) or "dir/name".
+tar --strip-components=1 $tar_flags collection_dir.tar.gz -C extracted_dir &&
+#end if
+## The select values "main" and "add" are the PanTA sub-command names.
+panta $mode.select_mode
+#if $input_type.input_type_selector == "gff":
+-g $input_directory/*.gff
+#elif $input_type.input_type_selector == "tsv":
+-f $input_directory/*.tsv
+#end if
+#if $mode.select_mode == "main":
+-o out
+#else
+-c extracted_dir
+#end if
+$dont_split
+--blast '$blast'
+--identity '$identity'
+--LD '$LD'
+--AL '$AL'
+--AS '$AS'
+--evalue '$evalue'
+--threads "\${GALAXY_SLOTS:-8}"
+--table '$table'
+#if str($alignment) != 'None':
+--alignment '$alignment'
+#end if
+#if $mode.select_mode == "main":
+## NOTE(review): this archive is written to the job directory, but no data
+## output declared in this tool collects it, so it never reaches the history.
+## Confirm whether it should be exposed for later "add" runs.
+&& tar -czf collection_dir.tar.gz -C out .
+#else
+## "add" updates extracted_dir in place; copy the results to where the
+## outputs are collected.
+&& cp -r extracted_dir/* out
+#end if
+]]></command>
+<inputs>
+<!-- PanTA sub-command: "main" starts from the input genomes only, "add" additionally needs the archive of a previous collection. -->
+<conditional name="mode">
+<param label="Select mode" name="select_mode" type="select">
+<option selected="true" value="main">Use PanTA main</option>
+<option value="add">Use PanTA add</option>
+</param>
+<when value="main"/>
+<when value="add">
+<param name="collection_dir" type="data" format="tar,tar.gz" label="Previous collection directory" help="Tar archive of the output directory of an earlier PanTA run"/>
+</when>
+</conditional>
+<!-- Input genomes: either a list collection of GFF3 files or a single TSV file. -->
+<conditional name="input_type">
+<param name="input_type_selector" type="select" label="Choose the input format">
+<option value="gff" selected="true">GFF File</option>
+<option value="tsv">TSV File</option>
+</param>
+<when value="gff">
+<param type="data_collection" name="input_gff" format="gff3" collection_type="list" label="Select input files to analyze" help="Select the files you wish to analyze with PanTA"/>
+</when>
+<when value="tsv">
+<param type="data" name="input_tsv" format="tsv,tabular" multiple="false" label="Select input file to analyze" help="Select the file you wish to analyze with PanTA"/>
+</when>
+</conditional>
+<!-- Options shared by both sub-commands. -->
+<param argument="--dont-split" type="boolean" truevalue="--dont-split" falsevalue="" checked="false" label="Don't split paralog clusters" help="Enable to skip the splitting of paralog clusters"/>
+<param argument="--blast" type="select" label="Alignment method" help="Method for all-against-all alignment (default: diamond)">
+<option value="diamond" selected="true">Diamond</option>
+<option value="blast">Blast</option>
+</param>
+<!-- The thresholds below are fractions, hence the 0..1 bounds. -->
+<param argument="--identity" type="float" value="0.7" min="0" max="1" label="Minimum percentage identity" help="Set the minimum identity, given as a fraction between 0 and 1"/>
+<param argument="--LD" type="float" value="0.7" min="0" max="1" label="Length difference cutoff" help="Set the length difference cutoff between two sequences"/>
+<param argument="--AL" type="float" value="0" min="0" max="1" label="Alignment coverage for the longer sequence" help="Set the alignment coverage for the longer sequence"/>
+<param argument="--AS" type="float" value="0" min="0" max="1" label="Alignment coverage for the shorter sequence" help="Set the alignment coverage for the shorter sequence"/>
+<param argument="--evalue" type="float" value="1e-06" min="0" label="Blast evalue" help="Maximum expected value for reporting hits; lower values are stricter"/>
+<!-- A required integer needs a default, otherwise the form starts out with an empty, invalid value. -->
+<param argument="--table" type="integer" value="11" min="1" label="Codon table" help="Set the codon table (default: 11)"/>
+<param argument="--alignment" type="select" label="Run alignment for each gene cluster">
+<option value="None" selected="true">None</option>
+<option value="nucleotide">Nucleotide</option>
+<option value="protein">Protein</option>
+</param>
+</inputs>
+<outputs>
+<!-- Files collected from ./out after every run, in both modes. -->
+<data format="json" name="annotated_clusters" label="${tool.name} on ${on_string} : Annotated Clusters" from_work_dir="out/annotated_clusters.json"/>
+<data format="tsv" name="blast_output" label="${tool.name} on ${on_string} : BLAST" from_work_dir="out/blast.tsv"/>
+<data format="json" name="clusters" label="${tool.name} on ${on_string} : Clusters" from_work_dir="out/clusters.json"/>
+<data format="csv" name="gene_annotation" label="${tool.name} on ${on_string} : Gene Annotation" from_work_dir="out/gene_annotation.csv"/>
+<data format="csv" name="gene_position" label="${tool.name} on ${on_string} : Gene Position" from_work_dir="out/gene_position.csv"/>
+<data format="csv" name="gene_presence_absence" label="${tool.name} on ${on_string} : Gene Presence Absence" from_work_dir="out/gene_presence_absence.csv"/>
+<data format="txt" name="gene_presence_absence_Rtab" label="${tool.name} on ${on_string} : Gene Presence Absence Rtab" from_work_dir="out/gene_presence_absence.Rtab"/>
+<data format="fasta" name="representative_clusters_nucl" label="${tool.name} on ${on_string} : Representative Clusters Nucl" from_work_dir="out/representative_clusters_nucl.fasta"/>
+<data format="fasta" name="representative_clusters_prot" label="${tool.name} on ${on_string} : Representative Clusters Prot" from_work_dir="out/representative_clusters_prot.fasta"/>
+<data format="fasta" name="representative" label="${tool.name} on ${on_string} : Representative FASTA" from_work_dir="out/representative.fasta"/>
+<data format="json" name="samples" label="${tool.name} on ${on_string} : Samples" from_work_dir="out/samples.json"/>
+<data format="txt" name="summary_statistics" label="${tool.name} on ${on_string} : Summary Statistics" from_work_dir="out/summary_statistics.txt"/>
+<!-- Alignment outputs: only created when an alignment type is selected. The filter must reference the parameter itself; a list literal such as ['alignment'] never equals 'None' and would keep these outputs unconditionally. -->
+<!-- NOTE(review): core_gene_alignment is collected from a .aln.gz file but declared as txt; confirm the intended datatype. -->
+<data format="txt" name="core_gene_alignment" label="${tool.name} on ${on_string} : Core Gene Alignment" from_work_dir="out/core_gene_alignment.aln.gz">
+<filter>alignment != 'None'</filter>
+</data>
+<data format="txt" name="pan_genome_reference" label="${tool.name} on ${on_string} : Pan Genome Reference" from_work_dir="out/pan_genome_reference.fna">
+<filter>alignment != 'None'</filter>
+</data>
+</outputs>
+<tests>
+<!-- Output counts: 12 outputs without an alignment, 14 when the alignment option is set. -->
+<!-- Test 1: PanTA main on GFF files with default options -->
+<test expect_num_outputs="12">
+<conditional name="mode">
+<param name="select_mode" value="main"/>
+</conditional>
+<conditional name="input_type">
+<param name="input_type_selector" value="gff"/>
+<param name="input_gff">
+<collection type="list">
+<element name="GCA_021342655.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342655.1.gff"/>
+<element name="GCA_021534865.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021534865.1.gff"/>
+<element name="GCA_021697815.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021697815.1.gff"/>
+<element name="GCA_021890555.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890555.1.gff"/>
+</collection>
+</param>
+</conditional>
+<param name="table" value="10"/>
+<output name="annotated_clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
+<has_n_lines n="96508" delta="3"/>
+</assert_contents>
+</output>
+<output name="blast_output" ftype="tsv">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
+<has_n_lines n="30680" delta="3"/>
+</assert_contents>
+</output>
+<output name="clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
+<has_n_lines n="24266" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_annotation" ftype="csv">
+<assert_contents>
+<has_text text="IclR family transcriptional regulator,6"/>
+<has_n_lines n="19712" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_position" ftype="csv">
+<assert_contents>
+<has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
+<has_n_lines n="195" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence" ftype="csv">
+<assert_contents>
+<has_n_lines n="7682" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence_Rtab" ftype="txt">
+<assert_contents>
+<has_n_lines n="7682" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_nucl" ftype="fasta">
+<assert_contents>
+<has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
+<has_n_lines n="124180" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_prot" ftype="fasta">
+<assert_contents>
+<has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
+<has_n_lines n="49014" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative" ftype="fasta">
+<assert_contents>
+<has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
+<has_n_lines n="18795" delta="3"/>
+</assert_contents>
+</output>
+<output name="samples" ftype="json">
+<assert_contents>
+<has_n_lines n="22" delta="3"/>
+</assert_contents>
+</output>
+<output name="summary_statistics" ftype="txt">
+<assert_contents>
+<has_text text="Soft core genes"/>
+<has_n_lines n="6" delta="3"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 2: PanTA main on GFF files with the 'dont_split' parameter -->
+<test expect_num_outputs="12">
+<conditional name="mode">
+<param name="select_mode" value="main"/>
+</conditional>
+<conditional name="input_type">
+<param name="input_type_selector" value="gff"/>
+<param name="input_gff">
+<collection type="list">
+<element name="GCA_021342655.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342655.1.gff"/>
+<element name="GCA_021534865.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021534865.1.gff"/>
+<element name="GCA_021697815.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021697815.1.gff"/>
+<element name="GCA_021890555.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890555.1.gff"/>
+</collection>
+</param>
+</conditional>
+<param name="table" value="10"/>
+<param name="dont_split" value="true"/>
+<output name="annotated_clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
+<has_n_lines n="90588" delta="3"/>
+</assert_contents>
+</output>
+<output name="blast_output" ftype="tsv">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
+<has_n_lines n="30680" delta="3"/>
+</assert_contents>
+</output>
+<output name="clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
+<has_n_lines n="24266" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_annotation" ftype="csv">
+<assert_contents>
+<has_text text="IclR family transcriptional regulator,6"/>
+<has_n_lines n="19712" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_position" ftype="csv">
+<assert_contents>
+<has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
+<has_n_lines n="195" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence" ftype="csv">
+<assert_contents>
+<has_n_lines n="7089" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence_Rtab" ftype="txt">
+<assert_contents>
+<has_n_lines n="7089" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_nucl" ftype="fasta">
+<assert_contents>
+<has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
+<has_n_lines n="115793" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_prot" ftype="fasta">
+<assert_contents>
+<has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
+<has_n_lines n="45624" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative" ftype="fasta">
+<assert_contents>
+<has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
+<has_n_lines n="18795" delta="3"/>
+</assert_contents>
+</output>
+<output name="samples" ftype="json">
+<assert_contents>
+<has_n_lines n="22" delta="3"/>
+</assert_contents>
+</output>
+<output name="summary_statistics" ftype="txt">
+<assert_contents>
+<has_text text="Soft core genes"/>
+<has_n_lines n="6" delta="3"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 3: PanTA main on GFF files with the 'alignment' parameter -->
+<test expect_num_outputs="14">
+<conditional name="mode">
+<param name="select_mode" value="main"/>
+</conditional>
+<conditional name="input_type">
+<param name="input_type_selector" value="gff"/>
+<param name="input_gff">
+<collection type="list">
+<element name="GCA_021342655.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342655.1.gff"/>
+<element name="GCA_021534865.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021534865.1.gff"/>
+<element name="GCA_021697815.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021697815.1.gff"/>
+<element name="GCA_021890555.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890555.1.gff"/>
+</collection>
+</param>
+</conditional>
+<param name="table" value="10"/>
+<param name="alignment" value="nucleotide"/>
+<output name="annotated_clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
+<has_n_lines n="96508" delta="3"/>
+</assert_contents>
+</output>
+<output name="blast_output" ftype="tsv">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
+<has_n_lines n="30680" delta="3"/>
+</assert_contents>
+</output>
+<output name="clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
+<has_n_lines n="24266" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_annotation" ftype="csv">
+<assert_contents>
+<has_text text="IclR family transcriptional regulator,6"/>
+<has_n_lines n="19712" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_position" ftype="csv">
+<assert_contents>
+<has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
+<has_n_lines n="195" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence" ftype="csv">
+<assert_contents>
+<has_n_lines n="7681" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence_Rtab" ftype="txt">
+<assert_contents>
+<has_n_lines n="7681" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_nucl" ftype="fasta">
+<assert_contents>
+<has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
+<has_n_lines n="124180" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_prot" ftype="fasta">
+<assert_contents>
+<has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
+<has_n_lines n="49014" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative" ftype="fasta">
+<assert_contents>
+<has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
+<has_n_lines n="18795" delta="3"/>
+</assert_contents>
+</output>
+<output name="samples" ftype="json">
+<assert_contents>
+<has_n_lines n="22" delta="3"/>
+</assert_contents>
+</output>
+<output name="summary_statistics" ftype="txt">
+<assert_contents>
+<has_text text="Soft core genes"/>
+<has_n_lines n="6" delta="3"/>
+</assert_contents>
+</output>
+<output name="core_gene_alignment" ftype="txt">
+<assert_contents>
+<has_n_lines n="96690" delta="3"/>
+</assert_contents>
+</output>
+<output name="pan_genome_reference" ftype="txt">
+<assert_contents>
+<has_text text="AAAGGCGTTTGGTATATAACGATGCCAG"/>
+<has_n_lines n="84292" delta="3"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 4: PanTA add on GFF files with default options -->
+<test expect_num_outputs="12">
+<conditional name="mode">
+<param name="select_mode" value="add"/>
+<param name="collection_dir" location="https://zenodo.org/records/16568442/files/collection_dir.tar.gz" ftype="tar.gz"/>
+</conditional>
+<conditional name="input_type">
+<param name="input_type_selector" value="gff"/>
+<param name="input_gff">
+<collection type="list">
+<element name="GCA_021342735.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342735.1.gff"/>
+<element name="GCA_021725855.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021725855.1.gff"/>
+<element name="GCA_021890695.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890695.1.gff"/>
+</collection>
+</param>
+</conditional>
+<param name="table" value="10"/>
+<output name="annotated_clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
+<has_n_lines n="118811" delta="3"/>
+</assert_contents>
+</output>
+<output name="blast_output" ftype="tsv">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
+<has_n_lines n="38245" delta="3"/>
+</assert_contents>
+</output>
+<output name="clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
+<has_n_lines n="39790" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_annotation" ftype="csv">
+<assert_contents>
+<has_text text="IclR family transcriptional regulator,6"/>
+<has_n_lines n="33564" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_position" ftype="csv">
+<assert_contents>
+<has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
+<has_n_lines n="363" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence" ftype="csv">
+<assert_contents>
+<has_n_lines n="8523" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence_Rtab" ftype="txt">
+<assert_contents>
+<has_n_lines n="8523" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_nucl" ftype="fasta">
+<assert_contents>
+<has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
+<has_n_lines n="136572" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_prot" ftype="fasta">
+<assert_contents>
+<has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
+<has_n_lines n="53952" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative" ftype="fasta">
+<assert_contents>
+<has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
+<has_n_lines n="29001" delta="3"/>
+</assert_contents>
+</output>
+<output name="samples" ftype="json">
+<assert_contents>
+<has_n_lines n="37" delta="3"/>
+</assert_contents>
+</output>
+<output name="summary_statistics" ftype="txt">
+<assert_contents>
+<has_text text="Soft core genes"/>
+<has_n_lines n="6" delta="3"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 5: PanTA add on GFF files with the 'dont_split' parameter -->
+<test expect_num_outputs="12">
+<conditional name="mode">
+<param name="select_mode" value="add"/>
+<param name="collection_dir" location="https://zenodo.org/records/16568442/files/collection_dir.tar.gz" ftype="tar.gz"/>
+</conditional>
+<conditional name="input_type">
+<param name="input_type_selector" value="gff"/>
+<param name="input_gff">
+<collection type="list">
+<element name="GCA_021342735.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342735.1.gff"/>
+<element name="GCA_021725855.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021725855.1.gff"/>
+<element name="GCA_021890695.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890695.1.gff"/>
+</collection>
+</param>
+</conditional>
+<param name="table" value="10"/>
+<param name="dont_split" value="true"/>
+<output name="annotated_clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
+<has_n_lines n="111811" delta="3"/>
+</assert_contents>
+</output>
+<output name="blast_output" ftype="tsv">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
+<has_n_lines n="38245" delta="3"/>
+</assert_contents>
+</output>
+<output name="clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
+<has_n_lines n="39790" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_annotation" ftype="csv">
+<assert_contents>
+<has_text text="IclR family transcriptional regulator,6"/>
+<has_n_lines n="33564" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_position" ftype="csv">
+<assert_contents>
+<has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
+<has_n_lines n="363" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence" ftype="csv">
+<assert_contents>
+<has_n_lines n="7825" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence_Rtab" ftype="txt">
+<assert_contents>
+<has_n_lines n="7825" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_nucl" ftype="fasta">
+<assert_contents>
+<has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
+<has_n_lines n="126631" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_prot" ftype="fasta">
+<assert_contents>
+<has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
+<has_n_lines n="49946" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative" ftype="fasta">
+<assert_contents>
+<has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
+<has_n_lines n="29001" delta="3"/>
+</assert_contents>
+</output>
+<output name="samples" ftype="json">
+<assert_contents>
+<has_n_lines n="37" delta="3"/>
+</assert_contents>
+</output>
+<output name="summary_statistics" ftype="txt">
+<assert_contents>
+<has_text text="Soft core genes"/>
+<has_n_lines n="6" delta="3"/>
+</assert_contents>
+</output>
+</test>
+<!-- Test 6: PanTA add on GFF files with the 'alignment' parameter -->
+<test expect_num_outputs="14">
+<conditional name="mode">
+<param name="select_mode" value="add"/>
+<param name="collection_dir" location="https://zenodo.org/records/16568442/files/collection_dir.tar.gz" ftype="tar.gz"/>
+</conditional>
+<conditional name="input_type">
+<param name="input_type_selector" value="gff"/>
+<param name="input_gff">
+<collection type="list">
+<element name="GCA_021342735.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342735.1.gff"/>
+<element name="GCA_021725855.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021725855.1.gff"/>
+<element name="GCA_021890695.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890695.1.gff"/>
+</collection>
+</param>
+</conditional>
+<param name="table" value="10"/>
+<param name="alignment" value="nucleotide"/>
+<output name="annotated_clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
+<has_n_lines n="118811" delta="3"/>
+</assert_contents>
+</output>
+<output name="blast_output" ftype="tsv">
+<assert_contents>
+<has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
+<has_n_lines n="38245" delta="3"/>
+</assert_contents>
+</output>
+<output name="clusters" ftype="json">
+<assert_contents>
+<has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
+<has_n_lines n="39790" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_annotation" ftype="csv">
+<assert_contents>
+<has_text text="IclR family transcriptional regulator,6"/>
+<has_n_lines n="33564" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_position" ftype="csv">
+<assert_contents>
+<has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
+<has_n_lines n="363" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence" ftype="csv">
+<assert_contents>
+<has_n_lines n="8523" delta="3"/>
+</assert_contents>
+</output>
+<output name="gene_presence_absence_Rtab" ftype="txt">
+<assert_contents>
+<has_n_lines n="8523" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_nucl" ftype="fasta">
+<assert_contents>
+<has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
+<has_n_lines n="136572" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative_clusters_prot" ftype="fasta">
+<assert_contents>
+<has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
+<has_n_lines n="53952" delta="3"/>
+</assert_contents>
+</output>
+<output name="representative" ftype="fasta">
+<assert_contents>
+<has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
+<has_n_lines n="29001" delta="3"/>
+</assert_contents>
+</output>
+<output name="samples" ftype="json">
+<assert_contents>
+<has_n_lines n="37" delta="3"/>
+</assert_contents>
+</output>
+<output name="summary_statistics" ftype="txt">
+<assert_contents>
+<has_text text="Soft core genes"/>
+<has_n_lines n="6" delta="3"/>
+</assert_contents>
+</output>
+</test>
+</tests>
+<help><![CDATA[
+PanTA builds the pangenome of a large collection of genomes and adds a set of new genomes to an existing pangenome without rebuilding the accumulated pangenome from scratch. PanTA takes as input a list of genome assemblies and their annotations. It extracts the protein-coding regions as specified by the annotations and translates them into protein sequences. PanTA then generates output reports according to the standards set out by Roary, which include a spreadsheet detailing the presence and absence of each gene in each isolate as well as a summary of pangenome statistics.
+
+**INPUTS**
+
+- A collection of gff3 files or a tsv file.
+
+**OUTPUTS**
+
+- annotated_clusters.json
+- blast.tsv
+- clusters.json
+- gene_annotation.csv
+- gene_position.csv
+- gene_presence_absence.csv
+- gene_presence_absence.Rtab
+- representative_clusters_nucl.fasta
+- representative_clusters_prot.fasta
+- representative.fasta
+- samples.json
+- summary_statistics.txt
+- core_gene_alignment.aln.gz (requires alignment option)
+- pan_genome_reference.fna (requires alignment option)
+]]></help>
+<!-- Reference cited for the tool, given as a DOI (a figshare record, judging by the DOI prefix). -->
+<citations>
+<citation type="doi">10.6084/m9.figshare.23724705</citation>
+</citations>
+<!-- The "creator" macro is not defined in this file; presumably it comes from the imported macros.xml (confirm). -->
+<expand macro="creator"/>
+</tool>

Mercurial > repos > iuc > panta

comparison panta.xml @ 0:72296762b4f1 draft