comparison panta.xml @ 0:72296762b4f1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/panta/ commit 9b05e32c37a0825eb503df9daaf39b9c48e07c5b
author iuc
date Mon, 15 Sep 2025 11:40:14 +0000
parents
children b50893534705
comparison
equal deleted inserted replaced
-1:000000000000 0:72296762b4f1
1 <tool id="panta" name="PanTA" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@" license="MIT">
2 <description>Efficient inference of large prokaryotic pangenomes with PanTA</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <!-- Command template: stages the inputs under input_directory, then runs "panta main" (results written to out/) or "panta add" (results copied from extracted_dir into out/). -->
8 <command detect_errors="exit_code"><![CDATA[
9 mkdir out &&
10 #import re
11 #set input_directory = 'input_directory'
12 mkdir $input_directory &&
13 ## Link every input under a sanitized element identifier. NOTE(review): the globs used below only match names ending in .gff or .tsv, confirm identifiers always carry that extension.
14 #if $input_type.input_type_selector == "gff":
15 #for gff in $input_type.input_gff
16 #set identifier = re.sub('[^\s\w\-\\.]','_',str($gff.element_identifier))
17 ln -fs '$gff' '$input_directory/$identifier' &&
18 #end for
19 #elif $input_type.input_type_selector == "tsv":
20 #set identifier = re.sub('[^\s\w\-\\.]','_',str($input_type.input_tsv.element_identifier))
21 ln -fs '$input_type.input_tsv' '$input_directory/$identifier' &&
22 #end if
23 ## Main mode builds a new pangenome in out. NOTE(review): the collection_dir.tar.gz archive created afterwards is not referenced by any output declared in this file.
24 #if $mode.select_mode == "main":
25 panta main
26 #if $input_type.input_type_selector == "gff":
27 -g $input_directory/*.gff
28 #elif $input_type.input_type_selector == "tsv":
29 -f $input_directory/*.tsv
30 #end if
31 -o out
32 $dont_split
33 --blast '$blast'
34 --identity '$identity'
35 --LD '$LD'
36 --AL '$AL'
37 --AS '$AS'
38 --evalue '$evalue'
39 --threads "\${GALAXY_SLOTS:-8}"
40 --table '$table'
41 #if $alignment != 'None':
42 --alignment '$alignment'
43 #end if
44 && tar -czf collection_dir.tar.gz -C out .
45 ## Add mode unpacks the previous collection (tar detects the compression itself, so plain and gzipped archives both work), updates it in place and copies the result into out.
46 #elif $mode.select_mode == "add":
47 mkdir -p extracted_dir &&
48 ln -s '$collection_dir' collection_dir.tar.gz &&
49 tar --strip-components=1 -xf collection_dir.tar.gz -C extracted_dir &&
50 panta add
51 #if $input_type.input_type_selector == "gff":
52 -g $input_directory/*.gff
53 #elif $input_type.input_type_selector == "tsv":
54 -f $input_directory/*.tsv
55 #end if
56 -c extracted_dir
57 $dont_split
58 --blast '$blast'
59 --identity '$identity'
60 --LD '$LD'
61 --AL '$AL'
62 --AS '$AS'
63 --evalue '$evalue'
64 --threads "\${GALAXY_SLOTS:-8}"
65 --table '$table'
66 #if $alignment != 'None':
67 --alignment '$alignment'
68 #end if
69 && cp -r extracted_dir/* out
70 #end if
71 ]]></command>
72 <!-- Inputs: run mode (main/add), input format (GFF3 collection or a single TSV), and the PanTA clustering/alignment options passed straight to the command line. -->
73 <inputs>
74 <conditional name="mode">
75 <param label="Select mode" name="select_mode" type="select">
76 <option selected="true" value="main">Use PanTA main</option>
77 <option value="add">Use PanTA add</option>
78 </param>
79 <when value="main"/>
80 <when value="add">
81 <param name="collection_dir" type="data" format="tar,tar.gz" label="Previous collection directory"/>
82 </when>
83 </conditional>
84 <conditional name="input_type">
85 <param name="input_type_selector" type="select" label="Choose the input format">
86 <option value="gff" selected="true">GFF File</option>
87 <option value="tsv">TSV File</option>
88 </param>
89 <when value="gff">
90 <param type="data_collection" name="input_gff" format="gff3" collection_type="list" label="Select input files to analyze" help="Select the files you wish to analyze with PanTA"/>
91 </when>
92 <when value="tsv">
93 <param type="data" name="input_tsv" format="tsv,tabular" multiple="false" label="Select input file to analyze" help="Select the file you wish to analyze with PanTA"/>
94 </when>
95 </conditional>
96 <param argument="--dont-split" type="boolean" truevalue="--dont-split" falsevalue="" label="Don't split" help="Decide for or against splitting paralog clusters"/>
97 <param argument="--blast" type="select" label="Alignment method" help="Method for all-against-all alignment (default: diamond)">
98 <option value="diamond" selected="true">Diamond</option>
99 <option value="blast">Blast</option>
100 </param>
101 <param argument="--identity" type="float" value="0.7" min="0" max="1" label="Minimum percentage identity" help="Set the minimum percentage identity (as a fraction between 0 and 1)"/>
102 <param argument="--LD" type="float" value="0.7" min="0" max="1" label="Length difference cutoff" help="Set the length difference cutoff between two sequences (as a fraction between 0 and 1)"/>
103 <param argument="--AL" type="float" value="0" min="0" max="1" label="Alignment coverage for the longer sequence" help="Set the alignment coverage for the longer sequence (as a fraction between 0 and 1)"/>
104 <param argument="--AS" type="float" value="0" min="0" max="1" label="Alignment coverage for the shorter sequence" help="Set the alignment coverage for the shorter sequence (as a fraction between 0 and 1)"/>
105 <param argument="--evalue" type="float" value="1e-06" min="0" label="Blast evalue" help="Maximum expected value for reporting hits; lower values are stricter"/>
106 <param argument="--table" type="integer" value="11" label="Codon table" help="Set the codon table (NCBI translation table number; 11 is the bacterial and archaeal code)"/>
107 <param argument="--alignment" type="select" label="Run alignment for each gene cluster">
108 <option value="None" selected="true">None</option>
109 <option value="nucleotide">Nucleotide</option>
110 <option value="protein">Protein</option>
111 </param>
112 </inputs>
113
114 <outputs>
115 <!-- Core PanTA outputs: always declared, each collected from the out/ working directory -->
116 <data format="json" name="annotated_clusters" label="${tool.name} on ${on_string} : Annotated Clusters" from_work_dir="out/annotated_clusters.json"/>
117 <data format="tsv" name="blast_output" label="${tool.name} on ${on_string} : BLAST" from_work_dir="out/blast.tsv"/>
118 <data format="json" name="clusters" label="${tool.name} on ${on_string} : Clusters" from_work_dir="out/clusters.json"/>
119 <data format="csv" name="gene_annotation" label="${tool.name} on ${on_string} : Gene Annotation" from_work_dir="out/gene_annotation.csv"/>
120 <data format="csv" name="gene_position" label="${tool.name} on ${on_string} : Gene Position" from_work_dir="out/gene_position.csv"/>
121 <data format="csv" name="gene_presence_absence" label="${tool.name} on ${on_string} : Gene Presence Absence" from_work_dir="out/gene_presence_absence.csv"/>
122 <data format="txt" name="gene_presence_absence_Rtab" label="${tool.name} on ${on_string} : Gene Presence Absence Rtab" from_work_dir="out/gene_presence_absence.Rtab"/>
123 <data format="fasta" name="representative_clusters_nucl" label="${tool.name} on ${on_string} : Representative Clusters Nucl" from_work_dir="out/representative_clusters_nucl.fasta"/>
124 <data format="fasta" name="representative_clusters_prot" label="${tool.name} on ${on_string} : Representative Clusters Prot" from_work_dir="out/representative_clusters_prot.fasta"/>
125 <data format="fasta" name="representative" label="${tool.name} on ${on_string} : Representative FASTA" from_work_dir="out/representative.fasta"/>
126 <data format="json" name="samples" label="${tool.name} on ${on_string} : Samples" from_work_dir="out/samples.json"/>
127 <data format="txt" name="summary_statistics" label="${tool.name} on ${on_string} : Summary Statistics" from_work_dir="out/summary_statistics.txt"/>
128
129 <!-- Alignment outputs: only created when the alignment parameter is set to something other than None. The filter must reference the parameter by name; a list literal such as ['alignment'] never equals 'None' and would keep these outputs unconditionally. -->
130 <data format="txt" name="core_gene_alignment" label="${tool.name} on ${on_string} : Core Gene Alignment" from_work_dir="out/core_gene_alignment.aln.gz">
131 <filter>alignment != 'None'</filter>
132 </data>
133 <data format="txt" name="pan_genome_reference" label="${tool.name} on ${on_string} : Pan Genome Reference" from_work_dir="out/pan_genome_reference.fna">
134 <filter>alignment != 'None'</filter>
135 </data>
136 </outputs>
137
138 <tests>
139 <!-- Test 1: Basic PanTA main test for gff files: Tests the basic command for PanTA main (no alignment, so only the 12 core outputs are expected) -->
140 <test expect_num_outputs="12">
141 <conditional name="mode">
142 <param name="select_mode" value="main"/>
143 </conditional>
144 <conditional name="input_type">
145 <param name="input_type_selector" value="gff"/>
146 <param name="input_gff">
147 <collection type="list">
148 <element name="GCA_021342655.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342655.1.gff"/>
149 <element name="GCA_021534865.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021534865.1.gff"/>
150 <element name="GCA_021697815.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021697815.1.gff"/>
151 <element name="GCA_021890555.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890555.1.gff"/>
152 </collection>
153 </param>
154 </conditional>
155 <param name="table" value="10"/>
156 <output name="annotated_clusters" ftype="json">
157 <assert_contents>
158 <has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
159 <has_n_lines n="96508" delta="3"/>
160 </assert_contents>
161 </output>
162 <output name="blast_output" ftype="tsv">
163 <assert_contents>
164 <has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
165 <has_n_lines n="30680" delta="3"/>
166 </assert_contents>
167 </output>
168 <output name="clusters" ftype="json">
169 <assert_contents>
170 <has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
171 <has_n_lines n="24266" delta="3"/>
172 </assert_contents>
173 </output>
174 <output name="gene_annotation" ftype="csv">
175 <assert_contents>
176 <has_text text="IclR family transcriptional regulator,6"/>
177 <has_n_lines n="19712" delta="3"/>
178 </assert_contents>
179 </output>
180 <output name="gene_position" ftype="csv">
181 <assert_contents>
182 <has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
183 <has_n_lines n="195" delta="3"/>
184 </assert_contents>
185 </output>
186 <output name="gene_presence_absence" ftype="csv">
187 <assert_contents>
188 <has_n_lines n="7682" delta="3"/>
189 </assert_contents>
190 </output>
191 <output name="gene_presence_absence_Rtab" ftype="txt">
192 <assert_contents>
193 <has_n_lines n="7682" delta="3"/>
194 </assert_contents>
195 </output>
196 <output name="representative_clusters_nucl" ftype="fasta">
197 <assert_contents>
198 <has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
199 <has_n_lines n="124180" delta="3"/>
200 </assert_contents>
201 </output>
202 <output name="representative_clusters_prot" ftype="fasta">
203 <assert_contents>
204 <has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
205 <has_n_lines n="49014" delta="3"/>
206 </assert_contents>
207 </output>
208 <output name="representative" ftype="fasta">
209 <assert_contents>
210 <has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
211 <has_n_lines n="18795" delta="3"/>
212 </assert_contents>
213 </output>
214 <output name="samples" ftype="json">
215 <assert_contents>
216 <has_n_lines n="22" delta="3"/>
217 </assert_contents>
218 </output>
219 <output name="summary_statistics" ftype="txt">
220 <assert_contents>
221 <has_text text="Soft core genes"/>
222 <has_n_lines n="6" delta="3"/>
223 </assert_contents>
224 </output>
225 </test>
226
227 <!-- Test 2: PanTA main test for gff files: Tests the 'dont_split' parameter (no alignment, so only the 12 core outputs are expected) -->
228 <test expect_num_outputs="12">
229 <conditional name="mode">
230 <param name="select_mode" value="main"/>
231 </conditional>
232 <conditional name="input_type">
233 <param name="input_type_selector" value="gff"/>
234 <param name="input_gff">
235 <collection type="list">
236 <element name="GCA_021342655.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342655.1.gff"/>
237 <element name="GCA_021534865.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021534865.1.gff"/>
238 <element name="GCA_021697815.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021697815.1.gff"/>
239 <element name="GCA_021890555.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890555.1.gff"/>
240 </collection>
241 </param>
242 </conditional>
243 <param name="table" value="10"/>
244 <param name="dont_split" value="true"/>
245 <output name="annotated_clusters" ftype="json">
246 <assert_contents>
247 <has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
248 <has_n_lines n="90588" delta="3"/>
249 </assert_contents>
250 </output>
251 <output name="blast_output" ftype="tsv">
252 <assert_contents>
253 <has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
254 <has_n_lines n="30680" delta="3"/>
255 </assert_contents>
256 </output>
257 <output name="clusters" ftype="json">
258 <assert_contents>
259 <has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
260 <has_n_lines n="24266" delta="3"/>
261 </assert_contents>
262 </output>
263 <output name="gene_annotation" ftype="csv">
264 <assert_contents>
265 <has_text text="IclR family transcriptional regulator,6"/>
266 <has_n_lines n="19712" delta="3"/>
267 </assert_contents>
268 </output>
269 <output name="gene_position" ftype="csv">
270 <assert_contents>
271 <has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
272 <has_n_lines n="195" delta="3"/>
273 </assert_contents>
274 </output>
275 <output name="gene_presence_absence" ftype="csv">
276 <assert_contents>
277 <has_n_lines n="7089" delta="3"/>
278 </assert_contents>
279 </output>
280 <output name="gene_presence_absence_Rtab" ftype="txt">
281 <assert_contents>
282 <has_n_lines n="7089" delta="3"/>
283 </assert_contents>
284 </output>
285 <output name="representative_clusters_nucl" ftype="fasta">
286 <assert_contents>
287 <has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
288 <has_n_lines n="115793" delta="3"/>
289 </assert_contents>
290 </output>
291 <output name="representative_clusters_prot" ftype="fasta">
292 <assert_contents>
293 <has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
294 <has_n_lines n="45624" delta="3"/>
295 </assert_contents>
296 </output>
297 <output name="representative" ftype="fasta">
298 <assert_contents>
299 <has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
300 <has_n_lines n="18795" delta="3"/>
301 </assert_contents>
302 </output>
303 <output name="samples" ftype="json">
304 <assert_contents>
305 <has_n_lines n="22" delta="3"/>
306 </assert_contents>
307 </output>
308 <output name="summary_statistics" ftype="txt">
309 <assert_contents>
310 <has_text text="Soft core genes"/>
311 <has_n_lines n="6" delta="3"/>
312 </assert_contents>
313 </output>
314 </test>
315
316 <!-- Test 3: PanTA main test for gff files: Tests the 'alignment' parameter (12 core outputs plus the 2 alignment outputs) -->
317 <test expect_num_outputs="14">
318 <conditional name="mode">
319 <param name="select_mode" value="main"/>
320 </conditional>
321 <conditional name="input_type">
322 <param name="input_type_selector" value="gff"/>
323 <param name="input_gff">
324 <collection type="list">
325 <element name="GCA_021342655.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342655.1.gff"/>
326 <element name="GCA_021534865.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021534865.1.gff"/>
327 <element name="GCA_021697815.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021697815.1.gff"/>
328 <element name="GCA_021890555.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890555.1.gff"/>
329 </collection>
330 </param>
331 </conditional>
332 <param name="table" value="10"/>
333 <param name="alignment" value="nucleotide"/>
334 <output name="annotated_clusters" ftype="json">
335 <assert_contents>
336 <has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
337 <has_n_lines n="96508" delta="3"/>
338 </assert_contents>
339 </output>
340 <output name="blast_output" ftype="tsv">
341 <assert_contents>
342 <has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
343 <has_n_lines n="30680" delta="3"/>
344 </assert_contents>
345 </output>
346 <output name="clusters" ftype="json">
347 <assert_contents>
348 <has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
349 <has_n_lines n="24266" delta="3"/>
350 </assert_contents>
351 </output>
352 <output name="gene_annotation" ftype="csv">
353 <assert_contents>
354 <has_text text="IclR family transcriptional regulator,6"/>
355 <has_n_lines n="19712" delta="3"/>
356 </assert_contents>
357 </output>
358 <output name="gene_position" ftype="csv">
359 <assert_contents>
360 <has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
361 <has_n_lines n="195" delta="3"/>
362 </assert_contents>
363 </output>
364 <output name="gene_presence_absence" ftype="csv">
365 <assert_contents>
366 <has_n_lines n="7681" delta="3"/>
367 </assert_contents>
368 </output>
369 <output name="gene_presence_absence_Rtab" ftype="txt">
370 <assert_contents>
371 <has_n_lines n="7681" delta="3"/>
372 </assert_contents>
373 </output>
374 <output name="representative_clusters_nucl" ftype="fasta">
375 <assert_contents>
376 <has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
377 <has_n_lines n="124180" delta="3"/>
378 </assert_contents>
379 </output>
380 <output name="representative_clusters_prot" ftype="fasta">
381 <assert_contents>
382 <has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
383 <has_n_lines n="49014" delta="3"/>
384 </assert_contents>
385 </output>
386 <output name="representative" ftype="fasta">
387 <assert_contents>
388 <has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
389 <has_n_lines n="18795" delta="3"/>
390 </assert_contents>
391 </output>
392 <output name="samples" ftype="json">
393 <assert_contents>
394 <has_n_lines n="22" delta="3"/>
395 </assert_contents>
396 </output>
397 <output name="summary_statistics" ftype="txt">
398 <assert_contents>
399 <has_text text="Soft core genes"/>
400 <has_n_lines n="6" delta="3"/>
401 </assert_contents>
402 </output>
403 <output name="core_gene_alignment" ftype="txt">
404 <assert_contents>
405 <has_n_lines n="96690" delta="3"/>
406 </assert_contents>
407 </output>
408 <output name="pan_genome_reference" ftype="txt">
409 <assert_contents>
410 <has_text text="AAAGGCGTTTGGTATATAACGATGCCAG"/>
411 <has_n_lines n="84292" delta="3"/>
412 </assert_contents>
413 </output>
414 </test>
415
416 <!-- Test 4: Basic PanTA add test for gff files: Tests the basic command for PanTA add (no alignment, so only the 12 core outputs are expected) -->
417 <test expect_num_outputs="12">
418 <conditional name="mode">
419 <param name="select_mode" value="add"/>
420 <param name="collection_dir" location="https://zenodo.org/records/16568442/files/collection_dir.tar.gz" ftype="tar.gz"/>
421 </conditional>
422 <conditional name="input_type">
423 <param name="input_type_selector" value="gff"/>
424 <param name="input_gff">
425 <collection type="list">
426 <element name="GCA_021342735.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342735.1.gff"/>
427 <element name="GCA_021725855.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021725855.1.gff"/>
428 <element name="GCA_021890695.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890695.1.gff"/>
429 </collection>
430 </param>
431 </conditional>
432 <param name="table" value="10"/>
433 <output name="annotated_clusters" ftype="json">
434 <assert_contents>
435 <has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
436 <has_n_lines n="118811" delta="3"/>
437 </assert_contents>
438 </output>
439 <output name="blast_output" ftype="tsv">
440 <assert_contents>
441 <has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
442 <has_n_lines n="38245" delta="3"/>
443 </assert_contents>
444 </output>
445 <output name="clusters" ftype="json">
446 <assert_contents>
447 <has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
448 <has_n_lines n="39790" delta="3"/>
449 </assert_contents>
450 </output>
451 <output name="gene_annotation" ftype="csv">
452 <assert_contents>
453 <has_text text="IclR family transcriptional regulator,6"/>
454 <has_n_lines n="33564" delta="3"/>
455 </assert_contents>
456 </output>
457 <output name="gene_position" ftype="csv">
458 <assert_contents>
459 <has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
460 <has_n_lines n="363" delta="3"/>
461 </assert_contents>
462 </output>
463 <output name="gene_presence_absence" ftype="csv">
464 <assert_contents>
465 <has_n_lines n="8523" delta="3"/>
466 </assert_contents>
467 </output>
468 <output name="gene_presence_absence_Rtab" ftype="txt">
469 <assert_contents>
470 <has_n_lines n="8523" delta="3"/>
471 </assert_contents>
472 </output>
473 <output name="representative_clusters_nucl" ftype="fasta">
474 <assert_contents>
475 <has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
476 <has_n_lines n="136572" delta="3"/>
477 </assert_contents>
478 </output>
479 <output name="representative_clusters_prot" ftype="fasta">
480 <assert_contents>
481 <has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
482 <has_n_lines n="53952" delta="3"/>
483 </assert_contents>
484 </output>
485 <output name="representative" ftype="fasta">
486 <assert_contents>
487 <has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
488 <has_n_lines n="29001" delta="3"/>
489 </assert_contents>
490 </output>
491 <output name="samples" ftype="json">
492 <assert_contents>
493 <has_n_lines n="37" delta="3"/>
494 </assert_contents>
495 </output>
496 <output name="summary_statistics" ftype="txt">
497 <assert_contents>
498 <has_text text="Soft core genes"/>
499 <has_n_lines n="6" delta="3"/>
500 </assert_contents>
501 </output>
502 </test>
503
504 <!-- Test 5: PanTA add test for gff files: Tests the 'dont_split' parameter (no alignment, so only the 12 core outputs are expected) -->
505 <test expect_num_outputs="12">
506 <conditional name="mode">
507 <param name="select_mode" value="add"/>
508 <param name="collection_dir" location="https://zenodo.org/records/16568442/files/collection_dir.tar.gz" ftype="tar.gz"/>
509 </conditional>
510 <conditional name="input_type">
511 <param name="input_type_selector" value="gff"/>
512 <param name="input_gff">
513 <collection type="list">
514 <element name="GCA_021342735.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342735.1.gff"/>
515 <element name="GCA_021725855.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021725855.1.gff"/>
516 <element name="GCA_021890695.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890695.1.gff"/>
517 </collection>
518 </param>
519 </conditional>
520 <param name="table" value="10"/>
521 <param name="dont_split" value="true"/>
522 <output name="annotated_clusters" ftype="json">
523 <assert_contents>
524 <has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
525 <has_n_lines n="111811" delta="3"/>
526 </assert_contents>
527 </output>
528 <output name="blast_output" ftype="tsv">
529 <assert_contents>
530 <has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
531 <has_n_lines n="38245" delta="3"/>
532 </assert_contents>
533 </output>
534 <output name="clusters" ftype="json">
535 <assert_contents>
536 <has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
537 <has_n_lines n="39790" delta="3"/>
538 </assert_contents>
539 </output>
540 <output name="gene_annotation" ftype="csv">
541 <assert_contents>
542 <has_text text="IclR family transcriptional regulator,6"/>
543 <has_n_lines n="33564" delta="3"/>
544 </assert_contents>
545 </output>
546 <output name="gene_position" ftype="csv">
547 <assert_contents>
548 <has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
549 <has_n_lines n="363" delta="3"/>
550 </assert_contents>
551 </output>
552 <output name="gene_presence_absence" ftype="csv">
553 <assert_contents>
554 <has_n_lines n="7825" delta="3"/>
555 </assert_contents>
556 </output>
557 <output name="gene_presence_absence_Rtab" ftype="txt">
558 <assert_contents>
559 <has_n_lines n="7825" delta="3"/>
560 </assert_contents>
561 </output>
562 <output name="representative_clusters_nucl" ftype="fasta">
563 <assert_contents>
564 <has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
565 <has_n_lines n="126631" delta="3"/>
566 </assert_contents>
567 </output>
568 <output name="representative_clusters_prot" ftype="fasta">
569 <assert_contents>
570 <has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
571 <has_n_lines n="49946" delta="3"/>
572 </assert_contents>
573 </output>
574 <output name="representative" ftype="fasta">
575 <assert_contents>
576 <has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
577 <has_n_lines n="29001" delta="3"/>
578 </assert_contents>
579 </output>
580 <output name="samples" ftype="json">
581 <assert_contents>
582 <has_n_lines n="37" delta="3"/>
583 </assert_contents>
584 </output>
585 <output name="summary_statistics" ftype="txt">
586 <assert_contents>
587 <has_text text="Soft core genes"/>
588 <has_n_lines n="6" delta="3"/>
589 </assert_contents>
590 </output>
591 </test>
592
593 <!-- Test 6: PanTA add test for gff files: Tests the 'alignment' parameter (12 core outputs plus the 2 alignment outputs; the alignment outputs are counted but their contents are not asserted here) -->
594 <test expect_num_outputs="14">
595 <conditional name="mode">
596 <param name="select_mode" value="add"/>
597 <param name="collection_dir" location="https://zenodo.org/records/16568442/files/collection_dir.tar.gz" ftype="tar.gz"/>
598 </conditional>
599 <conditional name="input_type">
600 <param name="input_type_selector" value="gff"/>
601 <param name="input_gff">
602 <collection type="list">
603 <element name="GCA_021342735.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021342735.1.gff"/>
604 <element name="GCA_021725855.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021725855.1.gff"/>
605 <element name="GCA_021890695.1.gff" location="https://zenodo.org/records/16568442/files/GCA_021890695.1.gff"/>
606 </collection>
607 </param>
608 </conditional>
609 <param name="table" value="10"/>
610 <param name="alignment" value="nucleotide"/>
611 <output name="annotated_clusters" ftype="json">
612 <assert_contents>
613 <has_text text="GCA_021342655.1-NZ_JAJTPH010000093.1-5475-cds-WP_000557454.1"/>
614 <has_n_lines n="118811" delta="3"/>
615 </assert_contents>
616 </output>
617 <output name="blast_output" ftype="tsv">
618 <assert_contents>
619 <has_text text="GCA_021342655.1-NZ_JAJTPH010000101.1-3-cds-WP_233337042.1"/>
620 <has_n_lines n="38245" delta="3"/>
621 </assert_contents>
622 </output>
623 <output name="clusters" ftype="json">
624 <assert_contents>
625 <has_text text="GCA_021890555.1-NZ_JAKLOD010000002.1-4878-cds-L3T02_RS24235"/>
626 <has_n_lines n="39790" delta="3"/>
627 </assert_contents>
628 </output>
629 <output name="gene_annotation" ftype="csv">
630 <assert_contents>
631 <has_text text="IclR family transcriptional regulator,6"/>
632 <has_n_lines n="33564" delta="3"/>
633 </assert_contents>
634 </output>
635 <output name="gene_position" ftype="csv">
636 <assert_contents>
637 <has_text text="1-NZ_JAJTPH010000010.1-23-cds-LXO36_RS13380"/>
638 <has_n_lines n="363" delta="3"/>
639 </assert_contents>
640 </output>
641 <output name="gene_presence_absence" ftype="csv">
642 <assert_contents>
643 <has_n_lines n="8523" delta="3"/>
644 </assert_contents>
645 </output>
646 <output name="gene_presence_absence_Rtab" ftype="txt">
647 <assert_contents>
648 <has_n_lines n="8523" delta="3"/>
649 </assert_contents>
650 </output>
651 <output name="representative_clusters_nucl" ftype="fasta">
652 <assert_contents>
653 <has_text text="GGTCTGCCCCACCGGCATCGACATTCGCGACGGCCTGCAGATCGAGTGCATTGGTTGCGC"/>
654 <has_n_lines n="136572" delta="3"/>
655 </assert_contents>
656 </output>
657 <output name="representative_clusters_prot" ftype="fasta">
658 <assert_contents>
659 <has_text text="GLPHRHRHSRRPADRVHWLRRLHRCLRQHHGQDGLPQGPDQLHNRTQSFRTEDPSAAPTP"/>
660 <has_n_lines n="53952" delta="3"/>
661 </assert_contents>
662 </output>
663 <output name="representative" ftype="fasta">
664 <assert_contents>
665 <has_text text="RIEGSVWPKSIRGSTPKVRGTCQIERAASESPHFMRFHVACPHCGEEQYLKFGDKETPFGLKWTPDDPSSVFYLCEHNACVIRQQELDFTDARYICEKTGIWTRDGILWFSSSGEEIEPPDSVTFHIWTAYSPFTTWVQIVKDWMKTKGDTGKRKTFVNTTLGETWEAKIGERPDAEVMAERKEHYSAPVPDRVAYLTAGIDSQLDRYEMRVWGWGPGEESWLIDRQIIMGRHDDEQTLLRVDEAINKTYTRRNGAEMS"/>
666 <has_n_lines n="29001" delta="3"/>
667 </assert_contents>
668 </output>
669 <output name="samples" ftype="json">
670 <assert_contents>
671 <has_n_lines n="37" delta="3"/>
672 </assert_contents>
673 </output>
674 <output name="summary_statistics" ftype="txt">
675 <assert_contents>
676 <has_text text="Soft core genes"/>
677 <has_n_lines n="6" delta="3"/>
678 </assert_contents>
679 </output>
680 </test>
681 </tests>
682 <help><![CDATA[
683
684 PanTA builds the pangenome of a large collection of genomes and adds a set of new genomes to an existing pangenome without rebuilding the accumulated pangenome from scratch. PanTA takes as input a list of genome assemblies and their annotations. It extracts the protein-coding regions as specified by the annotations and translates them into protein sequences. PanTA then generates output reports according to the standards set out by Roary, which include a spreadsheet detailing the presence and absence of each gene in each isolate as well as a summary of pangenome statistics.
685
686 **INPUTS**
687
688 - A collection of gff3 files or a tsv file.
689
690 **OUTPUTS**
691
692 - annotated_clusters.json
693 - blast.tsv
694 - clusters.json
695 - gene_annotation.csv
696 - gene_position.csv
697 - gene_presence_absence.csv
698 - gene_presence_absence.Rtab
699 - representative_clusters_nucl.fasta
700 - representative_clusters_prot.fasta
701 - representative.fasta
702 - samples.json
703 - summary_statistics.txt
704 - core_gene_alignment.aln.gz (requires alignment option)
705 - pan_genome_reference.fna (requires alignment option)
706
707 ]]></help>
708 <citations>
709 <citation type="doi">10.6084/m9.figshare.23724705</citation>
710 </citations>
711 <expand macro="creator"/>
712 </tool>