comparison pirate.xml @ 0:ef07a43227a6 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/pirate commit fd6bda0b520e35e43c29f35c10d5b0704f6f4f82
author iuc
date Wed, 04 Feb 2026 11:52:07 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:ef07a43227a6
1 <tool id="pirate" name="PIRATE" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Pangenome Iterative Refinement and Threshold Evaluation</description>
3 <macros>
4 <import>macros.xml</import> <!-- presumably supplies the @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens and the macros expanded in this file; confirm against macros.xml -->
5 </macros>
6 <expand macro="requirements"/>
7 <command detect_errors="exit_code"><![CDATA[
8 ## Stage the inputs: each GFF is symlinked into input_gffs/ under its element identifier, with every character other than word characters, whitespace, '-' and '.' replaced by '_'
9 #import re
10 mkdir input_gffs &&
11 #for $f in $input_files:
12 #set identifier = re.sub('[^\s\w\-\\.]','_',str($f.element_identifier))
13 ln -s '$f' 'input_gffs/$identifier' &&
14 #end for
15 ## Run PIRATE on the staged directory; parameters declared inside a section are addressed by their full path (section.param)
16 PIRATE
17 --input input_gffs/
18 --steps '$global_opts.steps'
19 --features '$global_opts.features'
20 --min-len $global_opts.min_len
21 $global_opts.nucl
22 $para_opts.para_off
23 $para_opts.classify_off
24 $output_opts.align
25 $output_opts.rplots
26 --threads "\${GALAXY_SLOTS:-8}"
27 ## Advanced options are forwarded as one single-quoted string through --pan-opt; --perc is an optional field and is left out when it is empty
28 #if str($global_opts.pan_opts_conditional.enable_pan_opt) == 'yes':
29 --pan-opt '
30 #if str($global_opts.pan_opts_conditional.perc) != ''# --perc $global_opts.pan_opts_conditional.perc #end if#
31 --cd-low $global_opts.pan_opts_conditional.cdhit_opts.cd_low
32 --cd-step $global_opts.pan_opts_conditional.cdhit_opts.cd_step
33 $global_opts.pan_opts_conditional.cdhit_opts.cd_core_off
34 --evalue $global_opts.pan_opts_conditional.blast_opts.evalue
35 $global_opts.pan_opts_conditional.blast_opts.diamond
36 $global_opts.pan_opts_conditional.blast_opts.diamond_split
37 --hsp-len $global_opts.pan_opts_conditional.blast_opts.hsp_len
38 --flat $global_opts.pan_opts_conditional.mcl_opts.flat
39 '
40 #end if
41 --output results/
42 ]]></command>
43 <inputs> <!-- Every parameter except the input files is nested inside a section -->
44 <param argument="--input" name="input_files" type="data" format="gff,gff3" multiple="true" label="Input GFF files" help="Select all GFF files for pangenome construction"/>
45
46 <section name="global_opts" title="Global Options" expanded="true">
47 <param argument="--steps" type="text" value="50,60,70,80,90,95,98" label="Identity thresholds" help="Comma-separated list of % identity thresholds"><validator type="regex" message="Provide a comma-separated list of numbers without spaces, e.g. 50,60,70,80,90,95,98">^[0-9]+(\.[0-9]+)?(,[0-9]+(\.[0-9]+)?)*$</validator></param>
48 <param argument="--features" type="text" value="CDS" label="Features" help="Features to use (e.g., CDS, tRNA). Multiple could be specified using commas."><validator type="regex" message="Provide one or more feature types separated by commas without spaces, e.g. CDS,tRNA">^[A-Za-z0-9_\-]+(,[A-Za-z0-9_\-]+)*$</validator></param>
49 <param argument="--min-len" type="integer" value="120" min="0" label="Minimum feature length"/>
50 <param argument="--nucl" type="boolean" truevalue="--nucl" falsevalue="" checked="false" label="Do not translate to Amino Acids" />
51
52 <conditional name="pan_opts_conditional"> <!-- the nested parameters exist only when the selector below is set to "yes" -->
53 <param name="enable_pan_opt" type="select" label="Enable advanced pangenome options" help="Enable --pan-opt and advanced pangenome parameters">
54 <option value="no" selected="true">No</option>
55 <option value="yes">Yes</option>
56 </param>
57 <when value="yes">
58 <param argument="--perc" type="integer" value="98" optional="true" min="0" max="100" label="Single % identity threshold to use for pangenome" help="Single % identity threshold to use for pangenome"/> <!-- optional: the field may be cleared -->
59
60 <section name="cdhit_opts" title="CD-HIT Options" expanded="false">
61 <param argument="--cd-low" type="integer" value="98" min="0" max="100" label="CD-HIT lowest percentage id" help="Default: 98"/>
62 <param argument="--cd-step" type="float" value="0.5" min="0" label="CD-HIT step size" help="Default: 0.5"/>
63 <param argument="--cd-core-off" type="boolean" truevalue="--cd-core-off" falsevalue="" checked="false" label="Don't extract core families during CD-HIT clustering" help="Default: Extract core families"/>
64 </section>
65
66 <section name="blast_opts" title="BLAST Options" expanded="false">
67 <param argument="--evalue" type="float" value="1E-6" min="0" label="E-value for BLAST hit filtering" help="Default: 1E-6"/>
68 <param argument="--diamond" type="boolean" truevalue="--diamond" falsevalue="" checked="false" label="Use DIAMOND instead of BLAST. Incompatible with --nucl"/>
69 <param argument="--diamond-split" type="boolean" truevalue="--diamond-split" falsevalue="" checked="false" label="Split DIAMOND files into batches"/>
70 <param argument="--hsp-len" type="float" value="0" min="0" max="1" label="Remove BLAST HSPs proportion threshold" help="Remove BLAST HSPs that are less than this proportion of query length."/>
71 </section>
72
73 <section name="mcl_opts" title="MCL Options" expanded="false">
74 <param argument="--flat" type="float" value="1.5" min="0" label="MCL inflation value"/>
75 </section>
76 </when>
77 <when value="no"/>
78 </conditional>
79 </section>
80
81 <section name="para_opts" title="Paralog Classification" expanded="false">
82 <param argument="--para-off" type="boolean" truevalue="--para-off" falsevalue="" checked="false" label="Switch off paralog identification"/>
83 <param argument="--classify-off" type="boolean" truevalue="--classify-off" falsevalue="" checked="false" label="Do not classify paralogs"/>
84 </section>
85
86 <section name="output_opts" title="Output Options" expanded="true"> <!-- these two switches also control the optional outputs through their filters -->
87 <param argument="--align" type="boolean" truevalue="--align" falsevalue="" checked="false" label="Produce alignments" help="Align all genes and produce core/pangenome alignments."/>
88 <param argument="--rplots" type="boolean" truevalue="--rplots" falsevalue="" checked="false" label="Generate R plots" help="Plot summaries using R."/>
89 </section>
90 </inputs>
91
92 <outputs> <!-- All datasets are collected from the results/ directory passed to PIRATE as its output location -->
93 <data name="pangenome_summary" format="txt" from_work_dir="results/PIRATE.pangenome_summary.txt" label="${tool.name} on ${on_string}: Pangenome Summary"/>
94 <data name="pirate_gene_families" format="tsv" from_work_dir="results/PIRATE.gene_families.ordered.tsv" label="${tool.name} on ${on_string}: Tabular summary of all gene families"/>
95 <data name="pirate_unique_alleles" format="tsv" from_work_dir="results/PIRATE.unique_alleles.tsv" label="${tool.name} on ${on_string}: Tabular summary of all unique alleles"/>
96 <data name="pirate_presence_absence_fasta" format="fasta" from_work_dir="results/binary_presence_absence.fasta" label="${tool.name} on ${on_string}: Binary Presence/Absence data"/>
97 <data name="pirate_presence_absence_nwk" format="newick" from_work_dir="results/binary_presence_absence.nwk" label="${tool.name} on ${on_string}: Binary Presence/Absence Newick data"/>
98 <data name="pangenome_gfa" format="gfa1" from_work_dir="results/pangenome.gfa" label="${tool.name} on ${on_string}: Pangenome GFA"/>
99 <data name="pirate_rep_sequences_ffn" format="fasta" from_work_dir="results/representative_sequences.ffn" label="${tool.name} on ${on_string}: Representative sequences for each gene family as nucleotide"/>
100 <data name="pirate_rep_sequences_faa" format="fasta" from_work_dir="results/representative_sequences.faa" label="${tool.name} on ${on_string}: Representative sequences for each gene family as amino acid"/>
101 <!-- The eight outputs above have no filter; the four alignment outputs below are created only when output_opts.align is enabled -->
102 <data name="pirate_core_aln" format="fasta" from_work_dir="results/core_alignment.fasta" label="${tool.name} on ${on_string}: Core Alignment FASTA">
103 <filter>output_opts['align']</filter>
104 </data>
105
106 <data name="pirate_core_gff" format="gff" from_work_dir="results/core_alignment.gff" label="${tool.name} on ${on_string}: Core Alignment GFF">
107 <filter>output_opts['align']</filter>
108 </data>
109
110 <data name="pirate_pangenome_aln" format="fasta" from_work_dir="results/pangenome_alignment.fasta" label="${tool.name} on ${on_string}: Pangenome Alignment FASTA">
111 <filter>output_opts['align']</filter>
112 </data>
113
114 <data name="pirate_pangenome_gff" format="gff" from_work_dir="results/pangenome_alignment.gff" label="${tool.name} on ${on_string}: Pangenome Alignment GFF">
115 <filter>output_opts['align']</filter>
116 </data>
117 <!-- The summary PDF is created only when output_opts.rplots is enabled -->
118 <data name="pirate_plots" format="pdf" from_work_dir="results/PIRATE_plots.pdf" label="${tool.name} on ${on_string}: Summary plots of the PIRATE pangenome">
119 <filter>output_opts['rplots']</filter>
120 </data>
121 </outputs>
122
123 <tests>
124 <!-- Test 1: default parameters; advanced options are off and neither alignments nor plots are requested, so only the 8 unfiltered outputs are expected -->
125 <test expect_num_outputs="8">
126 <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/>
127 <section name="global_opts">
128 <param name="steps" value="50,60,70,80,90,95,98"/>
129 <param name="features" value="CDS"/>
130 <param name="min_len" value="120"/>
131 <conditional name="pan_opts_conditional">
132 <param name="enable_pan_opt" value="no"/>
133 </conditional>
134 </section>
135 <output name="pangenome_summary" ftype="txt">
136 <assert_contents>
137 <has_line line="# 4 gene families in 2 genomes."/>
138 <has_n_lines n="13"/>
139 </assert_contents>
140 </output>
141 <output name="pirate_gene_families" ftype="tsv">
142 <assert_contents>
143 <has_n_lines n="5"/>
144 </assert_contents>
145 </output>
146 <output name="pirate_unique_alleles" ftype="tsv">
147 <assert_contents>
148 <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/>
149 <has_n_lines n="8"/>
150 </assert_contents>
151 </output>
152 <output name="pirate_presence_absence_fasta" ftype="fasta">
153 <assert_contents>
154 <has_line line=">HO_5096_0412"/>
155 <has_n_lines n="6"/>
156 </assert_contents>
157 </output>
158 <output name="pirate_presence_absence_nwk" ftype="newick">
159 <assert_contents>
160 <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/>
161 <has_n_lines n="1"/>
162 </assert_contents>
163 </output>
164 <output name="pangenome_gfa" ftype="gfa1">
165 <assert_contents>
166 <has_line_matching expression="S\tg01\tA\tRC:i:2"/>
167 <has_n_lines n="7"/>
168 </assert_contents>
169 </output>
170 <output name="pirate_rep_sequences_ffn" ftype="fasta">
171 <assert_contents>
172 <has_n_lines n="8"/>
173 </assert_contents>
174 </output>
175 <output name="pirate_rep_sequences_faa" ftype="fasta">
176 <assert_contents>
177 <has_n_lines n="8"/>
178 </assert_contents>
179 </output>
180 </test>
181 <!-- Test 2: "Produce alignments" enabled; the 4 alignment outputs (core and pangenome, FASTA and GFF) are added to the 8 unfiltered ones, giving 12 -->
182 <test expect_num_outputs="12">
183 <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/>
184 <section name="global_opts">
185 <param name="steps" value="50,60,70,80,90,95,98"/>
186 <param name="features" value="CDS"/>
187 <param name="min_len" value="120"/>
188 <conditional name="pan_opts_conditional">
189 <param name="enable_pan_opt" value="no"/>
190 </conditional>
191 </section>
192 <section name="output_opts">
193 <param name="align" value="true"/>
194 </section>
195 <output name="pangenome_summary" ftype="txt">
196 <assert_contents>
197 <has_line line="# 4 gene families in 2 genomes."/>
198 <has_n_lines n="13"/>
199 </assert_contents>
200 </output>
201 <output name="pirate_gene_families" ftype="tsv">
202 <assert_contents>
203 <has_n_lines n="5"/>
204 </assert_contents>
205 </output>
206 <output name="pirate_unique_alleles" ftype="tsv">
207 <assert_contents>
208 <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/>
209 <has_n_lines n="8"/>
210 </assert_contents>
211 </output>
212 <output name="pirate_presence_absence_fasta" ftype="fasta">
213 <assert_contents>
214 <has_line line=">HO_5096_0412"/>
215 <has_n_lines n="6"/>
216 </assert_contents>
217 </output>
218 <output name="pirate_presence_absence_nwk" ftype="newick">
219 <assert_contents>
220 <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/>
221 <has_n_lines n="1"/>
222 </assert_contents>
223 </output>
224 <output name="pangenome_gfa" ftype="gfa1">
225 <assert_contents>
226 <has_line_matching expression="S\tg01\tA\tRC:i:2"/>
227 <has_n_lines n="7"/>
228 </assert_contents>
229 </output>
230 <output name="pirate_rep_sequences_ffn" ftype="fasta">
231 <assert_contents>
232 <has_n_lines n="8"/>
233 </assert_contents>
234 </output>
235 <output name="pirate_rep_sequences_faa" ftype="fasta">
236 <assert_contents>
237 <has_n_lines n="8"/>
238 </assert_contents>
239 </output>
240 <output name="pirate_pangenome_aln" ftype="fasta">
241 <assert_contents>
242 <has_line line=">HO_5096_0412"/>
243 <has_n_lines n="4"/>
244 </assert_contents>
245 </output>
246 <output name="pirate_pangenome_gff" ftype="gff">
247 <assert_contents>
248 <has_line_matching expression="##sequence-region Pangenome 1 3945"/>
249 <has_n_lines n="6"/>
250 </assert_contents>
251 </output>
252 <output name="pirate_core_aln" ftype="fasta">
253 <assert_contents>
254 <has_line line=">HO_5096_0412"/>
255 <has_n_lines n="4"/>
256 </assert_contents>
257 </output>
258 <output name="pirate_core_gff" ftype="gff">
259 <assert_contents>
260 <has_line_matching expression="##sequence-region Pangenome 1 2550"/>
261 <has_n_lines n="5"/>
262 </assert_contents>
263 </output>
264 </test>
265
266 <!-- Test 3: advanced pangenome options enabled, with perc, cd_core_off, evalue, diamond, diamond_split, hsp_len and flat changed from their defaults; alignments stay off, so 8 outputs are expected -->
267 <test expect_num_outputs="8">
268 <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/>
269 <section name="global_opts">
270 <param name="steps" value="50,60,70,80,90,95,98"/>
271 <param name="features" value="CDS"/>
272 <param name="min_len" value="120"/>
273 <conditional name="pan_opts_conditional">
274 <param name="enable_pan_opt" value="yes"/>
275 <param name="perc" value="95"/>
276 <section name="cdhit_opts">
277 <param name="cd_low" value="98"/>
278 <param name="cd_step" value="0.5"/>
279 <param name="cd_core_off" value="true"/>
280 </section>
281 <section name="blast_opts">
282 <param name="evalue" value="0.00001"/>
283 <param name="diamond" value="true"/>
284 <param name="diamond_split" value="true"/>
285 <param name="hsp_len" value="0.1"/>
286 </section>
287 <section name="mcl_opts">
288 <param name="flat" value="2.0"/>
289 </section>
290 </conditional>
291 </section>
292 <section name="output_opts">
293 <param name="align" value="false"/>
294 </section>
295 <output name="pangenome_summary" ftype="txt">
296 <assert_contents>
297 <has_line line="# 4 gene families in 2 genomes."/>
298 <has_n_lines n="13"/>
299 </assert_contents>
300 </output>
301 <output name="pirate_gene_families" ftype="tsv">
302 <assert_contents>
303 <has_n_lines n="5"/>
304 </assert_contents>
305 </output>
306 <output name="pirate_unique_alleles" ftype="tsv">
307 <assert_contents>
308 <has_line_matching expression="g01_09\s+g01\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+\s+MRSA252_00002"/>
309 <has_n_lines n="9"/>
310 </assert_contents>
311 </output>
312 <output name="pirate_presence_absence_fasta" ftype="fasta">
313 <assert_contents>
314 <has_line line=">HO_5096_0412"/>
315 <has_n_lines n="6"/>
316 </assert_contents>
317 </output>
318 <output name="pirate_presence_absence_nwk" ftype="newick">
319 <assert_contents>
320 <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/>
321 <has_n_lines n="1"/>
322 </assert_contents>
323 </output>
324 <output name="pangenome_gfa" ftype="gfa1">
325 <assert_contents>
326 <has_line_matching expression="S\tg01\tA\tRC:i:2"/>
327 <has_n_lines n="7"/>
328 </assert_contents>
329 </output>
330 <output name="pirate_rep_sequences_ffn" ftype="fasta">
331 <assert_contents>
332 <has_n_lines n="8"/>
333 </assert_contents>
334 </output>
335 <output name="pirate_rep_sequences_faa" ftype="fasta">
336 <assert_contents>
337 <has_n_lines n="8"/>
338 </assert_contents>
339 </output>
340 </test>
341
342 <!-- Test 4: "Generate R plots" enabled; the summary PDF is added to the 8 unfiltered outputs, giving 9 -->
343 <test expect_num_outputs="9">
344 <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/>
345 <section name="global_opts">
346 <param name="steps" value="50,60,70,80,90,95,98"/>
347 <param name="features" value="CDS"/>
348 <param name="min_len" value="120"/>
349 <conditional name="pan_opts_conditional">
350 <param name="enable_pan_opt" value="no"/>
351 </conditional>
352 </section>
353 <section name="output_opts">
354 <param name="rplots" value="true"/>
355 </section>
356 <output name="pangenome_summary" ftype="txt">
357 <assert_contents>
358 <has_line line="# 4 gene families in 2 genomes."/>
359 <has_n_lines n="13"/>
360 </assert_contents>
361 </output>
362 <output name="pirate_gene_families" ftype="tsv">
363 <assert_contents>
364 <has_n_lines n="5"/>
365 </assert_contents>
366 </output>
367 <output name="pirate_unique_alleles" ftype="tsv">
368 <assert_contents>
369 <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/>
370 <has_n_lines n="8"/>
371 </assert_contents>
372 </output>
373 <output name="pirate_presence_absence_fasta" ftype="fasta">
374 <assert_contents>
375 <has_line line=">HO_5096_0412"/>
376 <has_n_lines n="6"/>
377 </assert_contents>
378 </output>
379 <output name="pirate_presence_absence_nwk" ftype="newick">
380 <assert_contents>
381 <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/>
382 <has_n_lines n="1"/>
383 </assert_contents>
384 </output>
385 <output name="pangenome_gfa" ftype="gfa1">
386 <assert_contents>
387 <has_line_matching expression="S\tg01\tA\tRC:i:2"/>
388 <has_n_lines n="7"/>
389 </assert_contents>
390 </output>
391 <output name="pirate_rep_sequences_ffn" ftype="fasta">
392 <assert_contents>
393 <has_n_lines n="8"/>
394 </assert_contents>
395 </output>
396 <output name="pirate_rep_sequences_faa" ftype="fasta">
397 <assert_contents>
398 <has_n_lines n="8"/>
399 </assert_contents>
400 </output>
401 <output name="pirate_plots" ftype="pdf">
402 <assert_contents>
403 <has_size value="14397" delta="100"/> <!-- size-only check with a tolerance; presumably the PDF bytes are not stable between runs (confirm) -->
404 </assert_contents>
405 </output>
406 </test>
407
408 </tests>
409 <help><![CDATA[
410 **PIRATE** (Pangenome Iterative Refinement and Threshold Evaluation)
411
412 PIRATE is a pangenomics tool that allows for the iterative refinement of pangenomes using multiple identity thresholds. It is designed to handle highly divergent pangenomes and identify orthologs across different evolutionary scales.
413
414 **INPUTS**
415
416 - GFF or GFF3 annotation files, one per genome, supplied as multiple datasets or as a collection.
417
418 **CORE OUTPUTS**
419
420 1. Pangenome Summary - Summary statistics of gene number and frequency in the pangenome
421 2. Tabular summary of all gene families - Complete gene family catalog with one row per family. Families split during paralog detection are labeled with underscores and numbers (e.g., g0001_1, g0001_2). Families are ordered by syntenic position in the pangenome graph.
422 3. Tabular summary of all unique alleles - Catalog of unique alleles per gene family, defined as distinct MCL sub-clusters at higher identity thresholds
423 4. Binary Presence/Absence data - Binary gene family presence/absence matrix in FASTA format
424 5. Binary Presence/Absence Newick data - FastTree phylogeny constructed from the binary presence/absence matrix
425 6. Pangenome GFA - Network representation of gene family connections in GFA format (can be visualized with Bandage)
426 7. Representative sequences for each gene family as nucleotide - Nucleotide sequences with the longest sequence per family selected as representative (genomes ordered alphabetically)
427 8. Representative sequences for each gene family as amino acid - Amino acid sequences corresponding to the nucleotide set
428
429 **OPTIONAL OUTPUTS** (outputs 1-4 require "Produce alignments"; output 5 requires "Generate R plots")
430
431 1. Core Alignment FASTA - MAFFT-aligned core genome sequences, ordered by gene family table. Reverse-translated when created from CDS. Multi-copy genes represented as ? characters.
432 2. Core Alignment GFF - Annotation coordinates and gene/product information for the core alignment
433 3. Pangenome Alignment FASTA - MAFFT-aligned full pangenome sequences with the same characteristics as core alignment
434 4. Pangenome Alignment GFF - Annotation coordinates for the pangenome alignment
435 5. Summary plots of the PIRATE pangenome - Visualization plots summarizing the pangenome analysis
436
437
438 ]]></help>
439 <expand macro="citations"/> <!-- the citations and creator macros are not defined in this file; presumably they come from the imported macros.xml -->
440 <expand macro="creator"/>
441 </tool>