Mercurial > repos > iuc > pirate
view pirate.xml @ 0:ef07a43227a6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/pirate commit fd6bda0b520e35e43c29f35c10d5b0704f6f4f82
| author | iuc |
|---|---|
| date | Wed, 04 Feb 2026 11:52:07 +0000 |
| parents | |
| children |
line wrap: on
line source
<!--
    Galaxy wrapper for PIRATE (Pangenome Iterative Refinement and Threshold Evaluation).

    The command stages every selected GFF under input_gffs/ (named after a sanitized
    element identifier), runs PIRATE on that directory and collects outputs from results/.
    Version, profile, requirements, citations and creator come from macros.xml.
-->
<tool id="pirate" name="PIRATE" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Pangenome Iterative Refinement and Threshold Evaluation</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Create input directory and symlink GFFs.
        ## Identifiers are sanitized so that only whitespace, word characters,
        ## '-', '\' and '.' survive; anything else becomes '_'.
        #import re
        mkdir input_gffs &&
        #for $f in $input_files:
            #set identifier = re.sub('[^\s\w\-\\.]','_',str($f.element_identifier))
            ln -s '$f' 'input_gffs/$identifier' &&
        #end for

        ## Parameters declared inside a <section> are addressed through the
        ## section name (global_opts, para_opts, output_opts).
        PIRATE
            --input input_gffs/
            --steps '$global_opts.steps'
            --features '$global_opts.features'
            --min-len $global_opts.min_len
            $global_opts.nucl
            $para_opts.para_off
            $para_opts.classify_off
            $output_opts.align
            $output_opts.rplots
            --threads "\${GALAXY_SLOTS:-8}"

            ## Pan-opt and advanced parameters
            #if str($global_opts.pan_opts_conditional.enable_pan_opt) == 'yes':
                --pan-opt '
                    ## --perc is optional: emit it only when a value was supplied,
                    ## otherwise a bare flag would consume the next argument.
                    #if str($global_opts.pan_opts_conditional.perc) != '':
                        --perc $global_opts.pan_opts_conditional.perc
                    #end if
                    --cd-low $global_opts.pan_opts_conditional.cdhit_opts.cd_low
                    --cd-step $global_opts.pan_opts_conditional.cdhit_opts.cd_step
                    $global_opts.pan_opts_conditional.cdhit_opts.cd_core_off
                    --evalue $global_opts.pan_opts_conditional.blast_opts.evalue
                    $global_opts.pan_opts_conditional.blast_opts.diamond
                    $global_opts.pan_opts_conditional.blast_opts.diamond_split
                    --hsp-len $global_opts.pan_opts_conditional.blast_opts.hsp_len
                    --flat $global_opts.pan_opts_conditional.mcl_opts.flat
                '
            #end if
            --output results/
    ]]></command>
    <inputs>
        <param argument="--input" name="input_files" type="data" format="gff,gff3" multiple="true" label="Input GFF files" help="Select all GFF files for pangenome construction"/>
        <section name="global_opts" title="Global Options" expanded="true">
            <param argument="--steps" type="text" value="50,60,70,80,90,95,98" label="Identity thresholds" help="Comma-separated list of % identity thresholds"/>
            <param argument="--features" type="text" value="CDS" label="Features" help="Features to use (e.g., CDS, tRNA). Multiple could be specified using commas."/>
            <param argument="--min-len" type="integer" value="120" min="0" label="Minimum feature length"/>
            <param argument="--nucl" type="boolean" truevalue="--nucl" falsevalue="" checked="false" label="Do not translate to Amino Acids"/>
            <!-- Advanced options are only rendered on the command line when enabled. -->
            <conditional name="pan_opts_conditional">
                <param name="enable_pan_opt" type="select" label="Enable advanced pangenome options" help="Enable --pan-opt and advanced pangenome parameters">
                    <option value="no" selected="true">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="yes">
                    <param argument="--perc" type="integer" value="98" optional="true" min="0" max="100" label="Single % identity threshold to use for pangenome" help="Single % identity threshold to use for pangenome"/>
                    <section name="cdhit_opts" title="CD-HIT Options" expanded="false">
                        <param argument="--cd-low" type="integer" value="98" min="0" max="100" label="CD-HIT lowest percentage id" help="Default: 98"/>
                        <param argument="--cd-step" type="float" value="0.5" min="0" label="CD-HIT step size" help="Default: 0.5"/>
                        <param argument="--cd-core-off" type="boolean" truevalue="--cd-core-off" falsevalue="" checked="false" label="Don't extract core families during CD-HIT clustering" help="Default: Extract core families"/>
                    </section>
                    <section name="blast_opts" title="BLAST Options" expanded="false">
                        <param argument="--evalue" type="float" value="1E-6" min="0" label="E-value for BLAST hit filtering" help="Default: 1E-6"/>
                        <param argument="--diamond" type="boolean" truevalue="--diamond" falsevalue="" checked="false" label="Use DIAMOND instead of BLAST. Incompatible with --nucl"/>
                        <param argument="--diamond-split" type="boolean" truevalue="--diamond-split" falsevalue="" checked="false" label="Split DIAMOND files into batches"/>
                        <param argument="--hsp-len" type="float" value="0" min="0" max="1" label="Remove BLAST HSPs proportion threshold" help="Remove BLAST HSPs that are less than this proportion of query length."/>
                    </section>
                    <section name="mcl_opts" title="MCL Options" expanded="false">
                        <param argument="--flat" type="float" value="1.5" min="0" label="MCL inflation value"/>
                    </section>
                </when>
                <when value="no"/>
            </conditional>
        </section>
        <section name="para_opts" title="Paralog Classification" expanded="false">
            <param argument="--para-off" type="boolean" truevalue="--para-off" falsevalue="" checked="false" label="Switch off paralog identification"/>
            <param argument="--classify-off" type="boolean" truevalue="--classify-off" falsevalue="" checked="false" label="Do not classify paralogs"/>
        </section>
        <section name="output_opts" title="Output Options" expanded="true">
            <param argument="--align" type="boolean" truevalue="--align" falsevalue="" checked="false" label="Produce alignments" help="Align all genes and produce core/pangenome alignments."/>
            <param argument="--rplots" type="boolean" truevalue="--rplots" falsevalue="" checked="false" label="Generate R plots" help="Plot summaries using R."/>
        </section>
    </inputs>
    <outputs>
        <!-- Core outputs: always collected from results/. -->
        <data name="pangenome_summary" format="txt" from_work_dir="results/PIRATE.pangenome_summary.txt" label="${tool.name} on ${on_string}: Pangenome Summary"/>
        <data name="pirate_gene_families" format="tsv" from_work_dir="results/PIRATE.gene_families.ordered.tsv" label="${tool.name} on ${on_string}: Tabular summary of all gene families"/>
        <data name="pirate_unique_alleles" format="tsv" from_work_dir="results/PIRATE.unique_alleles.tsv" label="${tool.name} on ${on_string}: Tabular summary of all unique alleles"/>
        <data name="pirate_presence_absence_fasta" format="fasta" from_work_dir="results/binary_presence_absence.fasta" label="${tool.name} on ${on_string}: Binary Presence/Absence data"/>
        <data name="pirate_presence_absence_nwk" format="newick" from_work_dir="results/binary_presence_absence.nwk" label="${tool.name} on ${on_string}: Binary Presence/Absence Newick data"/>
        <data name="pangenome_gfa" format="gfa1" from_work_dir="results/pangenome.gfa" label="${tool.name} on ${on_string}: Pangenome GFA"/>
        <data name="pirate_rep_sequences_ffn" format="fasta" from_work_dir="results/representative_sequences.ffn" label="${tool.name} on ${on_string}: Representative sequences for each gene family as nucleotide"/>
        <data name="pirate_rep_sequences_faa" format="fasta" from_work_dir="results/representative_sequences.faa" label="${tool.name} on ${on_string}: Representative sequences for each gene family as amino acid"/>
        <!-- Alignment outputs: only created when "Produce alignments" is enabled. -->
        <data name="pirate_core_aln" format="fasta" from_work_dir="results/core_alignment.fasta" label="${tool.name} on ${on_string}: Core Alignment FASTA">
            <filter>output_opts['align']</filter>
        </data>
        <data name="pirate_core_gff" format="gff" from_work_dir="results/core_alignment.gff" label="${tool.name} on ${on_string}: Core Alignment GFF">
            <filter>output_opts['align']</filter>
        </data>
        <data name="pirate_pangenome_aln" format="fasta" from_work_dir="results/pangenome_alignment.fasta" label="${tool.name} on ${on_string}: Pangenome Alignment FASTA">
            <filter>output_opts['align']</filter>
        </data>
        <data name="pirate_pangenome_gff" format="gff" from_work_dir="results/pangenome_alignment.gff" label="${tool.name} on ${on_string}: Pangenome Alignment GFF">
            <filter>output_opts['align']</filter>
        </data>
        <!-- Plot output: only created when "Generate R plots" is enabled. -->
        <data name="pirate_plots" format="pdf" from_work_dir="results/PIRATE_plots.pdf" label="${tool.name} on ${on_string}: Summary plots of the PIRATE pangenome">
            <filter>output_opts['rplots']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1: Default parameters -->
        <test expect_num_outputs="8">
            <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/>
            <section name="global_opts">
                <param name="steps" value="50,60,70,80,90,95,98"/>
                <param name="features" value="CDS"/>
                <param name="min_len" value="120"/>
                <conditional name="pan_opts_conditional">
                    <param name="enable_pan_opt" value="no"/>
                </conditional>
            </section>
            <output name="pangenome_summary" ftype="txt">
                <assert_contents>
                    <has_line line="# 4 gene families in 2 genomes."/>
                    <has_n_lines n="13"/>
                </assert_contents>
            </output>
            <output name="pirate_gene_families" ftype="tsv">
                <assert_contents>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
            <output name="pirate_unique_alleles" ftype="tsv">
                <assert_contents>
                    <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_presence_absence_fasta" ftype="fasta">
                <assert_contents>
                    <has_line line=">HO_5096_0412"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="pirate_presence_absence_nwk" ftype="newick">
                <assert_contents>
                    <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <output name="pangenome_gfa" ftype="gfa1">
                <assert_contents>
                    <has_line_matching expression="S\tg01\tA\tRC:i:2"/>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
            <output name="pirate_rep_sequences_ffn" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_rep_sequences_faa" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 2: Alignment outputs enabled -->
        <test expect_num_outputs="12">
            <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/>
            <section name="global_opts">
                <param name="steps" value="50,60,70,80,90,95,98"/>
                <param name="features" value="CDS"/>
                <param name="min_len" value="120"/>
                <conditional name="pan_opts_conditional">
                    <param name="enable_pan_opt" value="no"/>
                </conditional>
            </section>
            <section name="output_opts">
                <param name="align" value="true"/>
            </section>
            <output name="pangenome_summary" ftype="txt">
                <assert_contents>
                    <has_line line="# 4 gene families in 2 genomes."/>
                    <has_n_lines n="13"/>
                </assert_contents>
            </output>
            <output name="pirate_gene_families" ftype="tsv">
                <assert_contents>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
            <output name="pirate_unique_alleles" ftype="tsv">
                <assert_contents>
                    <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_presence_absence_fasta" ftype="fasta">
                <assert_contents>
                    <has_line line=">HO_5096_0412"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="pirate_presence_absence_nwk" ftype="newick">
                <assert_contents>
                    <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <output name="pangenome_gfa" ftype="gfa1">
                <assert_contents>
                    <has_line_matching expression="S\tg01\tA\tRC:i:2"/>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
            <output name="pirate_rep_sequences_ffn" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_rep_sequences_faa" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_pangenome_aln" ftype="fasta">
                <assert_contents>
                    <has_line line=">HO_5096_0412"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="pirate_pangenome_gff" ftype="gff">
                <assert_contents>
                    <has_line_matching expression="##sequence-region Pangenome 1 3945"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="pirate_core_aln" ftype="fasta">
                <assert_contents>
                    <has_line line=">HO_5096_0412"/>
                    <has_n_lines n="4"/>
                </assert_contents>
            </output>
            <output name="pirate_core_gff" ftype="gff">
                <assert_contents>
                    <has_line_matching expression="##sequence-region Pangenome 1 2550"/>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 3: Advanced pangenome options for pan-genome analysis -->
        <test expect_num_outputs="8">
            <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/>
            <section name="global_opts">
                <param name="steps" value="50,60,70,80,90,95,98"/>
                <param name="features" value="CDS"/>
                <param name="min_len" value="120"/>
                <conditional name="pan_opts_conditional">
                    <param name="enable_pan_opt" value="yes"/>
                    <param name="perc" value="95"/>
                    <section name="cdhit_opts">
                        <param name="cd_low" value="98"/>
                        <param name="cd_step" value="0.5"/>
                        <param name="cd_core_off" value="true"/>
                    </section>
                    <section name="blast_opts">
                        <param name="evalue" value="0.00001"/>
                        <param name="diamond" value="true"/>
                        <param name="diamond_split" value="true"/>
                        <param name="hsp_len" value="0.1"/>
                    </section>
                    <section name="mcl_opts">
                        <param name="flat" value="2.0"/>
                    </section>
                </conditional>
            </section>
            <section name="output_opts">
                <param name="align" value="false"/>
            </section>
            <output name="pangenome_summary" ftype="txt">
                <assert_contents>
                    <has_line line="# 4 gene families in 2 genomes."/>
                    <has_n_lines n="13"/>
                </assert_contents>
            </output>
            <output name="pirate_gene_families" ftype="tsv">
                <assert_contents>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
            <output name="pirate_unique_alleles" ftype="tsv">
                <assert_contents>
                    <has_line_matching expression="g01_09\s+g01\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+\s+MRSA252_00002"/>
                    <has_n_lines n="9"/>
                </assert_contents>
            </output>
            <output name="pirate_presence_absence_fasta" ftype="fasta">
                <assert_contents>
                    <has_line line=">HO_5096_0412"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="pirate_presence_absence_nwk" ftype="newick">
                <assert_contents>
                    <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <output name="pangenome_gfa" ftype="gfa1">
                <assert_contents>
                    <has_line_matching expression="S\tg01\tA\tRC:i:2"/>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
            <output name="pirate_rep_sequences_ffn" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_rep_sequences_faa" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
        </test>
        <!-- Test 4: PDF report (R plots) enabled -->
        <test expect_num_outputs="9">
            <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/>
            <section name="global_opts">
                <param name="steps" value="50,60,70,80,90,95,98"/>
                <param name="features" value="CDS"/>
                <param name="min_len" value="120"/>
                <conditional name="pan_opts_conditional">
                    <param name="enable_pan_opt" value="no"/>
                </conditional>
            </section>
            <section name="output_opts">
                <param name="rplots" value="true"/>
            </section>
            <output name="pangenome_summary" ftype="txt">
                <assert_contents>
                    <has_line line="# 4 gene families in 2 genomes."/>
                    <has_n_lines n="13"/>
                </assert_contents>
            </output>
            <output name="pirate_gene_families" ftype="tsv">
                <assert_contents>
                    <has_n_lines n="5"/>
                </assert_contents>
            </output>
            <output name="pirate_unique_alleles" ftype="tsv">
                <assert_contents>
                    <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_presence_absence_fasta" ftype="fasta">
                <assert_contents>
                    <has_line line=">HO_5096_0412"/>
                    <has_n_lines n="6"/>
                </assert_contents>
            </output>
            <output name="pirate_presence_absence_nwk" ftype="newick">
                <assert_contents>
                    <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/>
                    <has_n_lines n="1"/>
                </assert_contents>
            </output>
            <output name="pangenome_gfa" ftype="gfa1">
                <assert_contents>
                    <has_line_matching expression="S\tg01\tA\tRC:i:2"/>
                    <has_n_lines n="7"/>
                </assert_contents>
            </output>
            <output name="pirate_rep_sequences_ffn" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_rep_sequences_faa" ftype="fasta">
                <assert_contents>
                    <has_n_lines n="8"/>
                </assert_contents>
            </output>
            <output name="pirate_plots" ftype="pdf">
                <assert_contents>
                    <has_size value="14397" delta="100"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**PIRATE** (Pangenome Iterative Refinement and Threshold Evaluation)

PIRATE is a pangenomics tool that allows for the iterative refinement of pangenomes using multiple identity thresholds. It is designed to handle highly divergent pangenomes and identify orthologs across different evolutionary scales.

**INPUTS**

- A collection of gff3 files.

**CORE OUTPUTS**

1. Pangenome Summary - Summary statistics of gene number and frequency in the pangenome
2. Tabular summary of all gene families - Complete gene family catalog with one row per family. Families split during paralog detection are labeled with underscores and numbers (e.g., g0001_1, g0001_2). Families are ordered by syntenic position in the pangenome graph.
3. Tabular summary of all unique alleles - Catalog of unique alleles per gene family, defined as distinct MCL sub-clusters at higher identity thresholds
4. Binary Presence/Absence data - Binary gene family presence/absence matrix in FASTA format
5. Binary Presence/Absence Newick data - FastTree phylogeny constructed from the binary presence/absence matrix
6. Pangenome GFA - Network representation of gene family connections in GFA format (can be visualized with Bandage)
7. Representative sequences for each gene family as nucleotide - Nucleotide sequences with the longest sequence per family selected as representative (genomes ordered alphabetically)
8. Representative sequences for each gene family as amino acid - Amino acid sequences corresponding to the nucleotide set

**OPTIONAL OUTPUTS**

1. Core Alignment FASTA - MAFFT-aligned core genome sequences, ordered by gene family table. Reverse-translated when created from CDS. Multi-copy genes represented as ? characters.
2. Core Alignment GFF - Annotation coordinates and gene/product information for the core alignment
3. Pangenome Alignment FASTA - MAFFT-aligned full pangenome sequences with the same characteristics as core alignment
4. Pangenome Alignment GFF - Annotation coordinates for the pangenome alignment
5. Summary plots of the PIRATE pangenome - Visualization plots summarizing the pangenome analysis
    ]]></help>
    <expand macro="citations"/>
    <expand macro="creator"/>
</tool>
