Mercurial > repos > iuc > pirate
comparison pirate.xml @ 0:ef07a43227a6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/pirate commit fd6bda0b520e35e43c29f35c10d5b0704f6f4f82
| author | iuc |
|---|---|
| date | Wed, 04 Feb 2026 11:52:07 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:ef07a43227a6 |
|---|---|
| 1 <tool id="pirate" name="PIRATE" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>Pangenome Iterative Refinement and Threshold Evaluation</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 ## Stage the inputs: symlink every selected GFF into input_gffs/ under its element identifier, replacing every character other than whitespace, word characters, '-' and '.' with '_' | |
| 9 #import re | |
| 10 mkdir input_gffs && | |
| 11 #for $f in $input_files: | |
| 12 #set identifier = re.sub('[^\s\w\-\\.]','_',str($f.element_identifier)) | |
| 13 ln -s '$f' 'input_gffs/$identifier' && | |
| 14 #end for | |
| 15 ## Run PIRATE on the staged directory; parameters declared inside a section are referenced by their full section path | |
| 16 PIRATE | |
| 17 --input input_gffs/ | |
| 18 --steps '$global_opts.steps' | |
| 19 --features '$global_opts.features' | |
| 20 --min-len $global_opts.min_len | |
| 21 $global_opts.nucl | |
| 22 $para_opts.para_off | |
| 23 $para_opts.classify_off | |
| 24 $output_opts.align | |
| 25 $output_opts.rplots | |
| 26 --threads "\${GALAXY_SLOTS:-8}" | |
| 27 ## Advanced parameters are passed as one quoted --pan-opt string, only when the user enabled them | |
| 28 #if str($global_opts.pan_opts_conditional.enable_pan_opt) == 'yes': | |
| 29 --pan-opt ' | |
| 30 --perc $global_opts.pan_opts_conditional.perc | |
| 31 --cd-low $global_opts.pan_opts_conditional.cdhit_opts.cd_low | |
| 32 --cd-step $global_opts.pan_opts_conditional.cdhit_opts.cd_step | |
| 33 $global_opts.pan_opts_conditional.cdhit_opts.cd_core_off | |
| 34 --evalue $global_opts.pan_opts_conditional.blast_opts.evalue | |
| 35 $global_opts.pan_opts_conditional.blast_opts.diamond | |
| 36 $global_opts.pan_opts_conditional.blast_opts.diamond_split | |
| 37 --hsp-len $global_opts.pan_opts_conditional.blast_opts.hsp_len | |
| 38 --flat $global_opts.pan_opts_conditional.mcl_opts.flat | |
| 39 ' | |
| 40 #end if | |
| 41 --output results/ | |
| 42 ]]></command> | |
| 43 <inputs> | |
| 44 <param argument="--input" name="input_files" type="data" format="gff,gff3" multiple="true" label="Input GFF files" help="Select all GFF files for pangenome construction"/> | |
| 45 | |
| 46 <section name="global_opts" title="Global Options" expanded="true"> | |
| 47 <param argument="--steps" type="text" value="50,60,70,80,90,95,98" label="Identity thresholds" help="Comma-separated list of % identity thresholds"/> | |
| 48 <param argument="--features" type="text" value="CDS" label="Features" help="Features to use (e.g., CDS, tRNA). Multiple could be specified using commas."/> | |
| 49 <param argument="--min-len" type="integer" value="120" min="0" label="Minimum feature length"/> | |
| 50 <param argument="--nucl" type="boolean" truevalue="--nucl" falsevalue="" checked="false" label="Do not translate to Amino Acids" /> | |
| 51 | |
| 52 <conditional name="pan_opts_conditional"> | |
| 53 <param name="enable_pan_opt" type="select" label="Enable advanced pangenome options" help="Enable --pan-opt and advanced pangenome parameters"> | |
| 54 <option value="no" selected="true">No</option> | |
| 55 <option value="yes">Yes</option> | |
| 56 </param> | |
| 57 <when value="yes"> | |
| 58 <param argument="--perc" type="integer" value="98" optional="true" min="0" max="100" label="Single % identity threshold to use for pangenome" help="Single % identity threshold to use for pangenome"/> | |
| 59 | |
| 60 <section name="cdhit_opts" title="CD-HIT Options" expanded="false"> | |
| 61 <param argument="--cd-low" type="integer" value="98" min="0" max="100" label="CD-HIT lowest percentage id" help="Default: 98"/> | |
| 62 <param argument="--cd-step" type="float" value="0.5" min="0" label="CD-HIT step size" help="Default: 0.5"/> | |
| 63 <param argument="--cd-core-off" type="boolean" truevalue="--cd-core-off" falsevalue="" checked="false" label="Don't extract core families during CD-HIT clustering" help="Default: Extract core families"/> | |
| 64 </section> | |
| 65 | |
| 66 <section name="blast_opts" title="BLAST Options" expanded="false"> | |
| 67 <param argument="--evalue" type="float" value="1E-6" min="0" label="E-value for BLAST hit filtering" help="Default: 1E-6"/> | |
| 68 <param argument="--diamond" type="boolean" truevalue="--diamond" falsevalue="" checked="false" label="Use DIAMOND instead of BLAST. Incompatible with --nucl"/> | |
| 69 <param argument="--diamond-split" type="boolean" truevalue="--diamond-split" falsevalue="" checked="false" label="Split DIAMOND files into batches"/> | |
| 70 <param argument="--hsp-len" type="float" value="0" min="0" max="1" label="Remove BLAST HSPs proportion threshold" help="Remove BLAST HSPs that are less than this proportion of query length."/> | |
| 71 </section> | |
| 72 | |
| 73 <section name="mcl_opts" title="MCL Options" expanded="false"> | |
| 74 <param argument="--flat" type="float" value="1.5" min="0" label="MCL inflation value"/> | |
| 75 </section> | |
| 76 </when> | |
| 77 <when value="no"/> | |
| 78 </conditional> | |
| 79 </section> | |
| 80 | |
| 81 <section name="para_opts" title="Paralog Classification" expanded="false"> | |
| 82 <param argument="--para-off" type="boolean" truevalue="--para-off" falsevalue="" checked="false" label="Switch off paralog identification"/> | |
| 83 <param argument="--classify-off" type="boolean" truevalue="--classify-off" falsevalue="" checked="false" label="Do not classify paralogs"/> | |
| 84 </section> | |
| 85 | |
| 86 <section name="output_opts" title="Output Options" expanded="true"> | |
| 87 <param argument="--align" type="boolean" truevalue="--align" falsevalue="" checked="false" label="Produce alignments" help="Align all genes and produce core/pangenome alignments."/> | |
| 88 <param argument="--rplots" type="boolean" truevalue="--rplots" falsevalue="" checked="false" label="Generate R plots" help="Plot summaries using R."/> | |
| 89 </section> | |
| 90 </inputs> | |
| 91 | |
| 92 <outputs> | |
| 93 <data name="pangenome_summary" format="txt" from_work_dir="results/PIRATE.pangenome_summary.txt" label="${tool.name} on ${on_string}: Pangenome Summary"/> | |
| 94 <data name="pirate_gene_families" format="tsv" from_work_dir="results/PIRATE.gene_families.ordered.tsv" label="${tool.name} on ${on_string}: Tabular summary of all gene families"/> | |
| 95 <data name="pirate_unique_alleles" format="tsv" from_work_dir="results/PIRATE.unique_alleles.tsv" label="${tool.name} on ${on_string}: Tabular summary of all unique alleles"/> | |
| 96 <data name="pirate_presence_absence_fasta" format="fasta" from_work_dir="results/binary_presence_absence.fasta" label="${tool.name} on ${on_string}: Binary Presence/Absence data"/> | |
| 97 <data name="pirate_presence_absence_nwk" format="newick" from_work_dir="results/binary_presence_absence.nwk" label="${tool.name} on ${on_string}: Binary Presence/Absence Newick data"/> | |
| 98 <data name="pangenome_gfa" format="gfa1" from_work_dir="results/pangenome.gfa" label="${tool.name} on ${on_string}: Pangenome GFA"/> | |
| 99 <data name="pirate_rep_sequences_ffn" format="fasta" from_work_dir="results/representative_sequences.ffn" label="${tool.name} on ${on_string}: Representative sequences for each gene family as nucleotide"/> | |
| 100 <data name="pirate_rep_sequences_faa" format="fasta" from_work_dir="results/representative_sequences.faa" label="${tool.name} on ${on_string}: Representative sequences for each gene family as amino acid"/> | |
| 101 | |
| 102 <data name="pirate_core_aln" format="fasta" from_work_dir="results/core_alignment.fasta" label="${tool.name} on ${on_string}: Core Alignment FASTA"> | |
| 103 <filter>output_opts['align']</filter> | |
| 104 </data> | |
| 105 | |
| 106 <data name="pirate_core_gff" format="gff" from_work_dir="results/core_alignment.gff" label="${tool.name} on ${on_string}: Core Alignment GFF"> | |
| 107 <filter>output_opts['align']</filter> | |
| 108 </data> | |
| 109 | |
| 110 <data name="pirate_pangenome_aln" format="fasta" from_work_dir="results/pangenome_alignment.fasta" label="${tool.name} on ${on_string}: Pangenome Alignment FASTA"> | |
| 111 <filter>output_opts['align']</filter> | |
| 112 </data> | |
| 113 | |
| 114 <data name="pirate_pangenome_gff" format="gff" from_work_dir="results/pangenome_alignment.gff" label="${tool.name} on ${on_string}: Pangenome Alignment GFF"> | |
| 115 <filter>output_opts['align']</filter> | |
| 116 </data> | |
| 117 | |
| 118 <data name="pirate_plots" format="pdf" from_work_dir="results/PIRATE_plots.pdf" label="${tool.name} on ${on_string}: Summary plots of the PIRATE pangenome"> | |
| 119 <filter>output_opts['rplots']</filter> | |
| 120 </data> | |
| 121 </outputs> | |
| 122 | |
| 123 <tests> | |
| 124 <!-- Test 1 : Default parameters--> | |
| 125 <test expect_num_outputs="8"> | |
| 126 <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/> | |
| 127 <section name="global_opts"> | |
| 128 <param name="steps" value="50,60,70,80,90,95,98"/> | |
| 129 <param name="features" value="CDS"/> | |
| 130 <param name="min_len" value="120"/> | |
| 131 <conditional name="pan_opts_conditional"> | |
| 132 <param name="enable_pan_opt" value="no"/> | |
| 133 </conditional> | |
| 134 </section> | |
| 135 <output name="pangenome_summary" ftype="txt"> | |
| 136 <assert_contents> | |
| 137 <has_line line="# 4 gene families in 2 genomes."/> | |
| 138 <has_n_lines n="13"/> | |
| 139 </assert_contents> | |
| 140 </output> | |
| 141 <output name="pirate_gene_families" ftype="tsv"> | |
| 142 <assert_contents> | |
| 143 <has_n_lines n="5"/> | |
| 144 </assert_contents> | |
| 145 </output> | |
| 146 <output name="pirate_unique_alleles" ftype="tsv"> | |
| 147 <assert_contents> | |
| 148 <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/> | |
| 149 <has_n_lines n="8"/> | |
| 150 </assert_contents> | |
| 151 </output> | |
| 152 <output name="pirate_presence_absence_fasta" ftype="fasta"> | |
| 153 <assert_contents> | |
| 154 <has_line line=">HO_5096_0412"/> | |
| 155 <has_n_lines n="6"/> | |
| 156 </assert_contents> | |
| 157 </output> | |
| 158 <output name="pirate_presence_absence_nwk" ftype="newick"> | |
| 159 <assert_contents> | |
| 160 <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/> | |
| 161 <has_n_lines n="1"/> | |
| 162 </assert_contents> | |
| 163 </output> | |
| 164 <output name="pangenome_gfa" ftype="gfa1"> | |
| 165 <assert_contents> | |
| 166 <has_line_matching expression="S\tg01\tA\tRC:i:2"/> | |
| 167 <has_n_lines n="7"/> | |
| 168 </assert_contents> | |
| 169 </output> | |
| 170 <output name="pirate_rep_sequences_ffn" ftype="fasta"> | |
| 171 <assert_contents> | |
| 172 <has_n_lines n="8"/> | |
| 173 </assert_contents> | |
| 174 </output> | |
| 175 <output name="pirate_rep_sequences_faa" ftype="fasta"> | |
| 176 <assert_contents> | |
| 177 <has_n_lines n="8"/> | |
| 178 </assert_contents> | |
| 179 </output> | |
| 180 </test> | |
| 181 <!-- Test 2 : testing align parameter--> | |
| 182 <test expect_num_outputs="12"> | |
| 183 <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/> | |
| 184 <section name="global_opts"> | |
| 185 <param name="steps" value="50,60,70,80,90,95,98"/> | |
| 186 <param name="features" value="CDS"/> | |
| 187 <param name="min_len" value="120"/> | |
| 188 <conditional name="pan_opts_conditional"> | |
| 189 <param name="enable_pan_opt" value="no"/> | |
| 190 </conditional> | |
| 191 </section> | |
| 192 <section name="output_opts"> | |
| 193 <param name="align" value="true"/> | |
| 194 </section> | |
| 195 <output name="pangenome_summary" ftype="txt"> | |
| 196 <assert_contents> | |
| 197 <has_line line="# 4 gene families in 2 genomes."/> | |
| 198 <has_n_lines n="13"/> | |
| 199 </assert_contents> | |
| 200 </output> | |
| 201 <output name="pirate_gene_families" ftype="tsv"> | |
| 202 <assert_contents> | |
| 203 <has_n_lines n="5"/> | |
| 204 </assert_contents> | |
| 205 </output> | |
| 206 <output name="pirate_unique_alleles" ftype="tsv"> | |
| 207 <assert_contents> | |
| 208 <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/> | |
| 209 <has_n_lines n="8"/> | |
| 210 </assert_contents> | |
| 211 </output> | |
| 212 <output name="pirate_presence_absence_fasta" ftype="fasta"> | |
| 213 <assert_contents> | |
| 214 <has_line line=">HO_5096_0412"/> | |
| 215 <has_n_lines n="6"/> | |
| 216 </assert_contents> | |
| 217 </output> | |
| 218 <output name="pirate_presence_absence_nwk" ftype="newick"> | |
| 219 <assert_contents> | |
| 220 <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/> | |
| 221 <has_n_lines n="1"/> | |
| 222 </assert_contents> | |
| 223 </output> | |
| 224 <output name="pangenome_gfa" ftype="gfa1"> | |
| 225 <assert_contents> | |
| 226 <has_line_matching expression="S\tg01\tA\tRC:i:2"/> | |
| 227 <has_n_lines n="7"/> | |
| 228 </assert_contents> | |
| 229 </output> | |
| 230 <output name="pirate_rep_sequences_ffn" ftype="fasta"> | |
| 231 <assert_contents> | |
| 232 <has_n_lines n="8"/> | |
| 233 </assert_contents> | |
| 234 </output> | |
| 235 <output name="pirate_rep_sequences_faa" ftype="fasta"> | |
| 236 <assert_contents> | |
| 237 <has_n_lines n="8"/> | |
| 238 </assert_contents> | |
| 239 </output> | |
| 240 <output name="pirate_pangenome_aln" ftype="fasta"> | |
| 241 <assert_contents> | |
| 242 <has_line line=">HO_5096_0412"/> | |
| 243 <has_n_lines n="4"/> | |
| 244 </assert_contents> | |
| 245 </output> | |
| 246 <output name="pirate_pangenome_gff" ftype="gff"> | |
| 247 <assert_contents> | |
| 248 <has_line_matching expression="##sequence-region Pangenome 1 3945"/> | |
| 249 <has_n_lines n="6"/> | |
| 250 </assert_contents> | |
| 251 </output> | |
| 252 <output name="pirate_core_aln" ftype="fasta"> | |
| 253 <assert_contents> | |
| 254 <has_line line=">HO_5096_0412"/> | |
| 255 <has_n_lines n="4"/> | |
| 256 </assert_contents> | |
| 257 </output> | |
| 258 <output name="pirate_core_gff" ftype="gff"> | |
| 259 <assert_contents> | |
| 260 <has_line_matching expression="##sequence-region Pangenome 1 2550"/> | |
| 261 <has_n_lines n="5"/> | |
| 262 </assert_contents> | |
| 263 </output> | |
| 264 </test> | |
| 265 | |
| 266 <!-- Test 3 : testing advanced pangenome options (pan-opt) --> | |
| 267 <test expect_num_outputs="8"> | |
| 268 <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/> | |
| 269 <section name="global_opts"> | |
| 270 <param name="steps" value="50,60,70,80,90,95,98"/> | |
| 271 <param name="features" value="CDS"/> | |
| 272 <param name="min_len" value="120"/> | |
| 273 <conditional name="pan_opts_conditional"> | |
| 274 <param name="enable_pan_opt" value="yes"/> | |
| 275 <param name="perc" value="95"/> | |
| 276 <section name="cdhit_opts"> | |
| 277 <param name="cd_low" value="98"/> | |
| 278 <param name="cd_step" value="0.5"/> | |
| 279 <param name="cd_core_off" value="true"/> | |
| 280 </section> | |
| 281 <section name="blast_opts"> | |
| 282 <param name="evalue" value="0.00001"/> | |
| 283 <param name="diamond" value="true"/> | |
| 284 <param name="diamond_split" value="true"/> | |
| 285 <param name="hsp_len" value="0.1"/> | |
| 286 </section> | |
| 287 <section name="mcl_opts"> | |
| 288 <param name="flat" value="2.0"/> | |
| 289 </section> | |
| 290 </conditional> | |
| 291 </section> | |
| 292 <section name="output_opts"> | |
| 293 <param name="align" value="false"/> | |
| 294 </section> | |
| 295 <output name="pangenome_summary" ftype="txt"> | |
| 296 <assert_contents> | |
| 297 <has_line line="# 4 gene families in 2 genomes."/> | |
| 298 <has_n_lines n="13"/> | |
| 299 </assert_contents> | |
| 300 </output> | |
| 301 <output name="pirate_gene_families" ftype="tsv"> | |
| 302 <assert_contents> | |
| 303 <has_n_lines n="5"/> | |
| 304 </assert_contents> | |
| 305 </output> | |
| 306 <output name="pirate_unique_alleles" ftype="tsv"> | |
| 307 <assert_contents> | |
| 308 <has_line_matching expression="g01_09\s+g01\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+\s+MRSA252_00002"/> | |
| 309 <has_n_lines n="9"/> | |
| 310 </assert_contents> | |
| 311 </output> | |
| 312 <output name="pirate_presence_absence_fasta" ftype="fasta"> | |
| 313 <assert_contents> | |
| 314 <has_line line=">HO_5096_0412"/> | |
| 315 <has_n_lines n="6"/> | |
| 316 </assert_contents> | |
| 317 </output> | |
| 318 <output name="pirate_presence_absence_nwk" ftype="newick"> | |
| 319 <assert_contents> | |
| 320 <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/> | |
| 321 <has_n_lines n="1"/> | |
| 322 </assert_contents> | |
| 323 </output> | |
| 324 <output name="pangenome_gfa" ftype="gfa1"> | |
| 325 <assert_contents> | |
| 326 <has_line_matching expression="S\tg01\tA\tRC:i:2"/> | |
| 327 <has_n_lines n="7"/> | |
| 328 </assert_contents> | |
| 329 </output> | |
| 330 <output name="pirate_rep_sequences_ffn" ftype="fasta"> | |
| 331 <assert_contents> | |
| 332 <has_n_lines n="8"/> | |
| 333 </assert_contents> | |
| 334 </output> | |
| 335 <output name="pirate_rep_sequences_faa" ftype="fasta"> | |
| 336 <assert_contents> | |
| 337 <has_n_lines n="8"/> | |
| 338 </assert_contents> | |
| 339 </output> | |
| 340 </test> | |
| 341 | |
| 342 <!-- Test 4 : testing PDF reports (rplots) --> | |
| 343 <test expect_num_outputs="9"> | |
| 344 <param name="input_files" location="https://zenodo.org/records/18470711/files/HO_5096_0412.gff,https://zenodo.org/records/18470711/files/MRSA252.gff"/> | |
| 345 <section name="global_opts"> | |
| 346 <param name="steps" value="50,60,70,80,90,95,98"/> | |
| 347 <param name="features" value="CDS"/> | |
| 348 <param name="min_len" value="120"/> | |
| 349 <conditional name="pan_opts_conditional"> | |
| 350 <param name="enable_pan_opt" value="no"/> | |
| 351 </conditional> | |
| 352 </section> | |
| 353 <section name="output_opts"> | |
| 354 <param name="rplots" value="true"/> | |
| 355 </section> | |
| 356 <output name="pangenome_summary" ftype="txt"> | |
| 357 <assert_contents> | |
| 358 <has_line line="# 4 gene families in 2 genomes."/> | |
| 359 <has_n_lines n="13"/> | |
| 360 </assert_contents> | |
| 361 </output> | |
| 362 <output name="pirate_gene_families" ftype="tsv"> | |
| 363 <assert_contents> | |
| 364 <has_n_lines n="5"/> | |
| 365 </assert_contents> | |
| 366 </output> | |
| 367 <output name="pirate_unique_alleles" ftype="tsv"> | |
| 368 <assert_contents> | |
| 369 <has_line_matching expression="g03_10\s+g03\s+trpD\s+Anthranilate phosphoribosyltransferase\s+98\s+3\s+1\s+1\s+1\s+1\s+0\s+0\s+0\s+0\s+1\s+Anthranilate phosphoribosyltransferase\(1\)\s+trpD\(1\)\s+243\s+243\s+243\.00\s+MRSA252_00002"/> | |
| 370 <has_n_lines n="8"/> | |
| 371 </assert_contents> | |
| 372 </output> | |
| 373 <output name="pirate_presence_absence_fasta" ftype="fasta"> | |
| 374 <assert_contents> | |
| 375 <has_line line=">HO_5096_0412"/> | |
| 376 <has_n_lines n="6"/> | |
| 377 </assert_contents> | |
| 378 </output> | |
| 379 <output name="pirate_presence_absence_nwk" ftype="newick"> | |
| 380 <assert_contents> | |
| 381 <has_line line="(HO_5096_0412:0.152049416,MRSA252:0.152049416);"/> | |
| 382 <has_n_lines n="1"/> | |
| 383 </assert_contents> | |
| 384 </output> | |
| 385 <output name="pangenome_gfa" ftype="gfa1"> | |
| 386 <assert_contents> | |
| 387 <has_line_matching expression="S\tg01\tA\tRC:i:2"/> | |
| 388 <has_n_lines n="7"/> | |
| 389 </assert_contents> | |
| 390 </output> | |
| 391 <output name="pirate_rep_sequences_ffn" ftype="fasta"> | |
| 392 <assert_contents> | |
| 393 <has_n_lines n="8"/> | |
| 394 </assert_contents> | |
| 395 </output> | |
| 396 <output name="pirate_rep_sequences_faa" ftype="fasta"> | |
| 397 <assert_contents> | |
| 398 <has_n_lines n="8"/> | |
| 399 </assert_contents> | |
| 400 </output> | |
| 401 <output name="pirate_plots" ftype="pdf"> | |
| 402 <assert_contents> | |
| 403 <has_size value="14397" delta="100"/> | |
| 404 </assert_contents> | |
| 405 </output> | |
| 406 </test> | |
| 407 | |
| 408 </tests> | |
| 409 <help><![CDATA[ | |
| 410 **PIRATE** (Pangenome Iterative Refinement and Threshold Evaluation) | |
| 411 | |
| 412 PIRATE is a pangenomics tool that allows for the iterative refinement of pangenomes using multiple identity thresholds. It is designed to handle highly divergent pangenomes and identify orthologs across different evolutionary scales. | |
| 413 | |
| 414 **INPUTS** | |
| 415 | |
| 416 - The GFF/GFF3 annotation files to include in the pangenome (multiple datasets can be selected). | |
| 417 | |
| 418 **CORE OUTPUTS** | |
| 419 | |
| 420 1. Pangenome Summary - Summary statistics of gene number and frequency in the pangenome | |
| 421 2. Tabular summary of all gene families - Complete gene family catalog with one row per family. Families split during paralog detection are labeled with underscores and numbers (e.g., g0001_1, g0001_2). Families are ordered by syntenic position in the pangenome graph. | |
| 422 3. Tabular summary of all unique alleles - Catalog of unique alleles per gene family, defined as distinct MCL sub-clusters at higher identity thresholds | |
| 423 4. Binary Presence/Absence data - Binary gene family presence/absence matrix in FASTA format | |
| 424 5. Binary Presence/Absence Newick data - FastTree phylogeny constructed from the binary presence/absence matrix | |
| 425 6. Pangenome GFA - Network representation of gene family connections in GFA format (can be visualized with Bandage) | |
| 426 7. Representative sequences for each gene family as nucleotide - Nucleotide sequences with the longest sequence per family selected as representative (genomes ordered alphabetically) | |
| 427 8. Representative sequences for each gene family as amino acid - Amino acid sequences corresponding to the nucleotide set | |
| 428 | |
| 429 **OPTIONAL OUTPUTS** | |
| 430 | |
| 431 1. Core Alignment FASTA - MAFFT-aligned core genome sequences, ordered by gene family table. Reverse-translated when created from CDS. Multi-copy genes represented as ? characters. | |
| 432 2. Core Alignment GFF - Annotation coordinates and gene/product information for the core alignment | |
| 433 3. Pangenome Alignment FASTA - MAFFT-aligned full pangenome sequences with the same characteristics as core alignment | |
| 434 4. Pangenome Alignment GFF - Annotation coordinates for the pangenome alignment | |
| 435 5. Summary plots of the PIRATE pangenome - Visualization plots summarizing the pangenome analysis | |
| 436 | |
| 437 | |
| 438 ]]></help> | |
| 439 <expand macro="citations"/> | |
| 440 <expand macro="creator"/> | |
| 441 </tool> |
