Mercurial > repos > galaxy-australia > panaroo
diff panaroo.xml @ 1:b6a78d286482 draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/panaroo commit 23afccfad9fc0d2a4b91fd391f4847062fd98042
| author | iuc |
|---|---|
| date | Fri, 11 Apr 2025 11:23:50 +0000 |
| parents | 50483f852947 |
| children | b05be6316263 |
line wrap: on
line diff
--- a/panaroo.xml Fri Apr 11 07:46:04 2025 +0000 +++ b/panaroo.xml Fri Apr 11 11:23:50 2025 +0000 @@ -6,421 +6,436 @@ <expand macro="edam_ontology"/> <expand macro="biotools"/> <expand macro="requirements"/> - <stdio> - <exit_code range="1:" /> - <regex match="System..*Exception" - source="both" - level="fatal" - description="Error encountered" /> - </stdio> - <command><![CDATA[ - - mkdir outdir && - - #import re - #set input_directory = 'input_directory' - mkdir $input_directory && - #for $gff in $gff_input_collection: - #set identifier = re.sub('[^\s\w\-\\.]','_',str($gff.element_identifier)) - ln -fs '$gff' '$input_directory/$identifier' && - #end for + <command detect_errors="exit_code"><![CDATA[ + mkdir outdir && + #import re + #set input_directory = 'input_directory' + mkdir $input_directory && + #for $gff in $gff_input_collection: + #set identifier = re.sub('[^\s\w\-\\.]','_',str($gff.element_identifier)) + ln -fs '$gff' '$input_directory/$identifier' && + #end for - panaroo - -t \${GALAXY_SLOTS:-2} - #if str($gen_code) != 'None': - --codon-table $gen_code - #end if - #if str($advanced.adv_options_selector) == "set": - #if $advanced.remove_invalid_gene - $advanced.remove_invalid_gene - #end if - -c '$advanced.matching_option.seq_threshold' - -f '$advanced.matching_option.peptide_threshold' - --len_dif_percent '$advanced.matching_option.length_diff_cutoff' - $advanced.matching_option.merge_paralogs - --search_radius '$advanced.refind_option.search_radius' - --refind_prop_match '$advanced.refind_option.refind_prop_match' - --refind-mode '$advanced.refind_option.refind_mode' - --min_trailing_support '$advanced.graph_correction_option.min_trailing_support' - --trailing_recursive '$advanced.graph_correction_option.trailing_recursive' - --edge_support_threshold '$advanced.graph_correction_option.edge_support_threshold' - --remove_by_consensus '$advanced.graph_correction_option.remove_by_consensus' - --high_var_flag '$advanced.graph_correction_option.high_var_flag' - --min_edge_support_sv '$advanced.graph_correction_option.min_edge_support_sv' - $advanced.graph_correction_option.all_seq_in_graph - $advanced.graph_correction_option.no_clean_edges - - #if $advanced.gene_alignment_option.a != 'None' - -a '$advanced.gene_alignment_option.a' - #end if - - #if '$advanced.gene_alignment_option.aligner' == 'mafft' - --aligner mafft - #else - --aligner '$advanced.gene_alignment_option.aligner' - #end if - #if $advanced.gene_alignment_option.core_subset != '' - --core_subset $advanced.gene_alignment_option.core_subset - #end if - #end if - -i $input_directory/*.gff - -o outdir - --clean-mode $mode - > '$log' && - mv outdir/gene_presence_absence.Rtab outdir/gene_presence_absence_rtab.Rtab && - mv outdir/combined_protein_cdhit_out.txt outdir/combined_protein_cdhit_out.fa && - 2>&1 - + panaroo + --clean-mode '$mode' + #if str($gen_code) != 'None': + --codon-table '$gen_code' + #end if + #if str($advanced.adv_options_selector) == "set": + #if $advanced.remove_invalid_genes + $advanced.remove_invalid_genes + #end if + --threshold '$advanced.matching_option.threshold' + --family_threshold '$advanced.matching_option.family_threshold' + --len_dif_percent '$advanced.matching_option.len_dif_percent' + $advanced.matching_option.merge_paralogs + --search_radius '$advanced.refind_option.search_radius' + --refind_prop_match '$advanced.refind_option.refind_prop_match' + --refind-mode '$advanced.refind_option.refind_mode' + --min_trailing_support '$advanced.graph_correction_option.min_trailing_support' + --trailing_recursive '$advanced.graph_correction_option.trailing_recursive' + --edge_support_threshold '$advanced.graph_correction_option.edge_support_threshold' + --remove_by_consensus '$advanced.graph_correction_option.remove_by_consensus' + --high_var_flag '$advanced.graph_correction_option.high_var_flag' + --min_edge_support_sv '$advanced.graph_correction_option.min_edge_support_sv' + $advanced.graph_correction_option.all_seq_in_graph + $advanced.graph_correction_option.no_clean_edges + #if $advanced.gene_alignment_option.alignment != 'None' + --alignment '$advanced.gene_alignment_option.alignment' + --aligner '$advanced.gene_alignment_option.aligner' + #end if + #if $advanced.gene_alignment_option.core_subset + --core_subset $advanced.gene_alignment_option.core_subset + #end if + #if $advanced.gene_alignment_option.core_entropy_filter + --core_entropy_filter $advanced.gene_alignment_option.core_entropy_filter + #end if + #end if + -i $input_directory/*.gff + -o outdir + -t \${GALAXY_SLOTS:-8} + #if $log_out + 2>&1 | tee '$log' + #end if + && mv outdir/gene_presence_absence.Rtab outdir/gene_presence_absence_rtab.Rtab && + mv outdir/combined_protein_cdhit_out.txt outdir/combined_protein_cdhit_out.fa ]]></command> <inputs> - <param name="gff_input_collection" type="data_collection" format="gff" collection_type="list" label="GFF Input Collection" help="A list of gff files (i.e prokka)"/> - <param name="mode" type="select" label="The stringency mode at which to run panaroo" help="--clean-mode"> - <expand macro="clean_mode"/> + <param name="gff_input_collection" type="data_collection" format="gff" collection_type="list" label="GFF Input Collection" help="A collection of input GFF files"/> + <param name="mode" type="select" label="The stringency mode for Panaroo to run" help="Each of these modes can be fine tuned using the additional parameters in the 'Graph correction' section."> + <option value="strict">Strict</option> + <option value="moderate">Moderate</option> + <option value="sensitive">Sensitive</option> </param> - <param name="gen_code" type="select" label="the codon table user for translation" help="default: 11"> + <param name="gen_code" type="select" label="The Codon table used for translation" help="Default: 11.Bacteria and Archaea"> <expand macro="genetic_code"/> </param> - <conditional name="advanced"> - <param name="adv_options_selector" type="select" label="Set advanced options?" help="Provides additional controls"> + <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/> + <conditional name="advanced"> + <param name="adv_options_selector" type="select" label="Set Advanced Options?" help="Fine Tuning of Panaroo algorithmic parameters"> <option value="set">Set</option> <option value="do_not_set" selected="True">Do not set</option> </param> - <when value="set"> - <param argument="--remove-invalid-genes" name="remove_invalid_gene" type="boolean" truevalue="--remove-invalid-genes" falsevalue="" label="removes annotations that do not conform to the expected Prokka format such as those including premature stop codons" help="--remove-invalid-genes"/> - - <section name="matching_option" title="Matching" expanded="false"> - <param argument="--threshold" name="seq_threshold" type="float" value="0.98" label="sequence identity threshold" help="default: 0.98"/> - <param argument="--family_threshold" name="peptide_threshold" type="float" value="0.7" label="protein family sequence identity threshold" help="default: 0.7"/> - <param argument="--len_dif_percent" name="length_diff_cutoff" type="float" value="0.98" label="length difference cutoff" help="default: 0.98"/> - <param name="merge_paralogs" type="boolean" truevalue="--merge_paralogs" falsevalue="" checked="false" label="do not split paralogs" help="--merge_paralogs"/> - </section> - - <section name="refind_option" title="Refind" expanded="false"> - <param argument="--search_radius" type="integer" value="5000" label="Search radius" help="--search_radius (default: 5000)"/> - <param argument="--refind_prop_match" type="float" value="0.75" label="Gene proportion match" help="default: 0.75"/> - <param argument="--refind_mode" type="select" label="The stringency mode at which to re-find genes" help="default: default"> - <expand macro="refind_mode_option"/> - </param> - </section> + <when value="set"> + <param argument="--remove-invalid-genes" type="boolean" truevalue="--remove-invalid-genes" falsevalue="" label="Remove Invalid Genes" help="Removes annotations that do not conform to the expected Prokka format."/> + + <!--Options for Matching--> + <section name="matching_option" title="Matching" expanded="false"> + <param argument="--threshold" type="float" value="0.98" label="Sequence identity threshold" help="default: 0.98"/> + <param argument="--family_threshold" type="float" value="0.7" label="Protein family sequence identity threshold" help="default: 0.7"/> + <param argument="--len_dif_percent" type="float" value="0.98" label="Length difference cutoff" help="default: 0.98"/> + <param argument="--merge-paralogs" type="boolean" truevalue="--merge_paralogs" falsevalue="" label="Merge Paralogs"/> + </section> + + <!--Options for Refind--> + <section name="refind_option" title="Refind" expanded="false"> + <param argument="--search_radius" type="integer" value="5000" label="Refinding Search radius" help="The distance in nucleotides surronding the neighbour of an accessory gene in which to search for it"/> + <param argument="--refind_prop_match" type="float" value="0.2" label="Refinding Proportion Match" help="he proportion of an accessory gene that must be found in order to consider it a match"/> + <param argument="--refind_mode" type="select" label="Refind Mode" help="Set the stringency mode at which to re-find genes"> + <option value="default" selected="True">Default</option> + <option value="strict">Strict</option> + <option value="off">Off</option> + </param> + </section> - <section name="graph_correction_option" title="Graph Correction" expanded="false"> - <param argument="--min_trailing_support" type="integer" value="2" label="Minimum cluster size to keep a gene called at the end of a contig" help="--min_traiiing_support [relexed mode : 2 is used]"/> - <param argument="--trailing_recursive" type="integer" value="1" label="Number of times to perform recursive trimming of low support nodes near the end of contigs" help="--trailing_recursive [relaxed mode: 1 is used]"/> - <param argument="--edge_support_threshold" type="integer" value="1" label="Edge support threshold" help="--edge_support_threshold [ Minimal edge 1 is used ]"/> - <param argument="--len_outlier_proportion" type="float" value="0.01" label="Length outlier support proportion" help="--length_outlier_support_proportion [default: 0.01]"/> - <param argument="--remove_by_consensus" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Remove consensus" help="--remove_by_consensus [default: False]"/> - <param argument="--high_var_flag" type="integer" value="5" label="Highly variable gene region" help="--high_var_flag [default: 5]"/> - <param argument="--min_edge_support_sv" type="integer" value="2" label="Minimum edge support structural variants" help="--min_edge_support_sv [relaxed mode: 2 is used]"/> - <param argument="--all_seq_in_graph" type="boolean" truevalue="--all_seq_in_graph" falsevalue="" label="Retains all DNA sequence" help="--all_seq_in_graph [default: off]"/> - <param argument="--no_clean_edges" type="boolean" truevalue="--no_clean_edges" falsevalue="" label="Edge filtering in the final output graph" help="--no_clean_edges [default: off]"/> - </section> - - <section name="gene_alignment_option" title="Gene Alignment" expanded="false"> - <param argument="-a" type="select" label="Output alignments of core genes or all genes." help="-a [optional: core or pan; default: None"> - <expand macro="gene_alignment"/> - </param> - <param argument="--aligner" type="select" label="Specify an aligner" help="--aligner [mafft|prank|clustal][default: mafft]"> - <expand macro="gene_aligner"/> - </param> - <param name="codons" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Generate codon alignments" help="--codons"/> - <param name="core_threshold" type="float" value="0.95" label="Core-genome sample threshold" help="--core_threshold [default: 0.95]"/> - <param argument="--core_subset" type="integer" value="" optional="true" label="Subset of the core genome to these many genes" help="--core_subset [default: all]"/> - <param name="core_entropy" type="float" value="0.1" label="Set the Block Mapping and Gathering with Entropy" help="--core_entropy_filter (threshold can be between 0.0 and 1.0) [default: Tukey outlier method]"/> - </section> - </when> + <!--Graph Correction--> + <section name="graph_correction_option" title="Graph Correction" expanded="false"> + <param argument="--min_trailing_support" type="integer" value="2" label="Minimum trailing support" help="Minimum cluster size to keep a gene called at the end of a contig"/> + <param argument="--trailing_recursive" type="integer" value="1" label="Trailing Recursive" help="Number of times to perform recursive trimming of low support nodes near the end of contigs"/> + <param argument="--edge_support_threshold" type="float" value="1" label="Edge support threshold" help="Minimum support required to keep an edge that has been flagged as a possible mis-assembly."/> + <param argument="--len_outlier_proportion" type="float" value="0.01" label="Length outlier support proportion" help="--length_outlier_support_proportion"/> + <param argument="--remove_by_consensus" type="boolean" truevalue="True" falsevalue="False" label="Remove consensus" help="If a gene is called in the same region with similar sequence a minority of the time, remove it."/> + <param argument="--high_var_flag" type="integer" value="5" label="Highly variable gene region" help="Minimum number of nested cycles to call a highly variable gene region."/> + <param argument="--min_edge_support_sv" type="integer" value="2" label="Minimum edge support structural variants" help="Minimum edge support required to call structural variants in the presence/absence sv file"/> + <param argument="--all_seq_in_graph" type="boolean" truevalue="--all_seq_in_graph" falsevalue="" label="Retains all DNA sequence" help="Retains all DNA sequence for each gene cluster in the graph output."/> + <param argument="--no_clean_edges" type="boolean" truevalue="--no_clean_edges" falsevalue="" label="No Clean Edges" help="Turn off edge filtering in the final output graph."/> + </section> + + <!--Gene Alignment--> + <section name="gene_alignment_option" title="Gene Alignment" expanded="false"> + <param argument="--alignment" type="select" label="Output alignments of core genes or all genes."> + <option value="None" selected="True">None</option> + <option value="core">Core genome alignment</option> + <option value="pan">Pan-genome alignment</option> + </param> + <param argument="--aligner" type="select" label="Specify an aligner" help="--aligner [mafft|prank|clustal][default: mafft]"> + <option value="mafft" selected="True">MAFFT</option> + <option value="prank">PRANK</option> + <option value="clustal">Clustal</option> + </param> + <param argument="--codons" type="boolean" label="Generate codon alignments by aligning sequences at the protein level" truevalue="--codons" falsevalue="" help="Generate codon alignments by aligning sequences at the protein level"/> + <param argument="--core_threshold" type="float" value="0.95" label="Core Threshold" help="Core-genome sample threshold"/> + <param argument="--core_subset" type="integer" optional="true" label="Subset of the core genome to these many genes" help="Randomly subset the core genome to these many genes. Default is all genes."/> + <param argument="--core_entropy_filter" type="float" value="0.1" label="Core Entropy Filter" help="Manually set the Block Mapping and Gathering with Entropy (BMGE) filter. By default this is set using the Tukey outlier method."/> + </section> + </when> <when value="do_not_set"/> - </conditional> + </conditional> </inputs> <outputs> - <collection name="output" type="list" label="${tool.name} on ${on_string}: Pangenome output"> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> - <filter>advanced['adv_options_selector'] != 'set'</filter> - </collection> - <collection name="output_advance" type="list" label="${tool.name} on ${on_string}: Pangenome output (advance)"> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> - <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] == 'None'</filter> - </collection> - <collection name="output_pangenome" type="list" label="${tool.name} on ${on_string}: Pangenome alignment output"> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>clstr)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>txt)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>gml)" directory="outdir" format="txt" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>Rtab)" directory="outdir" format="tabular" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>csv)" directory="outdir" format="csv" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fasta)" directory="outdir" format="fasta" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fa)" directory="outdir" format="fasta" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>aln)" directory="outdir" format="aln" visible="false" /> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>embl)" directory="outdir" format="embl" visible="false" /> - <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] != 'None' </filter> - </collection> - <collection name="output_pangenome_fasta" type="list" label="${tool.name} on ${on_string}: Pangenom alignment fasta"> - <discover_datasets pattern="(?P<designation>.+)\.(?P<ext>fas)" directory="outdir/aligned_gene_sequences" format="fasta" visible="false" /> - <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] != 'None'</filter> - </collection> - <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/> + <!--Panaroo default outputs --> + <collection name="output" type="list" label="${tool.name} on ${on_string}: Pangenome default output"> + <discover_datasets pattern="(?P<designation>.+)\.fasta$" directory="outdir" format="fasta" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.fa$" directory="outdir" format="fasta" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.txt$" directory="outdir" format="txt" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.clstr$" directory="outdir" format="txt" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.gml$" directory="outdir" format="txt" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.csv$" directory="outdir" format="csv" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.Rtab$" directory="outdir" format="tabular" visible="false"/> + <filter>( advanced['adv_options_selector'] != 'set' ) or ( advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['alignment'] == 'None' )</filter> + </collection> + + <!--Panaroo advance alignment outputs --> + <collection name="output_pangenome" type="list" label="${tool.name} on ${on_string}: Pangenome alignment output"> + <discover_datasets pattern="(?P<designation>.+)\.clstr$" directory="outdir" format="txt" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.txt$" directory="outdir" format="txt" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.gml$" directory="outdir" format="txt" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.Rtab$" directory="outdir" format="tabular" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.csv$" directory="outdir" format="csv" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.fasta$" directory="outdir" format="fasta" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.fa$" directory="outdir" format="fasta" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.aln$" directory="outdir" format="fasta" visible="false"/> + <discover_datasets pattern="(?P<designation>.+)\.embl$" directory="outdir" format="embl" visible="false"/> + <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['alignment'] != 'None' </filter> + </collection> + + <!--Pan Genome Aligned FASTA --> + <collection name="output_pangenome_fasta" type="list" label="${tool.name} on ${on_string}: Pangenome Alignment Gene Sequences"> + <discover_datasets pattern="(?P<designation>.+)\.fas$" directory="outdir/aligned_gene_sequences" format="fasta" visible="false"/> + <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['alignment'] != 'None'</filter> + </collection> + + <!--Panaroo log output --> + <data name="log" format="txt" label="${tool.name} on ${on_string}: Panaroo Log"> + <filter>log_out</filter> + </data> </outputs> <tests> - <!-- run panaroo with default parameters (i.e panaroo -t 2 -i *.gff -o default \-\-clean-mode strict \-\-remove-invalid-genes) --> - <test expect_num_outputs="2"> + <!-- Test 1 : Testing Panaroo with default parameters --> + <test expect_num_outputs="2"> + <param name="mode" value="strict"/> <param name="gen_code" value="11"/> - <param name="mode" value="strict"/> - <param name="adv_options_selector" value="do_not_set"/> + <param name="log_out" value="yes"/> + <conditional name="advanced"> + <param name="adv_options_selector" value="do_not_set"/> + </conditional> <param name="gff_input_collection"> <collection type="list"> <element name="10_small.gff" value="10_small.gff"/> <element name="11_small.gff" value="11_small.gff"/> </collection> </param> - <output_collection name="output" count="13" type="list"> - <element name="combined_DNA_CDS" file="combined_DNA_CDS.fasta" ftype="fa"> - <assert_contents> - <has_n_lines n="18206"/> - </assert_contents> - </element> - <element name="combined_protein_CDS" file="combined_protein_CDS.fasta" ftype="fa"> - <assert_contents> - <has_n_lines n="7048"/> - </assert_contents> - </element> - <element name="combined_protein_cdhit_out" file="combined_protein_cdhit_out.fa" ftype="fa"> - <assert_contents> - <has_n_lines n="5119"/> - </assert_contents> - </element> - <element name="gene_data" file="gene_data.csv" ftype="csv"> - <assert_contents> - <has_text text="KPLBOJCC_00001"/> - <has_text text="NCFNLLIC_00549" /> - </assert_contents> - </element> - <element name="gene_presence_absence" file="gene_presence_absence.csv" ftype="csv"> - <assert_contents> - <has_text text="dcd"/> - <has_text text="trmB"/> - <has_text text="betI_1"/> - </assert_contents> - </element> - <element name="gene_presence_absence_roary" file="gene_presence_absence_roary.csv" ftype="csv"> - <assert_contents> - <has_text text="kstR2_1"/> - <has_text text="ybgJ"/> - </assert_contents> - </element> - <element name="pan_genome_reference" file="pan_genome_reference.fa" ftype="fa"> - <assert_contents> - <has_n_lines n="5055"/> - </assert_contents> - </element> - <element name="struct_presence_absence" file="struct_presence_absence.Rtab" ftype="Rtab"> - <assert_contents> - <has_line_matching expression="Gene\s+10_small\s+11_small"/> - </assert_contents> - </element> - <element name="summary_statistics" file="summary_statistics.txt" ftype="txt"> - <assert_contents> - <has_line line="Core genes	(99% <= strains <= 100%)	251"/> - <has_line line="Total genes	(0% <= strains <= 100%)	251"/> - </assert_contents> - </element> - </output_collection> + <output_collection name="output" count="13" type="list"> + <element name="combined_DNA_CDS" ftype="fasta"> + <assert_contents> + <has_n_lines n="18206"/> + </assert_contents> + </element> + <element name="combined_protein_CDS" ftype="fasta"> + <assert_contents> + <has_n_lines n="7048"/> + </assert_contents> + </element> + <element name="combined_protein_cdhit_out" ftype="fasta"> + <assert_contents> + <has_n_lines n="5119"/> + </assert_contents> + </element> + <element name="pan_genome_reference" ftype="fasta"> + <assert_contents> + <has_n_lines n="5055"/> + </assert_contents> + </element> + <element name="summary_statistics" ftype="txt"> + <assert_contents> + <has_line line="Core genes	(99% <= strains <= 100%)	251"/> + <has_line line="Total genes	(0% <= strains <= 100%)	251"/> + </assert_contents> + </element> + <element name="gene_data" ftype="csv"> + <assert_contents> + <has_text text="KPLBOJCC_00001"/> + <has_text text="NCFNLLIC_00549"/> + </assert_contents> + </element> + <element name="gene_presence_absence" ftype="csv"> + <assert_contents> + <has_text text="dcd"/> + <has_text text="trmB"/> + <has_text text="betI_1"/> + </assert_contents> + </element> + <element name="gene_presence_absence_roary" ftype="csv"> + <assert_contents> + <has_text text="kstR2_1"/> + <has_text text="ybgJ"/> + </assert_contents> + </element> + <element name="struct_presence_absence" ftype="tabular"> + <assert_contents> + <has_line_matching expression="Gene\s+10_small\s+11_small"/> + </assert_contents> + </element> + </output_collection> <output name="log"> <assert_contents> - <has_text text="pre-processing gff3 files..."/> + <has_text text="total seq: 979"/> </assert_contents> </output> </test> - <test expect_num_outputs="2"> - <param name="gen_code" value="11"/> - <param name="mode" value="strict"/> - <param name="adv_options_selector" value="set"/> - <param name="a" value="None"/> - <param name="gff_input_collection"> - <collection type="list"> - <element name="10_small.gff" value="10_small.gff"/> - <element name="11_small.gff" value="11_small.gff"/> - </collection> - </param> - <output_collection name="output_advance" count="13" type="list"> - <element name="combined_DNA_CDS" file="advance/combined_DNA_CDS.fasta" ftype="fa"> - <assert_contents> - <has_n_lines n="18206"/> - </assert_contents> - </element> - <element name="combined_protein_CDS" file="advance/combined_protein_CDS.fasta" ftype="fa"> - <assert_contents> - <has_n_lines n="7048"/> - </assert_contents> - </element> - <element name="combined_protein_cdhit_out" file="advance/combined_protein_cdhit_out.fa" ftype="fa"> - <assert_contents> - <has_n_lines n="5119"/> - </assert_contents> - </element> - <element name="gene_data" ftype="csv"> - <assert_contents> - <has_n_lines n="980"/> - <has_n_columns sep="," n="8"/> - <has_text text="KPLBOJCC_00003"/> - <has_text text="NCFNLLIC_00003"/> - </assert_contents> - </element> - <element name="gene_presence_absence" file="advance/gene_presence_absence.csv" ftype="csv"> - <assert_contents> - <has_text text="recB"/> - <has_text text="recC"/> - <has_text text="rpoB"/> - </assert_contents> - </element> - <element name="gene_presence_absence_roary" file="advance/gene_presence_absence_roary.csv" ftype="csv"> - <assert_contents> - <has_text text="ctpI_2"/> - <has_text text="amiD_1"/> - </assert_contents> - </element> - <element name="pan_genome_reference" file="advance/pan_genome_reference.fa" ftype="fa"> - <assert_contents> - <has_n_lines n="13120"/> - </assert_contents> - </element> - <element name="struct_presence_absence" file="advance/struct_presence_absence.Rtab" ftype="Rtab"> - <assert_contents> - <has_line_matching expression="Gene\s+10_small\s+11_small"/> - </assert_contents> - </element> - <element name="summary_statistics" file="advance/summary_statistics.txt" ftype="txt"> - <assert_contents> - <has_line line="Core genes	(99% <= strains <= 100%)	251"/> - <has_line line="Shell genes	(15% <= strains < 95%)	475"/> - <has_line line="Total genes	(0% <= strains <= 100%)	726"/> - </assert_contents> - </element> + + <!-- Test 2 : Testing Panaroo with Advanced filtering option along with Alignment turned off --> + <test expect_num_outputs="2"> + <param name="gen_code" value="11"/> + <param name="mode" value="strict"/> + <param name="log_out" value="yes"/> + <conditional name="advanced"> + <param name="adv_options_selector" value="set"/> + <section name="gene_alignment_option"> + <param name="alignment" value="None"/> + </section> + </conditional> + <param name="gff_input_collection"> + <collection type="list"> + <element name="10_small.gff" value="10_small.gff"/> + <element name="11_small.gff" value="11_small.gff"/> + </collection> + </param> + <output_collection name="output" count="13" type="list"> + <element name="combined_DNA_CDS" ftype="fasta"> + <assert_contents> + <has_n_lines n="18206"/> + </assert_contents> + </element> + <element name="combined_protein_CDS" ftype="fasta"> + <assert_contents> + <has_n_lines n="7048"/> + </assert_contents> + </element> + <element name="combined_protein_cdhit_out" ftype="fasta"> + <assert_contents> + <has_n_lines n="5119"/> + </assert_contents> + </element> + <element name="pan_genome_reference" ftype="fasta"> + <assert_contents> + <has_n_lines n="13120"/> + </assert_contents> + </element> + <element name="summary_statistics" ftype="txt"> + <assert_contents> + <has_line line="Core genes	(99% <= strains <= 100%)	251"/> + <has_line line="Shell genes	(15% <= strains < 95%)	475"/> + <has_line line="Total genes	(0% <= strains <= 100%)	726"/> + </assert_contents> + </element> + <element name="gene_data" ftype="csv"> + <assert_contents> + <has_n_lines n="980"/> + <has_n_columns sep="," n="8"/> + <has_text text="KPLBOJCC_00003"/> + <has_text text="NCFNLLIC_00003"/> + </assert_contents> + </element> + <element name="gene_presence_absence" ftype="csv"> + <assert_contents> + <has_text text="recB"/> + <has_text text="recC"/> + <has_text text="rpoB"/> + </assert_contents> + </element> + <element name="gene_presence_absence_roary" ftype="csv"> + <assert_contents> + <has_text text="ctpI_2"/> + <has_text text="amiD_1"/> + </assert_contents> + </element> + <element name="struct_presence_absence" ftype="tabular"> + <assert_contents> + <has_line_matching expression="Gene\s+10_small\s+11_small"/> + </assert_contents> + </element> </output_collection> - <output name="log"> - <assert_contents> - <has_text text="pre-processing gff3 files..."/> - </assert_contents> + <output name="log"> + <assert_contents> + <has_text text="total seq: 979"/> + </assert_contents> </output> - </test> - <test expect_num_outputs="3"> - <param name="gen_code" value="11"/> - <param name="mode" value="strict"/> - <param name="adv_options_selector" value="set"/> - <param name="a" value="core"/> - <param name="gff_input_collection"> - <collection type="list"> - <element name="10_small.gff" value="10_small.gff"/> - <element name="11_small.gff" value="11_small.gff"/> - </collection> - </param> - <output_collection name="output_pangenome" count="18" type="list"> - <element name="combined_DNA_CDS" file="core/combined_DNA_CDS.fasta" ftype="fa"> - <assert_contents> - <has_n_lines n="18206"/> - </assert_contents> - </element> - <element name="combined_protein_CDS" file="core/combined_protein_CDS.fasta" ftype="fa"> - <assert_contents> - <has_n_lines n="7048"/> - </assert_contents> - </element> - <element name="combined_protein_cdhit_out" file="core/combined_protein_cdhit_out.fa" ftype="fa"> - <assert_contents> - <has_n_lines n="5119"/> - </assert_contents> - </element> - <element name="summary_statistics" file="core/summary_statistics.txt" ftype="txt"> - <assert_contents> - <has_line line="Core genes	(99% <= strains <= 100%)	251"/> - <has_line line="Shell genes	(15% <= strains < 95%)	475"/> - <has_line line="Total genes	(0% <= strains <= 100%)	726"/> - </assert_contents> - </element> - <element name="struct_presence_absence" file="core/struct_presence_absence.Rtab" ftype="Rtab"> - <assert_contents> - <has_line_matching expression="Gene\s+10_small\s+11_small"/> - </assert_contents> - </element> - <element name="alignment_entropy" file="core/alignment_entropy.csv" ftype="csv"> - <assert_contents> - <has_text text="stf0.aln,0.0"/> - <has_text text="bglB.aln,0.0"/> - </assert_contents> - </element> - <element name="gene_data" ftype="csv"> - <assert_contents> - <has_n_lines n="980"/> - <has_n_columns sep="," n="8"/> - <has_text text="KPLBOJCC_00003"/> - <has_text text="NCFNLLIC_00003"/> - </assert_contents> - </element> - <element name="pan_genome_reference" file="core/pan_genome_reference.fa" ftype="fa"> - <assert_contents> - <has_n_lines n="13120"/> - </assert_contents> - </element> - <element name="gene_presence_absence" file="core/gene_presence_absence.csv" ftype="csv"> - <assert_contents> - <has_text text="recB"/> - <has_text text="recC"/> - <has_text text="rpoB"/> - </assert_contents> - </element> - <element name="gene_presence_absence_roary" file="core/gene_presence_absence_roary.csv" ftype="csv"> - <assert_contents> - <has_text text="ctpI_2"/> - <has_text text="amiD_1"/> - </assert_contents> - </element> - <element name="core_gene_alignment_filtered" ftype="aln"> - <assert_contents> - <has_size value="568632" delta="1000"/> - </assert_contents> - </element> - <element name="core_gene_alignment" ftype="aln"> - <assert_contents> - <has_size value="569962" delta="1000"/> - </assert_contents> - </element> - <element name="core_alignment_header" file="core/core_alignment_header.embl" ftype="embl"> - <assert_contents> - <has_text text="ID Genome standard; DNA; PRO; 1234 BP."/> - <has_text text="hisB_1.aln"/> - </assert_contents> - </element> - <element name="core_alignment_filtered_header" file="core/core_alignment_filtered_header.embl" ftype="embl"> - <assert_contents> - <has_text text="ID Genome standard; DNA; PRO; 1234 BP."/> - <has_text text=" FT feature 79606..80691 "/> - </assert_contents> - </element> - </output_collection> - <output_collection name="output_pangenome_fasta" count="251"/> - <output name="log"> - <assert_contents> - <has_text text="pre-processing gff3 files..."/> - </assert_contents> - </output> - </test> + </test> + <!-- Test 3 : Testing Panaroo with Advanced Filtering options along with MAFFT core alignment --> + <test expect_num_outputs="3"> + <param name="gen_code" value="11"/> + <param name="mode" value="strict"/> + <param name="log_out" value="yes"/> + <conditional name="advanced"> + <param name="adv_options_selector" value="set"/> + <section name="gene_alignment_option"> + <param name="alignment" value="core"/> + <param name="aligner" value="mafft"/> + </section> + </conditional> + <param name="gff_input_collection"> + <collection type="list"> + <element name="10_small.gff" value="10_small.gff"/> + <element name="11_small.gff" value="11_small.gff"/> + </collection> + </param> + <output_collection name="output_pangenome" count="18" type="list"> + <element name="summary_statistics" ftype="txt"> + <assert_contents> + <has_line line="Core genes	(99% <= strains <= 100%)	251"/> + <has_line line="Shell genes	(15% <= strains < 95%)	475"/> + <has_line line="Total genes	(0% <= strains <= 100%)	726"/> + </assert_contents> + </element> + <element name="alignment_entropy" ftype="csv"> + <assert_contents> + <has_text text="stf0.aln,0.0"/> + <has_text text="bglB.aln,0.0"/> + </assert_contents> + </element> + <element name="combined_DNA_CDS" ftype="fasta"> + <assert_contents> + <has_n_lines n="18206"/> + </assert_contents> + </element> + <element name="combined_protein_cdhit_out" ftype="fasta"> + <assert_contents> + <has_n_lines n="5119"/> + </assert_contents> + </element> + <element name="combined_protein_CDS" ftype="fasta"> + <assert_contents> + <has_n_lines n="7048"/> + </assert_contents> + </element> + <element name="struct_presence_absence" ftype="tabular"> + <assert_contents> + <has_line_matching expression="Gene\s+10_small\s+11_small"/> + </assert_contents> + </element> + <element name="gene_data" ftype="csv"> + <assert_contents> + <has_n_lines n="980"/> + <has_n_columns sep="," n="8"/> + <has_text text="KPLBOJCC_00003"/> + <has_text text="NCFNLLIC_00003"/> + </assert_contents> + </element> + <element name="pan_genome_reference" ftype="fasta"> + <assert_contents> + <has_n_lines n="13120"/> + </assert_contents> + </element> + <element name="gene_presence_absence" ftype="csv"> + <assert_contents> + <has_text text="recB"/> + <has_text text="recC"/> + <has_text text="rpoB"/> + </assert_contents> + </element> + <element name="gene_presence_absence_roary" ftype="csv"> + <assert_contents> + <has_text text="ctpI_2"/> + <has_text text="amiD_1"/> + </assert_contents> + </element> + <element name="core_gene_alignment_filtered" ftype="fasta"> + <assert_contents> + <has_size value="569962" delta="1000"/> + </assert_contents> + </element> + <element name="core_gene_alignment" ftype="fasta"> + <assert_contents> + <has_size value="569962" delta="1000"/> + </assert_contents> + </element> + <element name="core_alignment_header" ftype="embl"> + <assert_contents> + <has_text text="ID Genome standard; DNA; PRO; 1234 BP."/> + <has_text text="hisB_1.aln"/> + </assert_contents> + </element> + <element name="core_alignment_filtered_header" ftype="embl"> + <assert_contents> + <has_n_lines n="760" delta="10"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="output_pangenome_fasta" count="251"/> + <output name="log"> + <assert_contents> + <has_text text="total seq: 979"/> + </assert_contents> + </output> + </test> </tests> <help><