Mercurial > repos > galaxy-australia > panaroo

diff panaroo.xml @ 1:b6a78d286482 draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au/tree/master/tools/panaroo commit 23afccfad9fc0d2a4b91fd391f4847062fd98042
author: iuc
date: Fri, 11 Apr 2025 11:23:50 +0000
parents: 50483f852947
children: b05be6316263
--- a/panaroo.xml	Fri Apr 11 07:46:04 2025 +0000
+++ b/panaroo.xml	Fri Apr 11 11:23:50 2025 +0000
@@ -6,421 +6,436 @@
     <expand macro="edam_ontology"/>
     <expand macro="biotools"/>
     <expand macro="requirements"/>
-    <stdio>
-        <exit_code range="1:" />
-        <regex match="System..*Exception"
-           source="both"
-           level="fatal"
-           description="Error encountered" />
-    </stdio>
-    <command><![CDATA[
-
-         mkdir outdir &&
-	    
-	 #import re
-	 #set input_directory = 'input_directory'
-	 mkdir $input_directory &&
-	 #for $gff in $gff_input_collection:
-	    #set identifier = re.sub('[^\s\w\-\\.]','_',str($gff.element_identifier))
-	    ln -fs '$gff' '$input_directory/$identifier' &&
-	 #end for
+    <command detect_errors="exit_code"><![CDATA[
+      set -o pipefail && mkdir outdir &&  ## pipefail keeps a panaroo failure visible when its output is piped through tee
+      #import re
+      #set input_directory = 'input_directory'
+      mkdir $input_directory &&
+      #for $gff in $gff_input_collection:
+            #set identifier = re.sub('(\.gff)?\Z', '.gff', re.sub('[^\s\w\-\\.]', '_', str($gff.element_identifier)), count=1)
+            ln -fs '$gff' '$input_directory/$identifier' &&  ## sanitised link name always ends in .gff so the *.gff input glob picks it up
+      #end for
 
-	 panaroo 
-	 -t \${GALAXY_SLOTS:-2}
-         #if str($gen_code) != 'None':
-           --codon-table $gen_code
-	 #end if
-         #if str($advanced.adv_options_selector) == "set":
-	     #if $advanced.remove_invalid_gene
-		 $advanced.remove_invalid_gene
-	     #end if
-	     -c '$advanced.matching_option.seq_threshold'
-	     -f '$advanced.matching_option.peptide_threshold'
-	     --len_dif_percent '$advanced.matching_option.length_diff_cutoff'
-	     $advanced.matching_option.merge_paralogs
-	     --search_radius '$advanced.refind_option.search_radius'
-	     --refind_prop_match '$advanced.refind_option.refind_prop_match'
-	     --refind-mode '$advanced.refind_option.refind_mode'
-	     --min_trailing_support '$advanced.graph_correction_option.min_trailing_support'
-	     --trailing_recursive '$advanced.graph_correction_option.trailing_recursive'
-	     --edge_support_threshold '$advanced.graph_correction_option.edge_support_threshold'
-	     --remove_by_consensus '$advanced.graph_correction_option.remove_by_consensus'
-	     --high_var_flag '$advanced.graph_correction_option.high_var_flag'
-	     --min_edge_support_sv '$advanced.graph_correction_option.min_edge_support_sv'
-	     $advanced.graph_correction_option.all_seq_in_graph
-	     $advanced.graph_correction_option.no_clean_edges
-	
-	     #if $advanced.gene_alignment_option.a != 'None'
-		-a '$advanced.gene_alignment_option.a'
-	     #end if
-
-	     #if '$advanced.gene_alignment_option.aligner' == 'mafft'
-                --aligner mafft
-	     #else
-               --aligner '$advanced.gene_alignment_option.aligner'
-	     #end if
-	     #if $advanced.gene_alignment_option.core_subset != ''
-		--core_subset $advanced.gene_alignment_option.core_subset
-	     #end if
-         #end if
-	 -i $input_directory/*.gff 
-	 -o outdir 
-	 --clean-mode $mode 
-	 > '$log' &&
-	 mv outdir/gene_presence_absence.Rtab outdir/gene_presence_absence_rtab.Rtab &&
-	 mv outdir/combined_protein_cdhit_out.txt outdir/combined_protein_cdhit_out.fa && 
-	 2>&1  
-
+      panaroo
+      --clean-mode '$mode'
+      #if str($gen_code) != 'None':
+            --codon-table '$gen_code'
+      #end if
+      #if str($advanced.adv_options_selector) == "set":
+            #if $advanced.remove_invalid_genes
+                  $advanced.remove_invalid_genes
+            #end if
+            --threshold '$advanced.matching_option.threshold'
+            --family_threshold '$advanced.matching_option.family_threshold'
+            --len_dif_percent '$advanced.matching_option.len_dif_percent'
+            $advanced.matching_option.merge_paralogs
+            --search_radius '$advanced.refind_option.search_radius'
+            --refind_prop_match '$advanced.refind_option.refind_prop_match'
+            --refind-mode '$advanced.refind_option.refind_mode'
+            --min_trailing_support '$advanced.graph_correction_option.min_trailing_support'
+            --trailing_recursive '$advanced.graph_correction_option.trailing_recursive'
+            --edge_support_threshold '$advanced.graph_correction_option.edge_support_threshold'
+            --remove_by_consensus '$advanced.graph_correction_option.remove_by_consensus'
+            --high_var_flag '$advanced.graph_correction_option.high_var_flag'
+            --min_edge_support_sv '$advanced.graph_correction_option.min_edge_support_sv'
+            $advanced.graph_correction_option.all_seq_in_graph
+            $advanced.graph_correction_option.no_clean_edges
+            #if str($advanced.gene_alignment_option.alignment) != 'None'
+                  --alignment '$advanced.gene_alignment_option.alignment' --aligner '$advanced.gene_alignment_option.aligner'
+                  --core_threshold '$advanced.gene_alignment_option.core_threshold' $advanced.gene_alignment_option.codons  ## both were declared in the form but never sent to panaroo
+            #end if
+            #if $advanced.gene_alignment_option.core_subset
+                  --core_subset '$advanced.gene_alignment_option.core_subset'
+            #end if
+            #if $advanced.gene_alignment_option.core_entropy_filter
+                  --core_entropy_filter '$advanced.gene_alignment_option.core_entropy_filter'
+            #end if
+      #end if
+      -i $input_directory/*.gff
+      -o outdir
+      -t \${GALAXY_SLOTS:-8}
+      #if $log_out
+            2>&1 | tee '$log'
+      #end if
+      && mv outdir/gene_presence_absence.Rtab outdir/gene_presence_absence_rtab.Rtab &&
+      mv outdir/combined_protein_cdhit_out.txt outdir/combined_protein_cdhit_out.fa
     ]]></command>
     <inputs>
-	<param name="gff_input_collection" type="data_collection" format="gff" collection_type="list" label="GFF Input Collection" help="A list of gff files (i.e prokka)"/>
-	<param name="mode" type="select" label="The stringency mode at which to run panaroo" help="--clean-mode">
-            <expand macro="clean_mode"/>
+	<param name="gff_input_collection" type="data_collection" format="gff" collection_type="list" label="GFF Input Collection" help="A collection of input GFF files"/>
+	<param name="mode" type="select" label="The stringency mode for Panaroo to run" help="Each of these modes can be fine tuned using the additional parameters in the 'Graph correction' section.">
+            <option value="strict">Strict</option>
+            <option value="moderate">Moderate</option>
+            <option value="sensitive">Sensitive</option>
     	</param>
-	<param name="gen_code" type="select" label="the codon table user for translation" help="default: 11">
+	<param name="gen_code" type="select" label="The Codon table used for translation" help="Default: 11.Bacteria and Archaea">
             <expand macro="genetic_code"/>
     	</param>
-        <conditional name="advanced">
-            <param name="adv_options_selector" type="select" label="Set advanced options?" help="Provides additional controls">
+      <param name="log_out" type="boolean" label="Output log file?" truevalue="yes" falsevalue="no"/>
+      <conditional name="advanced">
+            <param name="adv_options_selector" type="select" label="Set Advanced Options?" help="Fine Tuning of Panaroo algorithmic parameters">
                 <option value="set">Set</option>
                 <option value="do_not_set" selected="True">Do not set</option>
             </param>
-	    <when value="set">
-		 <param argument="--remove-invalid-genes" name="remove_invalid_gene" type="boolean" truevalue="--remove-invalid-genes" falsevalue=""  label="removes annotations that do not conform to the expected Prokka format such as those including premature stop codons" help="--remove-invalid-genes"/>
-
-                 <section name="matching_option" title="Matching" expanded="false">
-                   <param argument="--threshold" name="seq_threshold" type="float" value="0.98" label="sequence identity threshold" help="default: 0.98"/>
-                   <param argument="--family_threshold" name="peptide_threshold" type="float" value="0.7" label="protein family sequence identity threshold" help="default: 0.7"/>
-                   <param argument="--len_dif_percent" name="length_diff_cutoff" type="float" value="0.98" label="length difference cutoff" help="default: 0.98"/>
-                   <param name="merge_paralogs" type="boolean" truevalue="--merge_paralogs" falsevalue="" checked="false" label="do not split paralogs" help="--merge_paralogs"/>
-	   	 </section>
-
-                 <section name="refind_option" title="Refind" expanded="false">
-                   <param argument="--search_radius" type="integer" value="5000" label="Search radius" help="--search_radius (default: 5000)"/>
-                   <param argument="--refind_prop_match" type="float" value="0.75" label="Gene proportion match" help="default: 0.75"/>
-		   <param argument="--refind_mode" type="select" label="The stringency mode at which to re-find genes" help="default: default">
-			  <expand macro="refind_mode_option"/>
-	   	   </param>
-                 </section>
+	      <when value="set">
+		      <param argument="--remove-invalid-genes" type="boolean" truevalue="--remove-invalid-genes" falsevalue="" label="Remove Invalid Genes" help="Removes annotations that do not conform to the expected Prokka format."/>
+                  
+                  <!--Options for Matching: sequence and family identity thresholds, length cutoff and paralog handling-->
+                  <section name="matching_option" title="Matching" expanded="false">
+                        <param argument="--threshold" type="float" value="0.98" label="Sequence identity threshold" help="default: 0.98"/>
+                        <param argument="--family_threshold" type="float" value="0.7" label="Protein family sequence identity threshold" help="default: 0.7"/>
+                        <param argument="--len_dif_percent" type="float" value="0.98" label="Length difference cutoff" help="default: 0.98"/>
+                        <param argument="--merge_paralogs" type="boolean" truevalue="--merge_paralogs" falsevalue="" label="Merge Paralogs" help="Do not split paralogs"/>
+                  </section>
+            
+                  <!--Options for Refind: search window, required match proportion and stringency used when re-finding genes-->
+                  <section name="refind_option" title="Refind" expanded="false">
+                        <param argument="--search_radius" type="integer" value="5000" label="Refinding Search radius" help="The distance in nucleotides surrounding the neighbour of an accessory gene in which to search for it"/>
+                        <param argument="--refind_prop_match" type="float" value="0.2" label="Refinding Proportion Match" help="The proportion of an accessory gene that must be found in order to consider it a match"/>
+                        <param argument="--refind-mode" type="select" label="Refind Mode" help="Set the stringency mode at which to re-find genes">
+                              <option value="default" selected="True">Default</option>
+                              <option value="strict">Strict</option>
+                              <option value="off">Off</option>
+                        </param>
+                  </section>
 
-                 <section name="graph_correction_option" title="Graph Correction" expanded="false">
-                   <param argument="--min_trailing_support" type="integer" value="2" label="Minimum cluster size to keep a gene called at the end of a contig" help="--min_traiiing_support [relexed mode : 2 is used]"/>
-                   <param argument="--trailing_recursive" type="integer" value="1" label="Number of times to perform recursive trimming of low support nodes near the end of contigs" help="--trailing_recursive [relaxed mode: 1 is used]"/>
-                   <param argument="--edge_support_threshold" type="integer" value="1" label="Edge support threshold" help="--edge_support_threshold [ Minimal edge 1 is used ]"/>
-		   <param argument="--len_outlier_proportion" type="float" value="0.01" label="Length outlier support proportion" help="--length_outlier_support_proportion [default: 0.01]"/>
-		   <param argument="--remove_by_consensus" type="boolean" truevalue="True" falsevalue="False" checked="False" label="Remove consensus" help="--remove_by_consensus [default: False]"/>
-		   <param argument="--high_var_flag" type="integer" value="5" label="Highly variable gene region" help="--high_var_flag [default: 5]"/>
-		   <param argument="--min_edge_support_sv" type="integer" value="2" label="Minimum edge support structural variants" help="--min_edge_support_sv [relaxed mode: 2 is used]"/>
-		   <param argument="--all_seq_in_graph" type="boolean" truevalue="--all_seq_in_graph" falsevalue="" label="Retains all DNA sequence" help="--all_seq_in_graph [default: off]"/>
-		   <param argument="--no_clean_edges" type="boolean" truevalue="--no_clean_edges" falsevalue="" label="Edge filtering in the final output graph" help="--no_clean_edges [default: off]"/>
-	   	</section>
-	
-		<section name="gene_alignment_option" title="Gene Alignment" expanded="false">
-		    <param argument="-a" type="select" label="Output alignments of core genes or all genes." help="-a [optional: core or pan; default: None">
-                        <expand macro="gene_alignment"/>
-                   </param>
-		   <param argument="--aligner" type="select" label="Specify an aligner" help="--aligner [mafft|prank|clustal][default: mafft]">
-			<expand macro="gene_aligner"/>
-		   </param>
-		   <param name="codons" type="boolean" truevalue="true" falsevalue="false"  checked="false" label="Generate codon alignments" help="--codons"/>
-		   <param name="core_threshold" type="float" value="0.95" label="Core-genome sample threshold" help="--core_threshold [default: 0.95]"/>
-		   <param argument="--core_subset" type="integer" value=""  optional="true" label="Subset of the core genome to these many genes" help="--core_subset [default: all]"/>
-		   <param name="core_entropy" type="float" value="0.1" label="Set the Block Mapping and Gathering with Entropy" help="--core_entropy_filter (threshold can be between 0.0 and 1.0) [default: Tukey outlier method]"/>
-		</section>
-	    </when>
+                  <!--Graph Correction: values and switches that the command template forwards to the panaroo graph clean-up flags of the same names. NOTE(review): len_outlier_proportion is declared here but the command template in this revision never references it, so changing it appears to have no effect; confirm whether it should be wired up.-->
+                  <section name="graph_correction_option" title="Graph Correction" expanded="false">
+                        <param argument="--min_trailing_support" type="integer" value="2" label="Minimum trailing support" help="Minimum cluster size to keep a gene called at the end of a contig"/>
+                        <param argument="--trailing_recursive" type="integer" value="1" label="Trailing Recursive"  help="Number of times to perform recursive trimming of low support nodes near the end of contigs"/>
+                        <param argument="--edge_support_threshold" type="float" value="1" label="Edge support threshold" help="Minimum support required to keep an edge that has been flagged as a possible mis-assembly."/>
+                        <param argument="--len_outlier_proportion" type="float" value="0.01" label="Length outlier support proportion" help="--length_outlier_support_proportion"/>
+                        <param argument="--remove_by_consensus" type="boolean" truevalue="True" falsevalue="False" label="Remove consensus" help="If a gene is called in the same region with similar sequence a minority of the time, remove it."/>
+                        <param argument="--high_var_flag" type="integer" value="5" label="Highly variable gene region" help="Minimum number of nested cycles to call a highly variable gene region."/>
+                        <param argument="--min_edge_support_sv" type="integer" value="2" label="Minimum edge support structural variants" help="Minimum edge support required to call structural variants in the presence/absence sv file"/>
+                        <param argument="--all_seq_in_graph" type="boolean" truevalue="--all_seq_in_graph" falsevalue="" label="Retains all DNA sequence" help="Retains all DNA sequence for each gene cluster in the graph output."/>
+                        <param argument="--no_clean_edges" type="boolean" truevalue="--no_clean_edges" falsevalue="" label="No Clean Edges" help="Turn off edge filtering in the final output graph."/>
+                  </section>
+            
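+                  <!--Gene Alignment: alignment scope, aligner choice and core-genome filters. NOTE(review): core_entropy_filter has a fixed default of 0.1 and is not optional, so the command template sends it whenever advanced options are set, although its help text says the default is the Tukey outlier method; confirm which is intended.-->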
+		      <section name="gene_alignment_option" title="Gene Alignment" expanded="false">
+		            <param argument="--alignment" type="select" label="Output alignments of core genes or all genes.">
+                              <option value="None" selected="True">None</option>
+                              <option value="core">Core genome alignment</option>
+                              <option value="pan">Pan-genome alignment</option>
+                        </param>
+		            <param argument="--aligner" type="select" label="Specify an aligner" help="--aligner [mafft|prank|clustal][default: mafft]">
+                              <option value="mafft" selected="True">MAFFT</option>
+                              <option value="prank">PRANK</option>
+                              <option value="clustal">Clustal</option>
+		            </param>
+                        <param argument="--codons" type="boolean" label="Generate codon alignments by aligning sequences at the protein level" truevalue="--codons" falsevalue="" help="Generate codon alignments by aligning sequences at the protein level"/>
+                        <param argument="--core_threshold" type="float" value="0.95" label="Core Threshold" help="Core-genome sample threshold"/>
+		            <param argument="--core_subset" type="integer" optional="true" label="Subset of the core genome to these many genes" help="Randomly subset the core genome to these many genes. Default is all genes."/>
+                        <param argument="--core_entropy_filter" type="float" value="0.1" label="Core Entropy Filter" help="Manually set the Block Mapping and Gathering with Entropy (BMGE) filter. By default this is set using the Tukey outlier method."/>
+		      </section>
+	      </when>
             <when value="do_not_set"/>
-        </conditional>
+      </conditional>
     </inputs>
     <outputs>
-        <collection name="output" type="list" label="${tool.name} on ${on_string}: Pangenome output">
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;clstr)" directory="outdir" format="txt" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;txt)" directory="outdir" format="txt" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;gml)" directory="outdir" format="txt" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;Rtab)" directory="outdir" format="tabular" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;csv)" directory="outdir" format="csv" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fasta)" directory="outdir" format="fasta" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fa)" directory="outdir" format="fasta" visible="false" />
-             <filter>advanced['adv_options_selector'] != 'set'</filter>
-       </collection>
-	<collection name="output_advance" type="list" label="${tool.name} on ${on_string}: Pangenome output (advance)">
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;clstr)" directory="outdir" format="txt" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;txt)" directory="outdir" format="txt" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;gml)" directory="outdir" format="txt" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;Rtab)" directory="outdir" format="tabular" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;csv)" directory="outdir" format="csv" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fasta)" directory="outdir" format="fasta" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fa)" directory="outdir" format="fasta" visible="false" />
-	     <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] == 'None'</filter>
-       </collection>
-        <collection name="output_pangenome" type="list" label="${tool.name} on ${on_string}: Pangenome alignment output">
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;clstr)" directory="outdir" format="txt" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;txt)" directory="outdir" format="txt" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;gml)" directory="outdir" format="txt" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;Rtab)" directory="outdir" format="tabular" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;csv)" directory="outdir" format="csv" visible="false" />
-             <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fasta)" directory="outdir" format="fasta" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fa)" directory="outdir" format="fasta" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;aln)" directory="outdir" format="aln" visible="false" />
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;embl)" directory="outdir" format="embl" visible="false" />
-             <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] != 'None' </filter>
-       </collection>
-       <collection name="output_pangenome_fasta" type="list" label="${tool.name} on ${on_string}: Pangenom alignment fasta">
-	     <discover_datasets pattern="(?P&lt;designation&gt;.+)\.(?P&lt;ext&gt;fas)" directory="outdir/aligned_gene_sequences" format="fasta" visible="false" />  
-             <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['a'] != 'None'</filter>
-       </collection>
-       <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/>
+      <!--Panaroo default outputs -->
+      <collection name="output" type="list" label="${tool.name} on ${on_string}: Pangenome default output">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta$" directory="outdir" format="fasta" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fa$" directory="outdir" format="fasta" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt$" directory="outdir" format="txt" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.clstr$" directory="outdir" format="txt" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.gml$" directory="outdir" format="txt" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.csv$" directory="outdir" format="csv" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.Rtab$" directory="outdir" format="tabular" visible="false"/>
+            <filter>( advanced['adv_options_selector'] != 'set' ) or ( advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['alignment'] == 'None' )</filter>
+      </collection>
+
+      <!--Panaroo advanced alignment outputs -->
+      <collection name="output_pangenome" type="list" label="${tool.name} on ${on_string}: Pangenome alignment output">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.clstr$" directory="outdir" format="txt" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.txt$" directory="outdir" format="txt" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.gml$" directory="outdir" format="txt" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.Rtab$" directory="outdir" format="tabular" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.csv$" directory="outdir" format="csv" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fasta$" directory="outdir" format="fasta" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fa$" directory="outdir" format="fasta" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.aln$" directory="outdir" format="fasta" visible="false"/>
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.embl$" directory="outdir" format="embl" visible="false"/>
+            <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['alignment'] != 'None' </filter>
+      </collection>
+
+      <!--Pan Genome Aligned FASTA -->
+      <collection name="output_pangenome_fasta" type="list" label="${tool.name} on ${on_string}: Pangenome Alignment Gene Sequences">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.fas$" directory="outdir/aligned_gene_sequences" format="fasta" visible="false"/>  
+            <filter>advanced['adv_options_selector'] == 'set' and advanced['gene_alignment_option']['alignment'] != 'None'</filter>
+      </collection>
+
+      <!--Panaroo log output -->
+      <data name="log" format="txt" label="${tool.name} on ${on_string}: Panaroo Log">
+            <filter>log_out</filter>
+      </data>
     </outputs>
     <tests>
-	  <!-- run panaroo with default parameters (i.e panaroo -t 2 -i *.gff -o default \-\-clean-mode strict \-\-remove-invalid-genes) -->
-         <test expect_num_outputs="2">
+      <!-- Test 1 : Testing Panaroo with default parameters -->
+      <test expect_num_outputs="2">
+            <param name="mode" value="strict"/>
             <param name="gen_code" value="11"/>
-            <param name="mode" value="strict"/>
-            <param name="adv_options_selector" value="do_not_set"/>
+            <param name="log_out" value="yes"/>
+            <conditional name="advanced">
+                  <param name="adv_options_selector" value="do_not_set"/>
+            </conditional>
             <param name="gff_input_collection">
                 <collection type="list">
                     <element name="10_small.gff" value="10_small.gff"/>
                     <element name="11_small.gff" value="11_small.gff"/>
                 </collection>
             </param>
-	    <output_collection name="output" count="13" type="list">
-                    <element name="combined_DNA_CDS" file="combined_DNA_CDS.fasta" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="18206"/>
-                          </assert_contents>
-                    </element>
-                    <element name="combined_protein_CDS" file="combined_protein_CDS.fasta" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="7048"/>
-                          </assert_contents>
-		    </element>
-                    <element name="combined_protein_cdhit_out" file="combined_protein_cdhit_out.fa" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="5119"/>
-                          </assert_contents>
-		    </element>
-                    <element name="gene_data" file="gene_data.csv" ftype="csv">
-                          <assert_contents>
-                                <has_text text="KPLBOJCC_00001"/>
-                                <has_text text="NCFNLLIC_00549" />
-                          </assert_contents>
-		   </element>
-                   <element name="gene_presence_absence" file="gene_presence_absence.csv" ftype="csv">
-                          <assert_contents>
-                                <has_text text="dcd"/>
-                                <has_text text="trmB"/>
-                                <has_text text="betI_1"/>
-                          </assert_contents>
-                    </element>
-                    <element name="gene_presence_absence_roary" file="gene_presence_absence_roary.csv" ftype="csv">
-                          <assert_contents>
-                                <has_text text="kstR2_1"/>
-                                <has_text text="ybgJ"/>
-                          </assert_contents>
-                    </element>
-                    <element name="pan_genome_reference" file="pan_genome_reference.fa" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="5055"/>
-                          </assert_contents>
-		    </element>
-                    <element name="struct_presence_absence" file="struct_presence_absence.Rtab" ftype="Rtab">
-                          <assert_contents>
-                                <has_line_matching expression="Gene\s+10_small\s+11_small"/>
-                          </assert_contents>
-                    </element>
-                    <element name="summary_statistics" file="summary_statistics.txt" ftype="txt">
-			  <assert_contents>
-				<has_line line="Core genes&#009;(99% &#60;= strains &#60;= 100%)&#009;251"/>
-                                <has_line line="Total genes&#009;(0% &#60;= strains &#60;= 100%)&#009;251"/>
-                          </assert_contents>
-                    </element>
-	    </output_collection>
+	      <output_collection name="output" count="13" type="list">
+                  <element name="combined_DNA_CDS" ftype="fasta">
+                        <assert_contents>
+                              <has_n_lines n="18206"/>
+                        </assert_contents>
+                  </element>
+                  <element name="combined_protein_CDS" ftype="fasta">
+                        <assert_contents>
+                              <has_n_lines n="7048"/>
+                        </assert_contents>
+		      </element>
+                  <element name="combined_protein_cdhit_out" ftype="fasta">
+                        <assert_contents>
+                              <has_n_lines n="5119"/>
+                        </assert_contents>
+		      </element>
+                  <element name="pan_genome_reference" ftype="fasta">
+                        <assert_contents>
+                              <has_n_lines n="5055"/>
+                        </assert_contents>
+		      </element>
+                  <element name="summary_statistics" ftype="txt">
+			      <assert_contents>
+				      <has_line line="Core genes&#009;(99% &#60;= strains &#60;= 100%)&#009;251"/>
+                              <has_line line="Total genes&#009;(0% &#60;= strains &#60;= 100%)&#009;251"/>
+                        </assert_contents>
+                  </element>
+                  <element name="gene_data" ftype="csv">
+                        <assert_contents>
+                              <has_text text="KPLBOJCC_00001"/>
+                              <has_text text="NCFNLLIC_00549"/>
+                        </assert_contents>
+		      </element>
+                  <element name="gene_presence_absence" ftype="csv">
+                        <assert_contents>
+                              <has_text text="dcd"/>
+                              <has_text text="trmB"/>
+                              <has_text text="betI_1"/>
+                        </assert_contents>
+                  </element>
+                  <element name="gene_presence_absence_roary" ftype="csv">
+                        <assert_contents>
+                              <has_text text="kstR2_1"/>
+                              <has_text text="ybgJ"/>
+                        </assert_contents>
+                  </element>
+                  <element name="struct_presence_absence" ftype="tabular">
+                        <assert_contents>
+                              <has_line_matching expression="Gene\s+10_small\s+11_small"/>
+                        </assert_contents>
+                  </element>
+	      </output_collection>
             <output name="log">
                  <assert_contents>
-                      <has_text text="pre-processing gff3 files..."/>
+                      <has_text text="total seq: 979"/>
                  </assert_contents>
             </output>
          </test>
-	 <test expect_num_outputs="2">
-	    <param name="gen_code" value="11"/>
-	    <param name="mode" value="strict"/>
-	    <param name="adv_options_selector" value="set"/>
-	    <param name="a" value="None"/>
-	    <param name="gff_input_collection">
-		<collection type="list">
-		    <element name="10_small.gff" value="10_small.gff"/>
-		    <element name="11_small.gff" value="11_small.gff"/>
-		</collection>
-	    </param>
-            <output_collection name="output_advance" count="13" type="list">
-		    <element name="combined_DNA_CDS" file="advance/combined_DNA_CDS.fasta" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="18206"/>
-                          </assert_contents>
-                    </element>
-		    <element name="combined_protein_CDS" file="advance/combined_protein_CDS.fasta" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="7048"/>
-                          </assert_contents>
-                    </element>
-		    <element name="combined_protein_cdhit_out" file="advance/combined_protein_cdhit_out.fa" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="5119"/>
-                          </assert_contents>
-                    </element>
-                    <element name="gene_data" ftype="csv">
-			  <assert_contents>
-				  <has_n_lines n="980"/>	    
-				  <has_n_columns sep="," n="8"/>
-				  <has_text text="KPLBOJCC_00003"/>
-				  <has_text text="NCFNLLIC_00003"/>
-                          </assert_contents>
-                   </element>
-		   <element name="gene_presence_absence" file="advance/gene_presence_absence.csv" ftype="csv">
-                          <assert_contents>
-                                <has_text text="recB"/>
-                                <has_text text="recC"/>
-                                <has_text text="rpoB"/>
-                          </assert_contents>
-                    </element>
-		    <element name="gene_presence_absence_roary" file="advance/gene_presence_absence_roary.csv" ftype="csv">
-                          <assert_contents>
-                                <has_text text="ctpI_2"/>
-                                <has_text text="amiD_1"/>
-                          </assert_contents>
-                    </element>
-		    <element name="pan_genome_reference" file="advance/pan_genome_reference.fa" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="13120"/>
-                          </assert_contents>
-                    </element>
-		    <element name="struct_presence_absence" file="advance/struct_presence_absence.Rtab" ftype="Rtab">
-                          <assert_contents>
-                                <has_line_matching expression="Gene\s+10_small\s+11_small"/>
-                          </assert_contents>
-                    </element>
-		    <element name="summary_statistics" file="advance/summary_statistics.txt" ftype="txt">
-                          <assert_contents>
-				<has_line line="Core genes&#009;(99% &#60;= strains &#60;= 100%)&#009;251"/>
-				<has_line line="Shell genes&#009;(15% &#60;= strains &#60; 95%)&#009;475"/>
-                                <has_line line="Total genes&#009;(0% &#60;= strains &#60;= 100%)&#009;726"/>
-			  </assert_contents>
-                    </element>
+
+         <!-- Test 2 : Testing Panaroo with Advanced filtering option along with Alignment turned off -->
+         <test expect_num_outputs="2">
+            <param name="gen_code" value="11"/>
+            <param name="mode" value="strict"/>
+            <param name="log_out" value="yes"/>
+            <conditional name="advanced">
+                  <param name="adv_options_selector" value="set"/>
+                  <section name="gene_alignment_option">
+                        <param name="alignment" value="None"/>
+                  </section>
+            </conditional>
+            <param name="gff_input_collection">
+              <collection type="list">
+                  <element name="10_small.gff" value="10_small.gff"/>
+                  <element name="11_small.gff" value="11_small.gff"/>
+              </collection>
+            </param>
+            <output_collection name="output" count="13" type="list">
+            <element name="combined_DNA_CDS" ftype="fasta">
+                  <assert_contents>
+                        <has_n_lines n="18206"/>
+                  </assert_contents>
+            </element>
+            <element name="combined_protein_CDS" ftype="fasta">
+                  <assert_contents>
+                        <has_n_lines n="7048"/>
+                  </assert_contents>
+            </element>
+            <element name="combined_protein_cdhit_out" ftype="fasta">
+                  <assert_contents>
+                        <has_n_lines n="5119"/>
+                  </assert_contents>
+            </element>
+            <element name="pan_genome_reference" ftype="fasta">
+                  <assert_contents>
+                        <has_n_lines n="13120"/>
+                  </assert_contents>
+            </element>
+            <element name="summary_statistics" ftype="txt">
+                  <assert_contents>
+                        <has_line line="Core genes&#009;(99% &#60;= strains &#60;= 100%)&#009;251"/>
+                        <has_line line="Shell genes&#009;(15% &#60;= strains &#60; 95%)&#009;475"/>
+                        <has_line line="Total genes&#009;(0% &#60;= strains &#60;= 100%)&#009;726"/>
+                  </assert_contents>
+            </element>
+            <element name="gene_data" ftype="csv">
+                  <assert_contents>
+                        <has_n_lines n="980"/>	    
+                        <has_n_columns sep="," n="8"/>
+                        <has_text text="KPLBOJCC_00003"/>
+                        <has_text text="NCFNLLIC_00003"/>
+                  </assert_contents>
+            </element>
+            <element name="gene_presence_absence" ftype="csv">
+                  <assert_contents>
+                        <has_text text="recB"/>
+                        <has_text text="recC"/>
+                        <has_text text="rpoB"/>
+                  </assert_contents>
+            </element>
+            <element name="gene_presence_absence_roary" ftype="csv">
+                  <assert_contents>
+                        <has_text text="ctpI_2"/>
+                        <has_text text="amiD_1"/>
+                  </assert_contents>
+            </element>
+            <element name="struct_presence_absence" ftype="tabular">
+                  <assert_contents>
+                        <has_line_matching expression="Gene\s+10_small\s+11_small"/>
+                  </assert_contents>
+            </element>
             </output_collection>
-	    <output name="log">
-		 <assert_contents>
-		      <has_text text="pre-processing gff3 files..."/>
-		 </assert_contents>
+            <output name="log">
+                  <assert_contents>
+                        <has_text text="total seq: 979"/>
+                  </assert_contents>
             </output>
-    	</test>
-	<test expect_num_outputs="3">
-	   <param name="gen_code" value="11"/>
-	   <param name="mode" value="strict"/>
-	   <param name="adv_options_selector" value="set"/>
-	   <param name="a" value="core"/>
-	   <param name="gff_input_collection">
-		<collection type="list">
-		    <element name="10_small.gff" value="10_small.gff"/>
-		    <element name="11_small.gff" value="11_small.gff"/>
-	        </collection>
-           </param>
-	   <output_collection name="output_pangenome" count="18" type="list">
-		   <element name="combined_DNA_CDS" file="core/combined_DNA_CDS.fasta" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="18206"/>
-                          </assert_contents>
-                    </element>
-		    <element name="combined_protein_CDS" file="core/combined_protein_CDS.fasta" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="7048"/>
-                          </assert_contents>
-                    </element>
-		    <element name="combined_protein_cdhit_out" file="core/combined_protein_cdhit_out.fa" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="5119"/>
-                          </assert_contents>
-		    </element>
-                    <element name="summary_statistics" file="core/summary_statistics.txt" ftype="txt">
-                          <assert_contents>
-                                <has_line line="Core genes&#009;(99% &#60;= strains &#60;= 100%)&#009;251"/>
-                                <has_line line="Shell genes&#009;(15% &#60;= strains &#60; 95%)&#009;475"/>
-                                <has_line line="Total genes&#009;(0% &#60;= strains &#60;= 100%)&#009;726"/>
-                          </assert_contents>
-                   </element>
-                   <element name="struct_presence_absence" file="core/struct_presence_absence.Rtab" ftype="Rtab">
-                          <assert_contents>
-                                <has_line_matching expression="Gene\s+10_small\s+11_small"/>
-                          </assert_contents>
-                    </element>
-                    <element name="alignment_entropy"  file="core/alignment_entropy.csv" ftype="csv">
-                          <assert_contents>
-				 <has_text text="stf0.aln,0.0"/>
-				 <has_text text="bglB.aln,0.0"/>
-                          </assert_contents>
-                   </element>
-                    <element name="gene_data" ftype="csv">
-                          <assert_contents>
-                                  <has_n_lines n="980"/>
-                                  <has_n_columns sep="," n="8"/>
-                                  <has_text text="KPLBOJCC_00003"/>
-                                  <has_text text="NCFNLLIC_00003"/>
-                          </assert_contents>
-		   </element>
-                   <element name="pan_genome_reference" file="core/pan_genome_reference.fa" ftype="fa">
-                          <assert_contents>
-                                <has_n_lines n="13120"/>
-			</assert_contents>
-		   </element>
-		   <element name="gene_presence_absence" file="core/gene_presence_absence.csv" ftype="csv">
-                          <assert_contents>
-                                <has_text text="recB"/>
-                                <has_text text="recC"/>
-                                <has_text text="rpoB"/>
-                          </assert_contents>
-                    </element>
-		    <element name="gene_presence_absence_roary" file="core/gene_presence_absence_roary.csv" ftype="csv">
-                          <assert_contents>
-                                <has_text text="ctpI_2"/>
-                                <has_text text="amiD_1"/>
-                          </assert_contents>
-		   </element>
-                   <element name="core_gene_alignment_filtered" ftype="aln">
-                          <assert_contents>
-                                <has_size value="568632" delta="1000"/>
-                          </assert_contents>
-                   </element>
-                   <element name="core_gene_alignment"  ftype="aln">
-                          <assert_contents>
-                                <has_size value="569962" delta="1000"/>
-                          </assert_contents>
-                   </element>
-                   <element name="core_alignment_header" file="core/core_alignment_header.embl" ftype="embl">
-                          <assert_contents>
-                                <has_text text="ID   Genome standard; DNA; PRO; 1234 BP."/>
-                                <has_text text="hisB_1.aln"/>
-                          </assert_contents>
-                   </element>
-                   <element name="core_alignment_filtered_header" file="core/core_alignment_filtered_header.embl" ftype="embl">
-                          <assert_contents>
-                                <has_text text="ID   Genome standard; DNA; PRO; 1234 BP."/>
-                                <has_text text="&#10;FT   feature         79606..80691&#10;"/>
-                          </assert_contents>
-                   </element>
-	   </output_collection>
-	   <output_collection name="output_pangenome_fasta" count="251"/>
-	   <output name="log">
-		  <assert_contents>
-		       <has_text text="pre-processing gff3 files..."/>
-		 </assert_contents>
-	   </output>
-	</test>
+      </test>
+      <!-- Test 3 : Testing Panaroo with Advanced Filtering options along with MAFFT core alignment -->
+      <test expect_num_outputs="3">
+            <param name="gen_code" value="11"/>
+            <param name="mode" value="strict"/>
+            <param name="log_out" value="yes"/>
+            <conditional name="advanced">
+                  <param name="adv_options_selector" value="set"/>
+                  <section name="gene_alignment_option">
+                        <param name="alignment" value="core"/>
+                        <param name="aligner" value="mafft"/>
+                  </section>
+            </conditional>
+            <param name="gff_input_collection">
+                  <collection type="list">
+                        <element name="10_small.gff" value="10_small.gff"/>
+                        <element name="11_small.gff" value="11_small.gff"/>
+                  </collection>
+            </param>
+            <output_collection name="output_pangenome" count="18" type="list">
+            <element name="summary_statistics" ftype="txt">
+                  <assert_contents>
+                        <has_line line="Core genes&#009;(99% &#60;= strains &#60;= 100%)&#009;251"/>
+                        <has_line line="Shell genes&#009;(15% &#60;= strains &#60; 95%)&#009;475"/>
+                        <has_line line="Total genes&#009;(0% &#60;= strains &#60;= 100%)&#009;726"/>
+                  </assert_contents>
+            </element>
+            <element name="alignment_entropy" ftype="csv">
+                  <assert_contents>
+                        <has_text text="stf0.aln,0.0"/>
+                        <has_text text="bglB.aln,0.0"/>
+                  </assert_contents>
+            </element>
+            <element name="combined_DNA_CDS" ftype="fasta">
+                  <assert_contents>
+                        <has_n_lines n="18206"/>
+                  </assert_contents>
+            </element>
+            <element name="combined_protein_cdhit_out" ftype="fasta">
+                  <assert_contents>
+                        <has_n_lines n="5119"/>
+                  </assert_contents>
+            </element>
+            <element name="combined_protein_CDS" ftype="fasta">
+                  <assert_contents>
+                        <has_n_lines n="7048"/>
+                  </assert_contents>
+            </element>
+            <element name="struct_presence_absence" ftype="tabular">
+                  <assert_contents>
+                        <has_line_matching expression="Gene\s+10_small\s+11_small"/>
+                  </assert_contents>
+            </element>
+            <element name="gene_data" ftype="csv">
+                  <assert_contents>
+                        <has_n_lines n="980"/>
+                        <has_n_columns sep="," n="8"/>
+                        <has_text text="KPLBOJCC_00003"/>
+                        <has_text text="NCFNLLIC_00003"/>
+                  </assert_contents>
+            </element>
+            <element name="pan_genome_reference" ftype="fasta">
+                  <assert_contents>
+                        <has_n_lines n="13120"/>
+                  </assert_contents>
+            </element>
+            <element name="gene_presence_absence" ftype="csv">
+                  <assert_contents>
+                        <has_text text="recB"/>
+                        <has_text text="recC"/>
+                        <has_text text="rpoB"/>
+                  </assert_contents>
+            </element>
+            <element name="gene_presence_absence_roary" ftype="csv">
+                  <assert_contents>
+                        <has_text text="ctpI_2"/>
+                        <has_text text="amiD_1"/>
+                  </assert_contents>
+            </element>
+            <element name="core_gene_alignment_filtered" ftype="fasta">
+                  <assert_contents>
+                        <has_size value="569962" delta="1000"/>
+                  </assert_contents>
+            </element>
+            <element name="core_gene_alignment" ftype="fasta">
+                  <assert_contents>
+                        <has_size value="569962" delta="1000"/>
+                  </assert_contents>
+            </element>
+            <element name="core_alignment_header" ftype="embl">
+                  <assert_contents>
+                        <has_text text="ID   Genome standard; DNA; PRO; 1234 BP."/>
+                        <has_text text="hisB_1.aln"/>
+                  </assert_contents>
+            </element>
+            <element name="core_alignment_filtered_header" ftype="embl">
+                  <assert_contents>
+                        <has_n_lines n="760" delta="10"/>
+                  </assert_contents>
+            </element>
+            </output_collection>
+            <output_collection name="output_pangenome_fasta" count="251"/>
+            <output name="log">
+                 <assert_contents>
+                        <has_text text="total seq: 979"/>
+                </assert_contents>
+            </output>
+         </test>
     </tests>
     <help><![CDATA[
-Panaroo_ is A Bacterial Pangenome Analysis Pipeline. Panaroo builds a full graphical representation of the pangenome, where nodes are clusters of orthologous genes (COGs) and two nodes are connected by an edge if they are adjacent on a contig in any sample from the population. Using this graphical representation, Panaroo corrects for errors introduced during annotation by collapsing diverse gene families, filtering contamination, merging fragmented gene segments and refinding missing genes.
+Panaroo_ is a pangenome analysis tool specifically designed to analyze bacterial genomes. Panaroo builds a full graphical representation of the pangenome, where nodes are clusters of orthologous genes (COGs) and two nodes are connected by an edge if they are adjacent on a contig in any sample from the population. Using this graphical representation, Panaroo corrects for errors introduced during annotation by collapsing diverse gene families, filtering contamination, merging fragmented gene segments and refinding missing genes.
 
 **INPUTS**
-Panaroo now supports multiple input formats. To use non-standard GFF3 files you must profile the input file as a list in a text file (one per line). Separate GFF and FASTA files can be provided per isolate by providing each file delimited by a space or a tab. Genbank file formats are also supported with extensions '.gbk', '.gb' or '.gbff'. These must compliant with Genbank/ENA/DDJB. This can be forced in Prokka by specifying the --compliance parameter.
 
-  - a list of gff format in a collection
+  - A list of GFF3 files (from Prokka) in a collection.
 
 **OUTPUTS**
 
@@ -457,7 +472,7 @@
   - core_gene_alignment_filtered
   - core_alignment_filtered_header
   - core_alignment_header
-  - a collection of fasta files
+  - a collection of pangenome-aligned FASTA files
 
 .. _Panaroo: https://gthlab.au/panaroo/#/gettingstarted/quickstart
 
@@ -465,5 +480,4 @@
     <citations>
         <citation type="doi">10.1186/s13059-020-02090-4</citation>
     </citations>
-</tool>
- 
+</tool>
\ No newline at end of file
author	iuc
date	Fri, 11 Apr 2025 11:23:50 +0000
parents	50483f852947
children	b05be6316263