diff AlleleCall.xml @ 0:1ac58e449c87 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chewbbaca commit 8bb518e20d68623904232ae28bb8a51ec05c1c4a
author iuc
date Wed, 25 Sep 2024 14:12:27 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/AlleleCall.xml	Wed Sep 25 14:12:27 2024 +0000
@@ -0,0 +1,222 @@
+<tool id="chewbbaca_allelecall" name="ChewBBACA AlleleCall" version="@CHEW_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>Determine the allelic profiles of a set of genomes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        ## Stage the inputs: the assemblies are linked into 'input' and the
+        ## zipped schema is unpacked into 'schema'; both directories are then
+        ## handed to chewBBACA via -i and -g.
+        mkdir 'input' &&
+        mkdir 'schema' &&
+        #for $file in $input_file
+        ## Replace every character that is not a word character or '-' so the
+        ## element identifier can be used as a file name.
+        #set escaped_element_identifier = re.sub('[^\w\-]', '_', str($file.element_identifier))
+        ln -sf '$file' 'input/${escaped_element_identifier}.${file.ext}' &&
+        #end for
+        unzip '$input_schema' -d 'schema' &&
+        chewBBACA.py AlleleCall
+            ## Parameters declared inside a section are referenced with their
+            ## section prefix (advanced / output).
+            #if $advanced.training_file:
+                --ptf '$advanced.training_file'
+            #end if
+            $advanced.cds_input
+            #if $advanced.genes_list:
+                --gl '$advanced.genes_list'
+            #end if
+            #if str($advanced.blast_score_ratio) != ""
+                --bsr $advanced.blast_score_ratio
+            #end if
+            #if str($advanced.minimum_length) != ""
+                --l $advanced.minimum_length
+            #end if
+            #if str($advanced.translation_table) != ""
+                --t $advanced.translation_table
+            #end if
+            #if str($advanced.size_threshold) != ""
+                --st $advanced.size_threshold
+            #end if
+            $advanced.no_inferred
+            ## Both selects are optional, so only pass them when a value is set;
+            ## otherwise the literal string 'None' would end up on the command line.
+            #if $advanced.prodigal_mode
+                --pm $advanced.prodigal_mode
+            #end if
+            #if $advanced.mode
+                --mode $advanced.mode
+            #end if
+            --force-continue
+            ## Split the multi-select into a plain list so the membership tests
+            ## below also work when nothing is selected.
+            #set selected_outputs = str($output.output_selector).split(',')
+            #if 'output_unclassified' in $selected_outputs
+                --output-unclassified
+            #end if
+            #if 'output_missing' in $selected_outputs
+                --output-missing
+            #end if
+            #if 'output_novel' in $selected_outputs
+                --output-novel
+            #end if
+            #if 'hash_profile' in $selected_outputs
+            ## It can use any hashing algorithm from hashlib but for simplicity we set it to md5
+                --hash-profile md5
+            #end if
+            -i 'input' -g 'schema/schema_seed/' -o 'output'
+    ]]></command>
+    <inputs>
+        <!-- Required inputs: the assemblies to type and the zipped schema used for allele calling. -->
+        <param format="fasta" name="input_file" type="data" multiple="true" label="Genome assemblies in FASTA format"/>
+        <param format="zip" name="input_schema" type="data" label="Schema Files in zip format" help="The schema directory contains the loci FASTA files and a folder named 'short' that contains the FASTA files with the loci representative alleles."/>
+        <!-- Optional tuning parameters; values left empty are not passed on the command line. -->
+        <section name="advanced" title="Advanced options">
+            <param argument="--genes-list" type="data" format="txt" label="Gene list" optional="true" />
+            <param argument="--training-file" type="data" format="binary" label="Prodigal training file" optional="true" help="By default, gets the training file from the schema"/>
+            <param argument="--cds-input" type="boolean" truevalue="--cds-input" falsevalue="" checked="false" label="CDS input" optional="true"/>
+            <param argument="--blast-score-ratio" type="float" min="0.0" max="1.0" value="" optional="true" label="BLAST Score Ratio value" />
+            <param argument="--minimum-length" type="integer" min="0" value="" optional="true" label="Minimum sequence length value"/>
+            <param argument="--translation-table" type="integer" min="0" value="" optional="true" help="Must match the genetic code used to create the training file (default: uses value defined in schema config)." label="Genetic code used to predict genes and to translate coding sequences"/>
+            <param argument="--size-threshold" type="float" min="0" value="" optional="true" label="CDS size variation threshold"/>
+            <!-- Enabling this flag prevents inferred alleles from being written back to the schema. -->
+            <param argument="--no-inferred" type="boolean" truevalue="--no-inferred" falsevalue="" checked="false" optional="true" label="Do not add the sequences of inferred alleles (INF) to the schema" help="Use this parameter if the schema is being accessed by multiple processes/users simultaneously." />
+            <param argument="--prodigal-mode" type="select" optional="true" label="Prodigal Mode" help="&quot;single&quot; for finished genomes, reasonable quality draft genomes and big viruses. &quot;meta&quot; for metagenomes, low quality draft genomes, small viruses, and small plasmids">
+                <option value="single" selected="true">single</option>
+                <option value="meta">meta</option>
+            </param>
+            <param argument="--mode" type="select" label="Execution mode" optional="true">
+                <option value="1">Only exact matches at DNA level</option>
+                <option value="2">Exact matches at DNA and Protein level</option>
+                <option value="3">Exact matches and minimizer-based clustering to find similar alleles based on BSR+0.1</option>
+                <option value="4" selected="true">Full process: exact matches and similar matches based on the BSR value</option>
+            </param>
+        </section>
+        <!-- Optional extra result files; each selected value maps to one chewBBACA output flag. -->
+        <section name="output" title="Output Options">
+            <param name="output_selector" type="select" multiple="true" optional="true" display="checkboxes" label="Select / Deselect all">
+                <option value="output_unclassified">Create a Fasta file with unclassified coding sequences. (--output-unclassified)</option>
+                <option value="output_missing">Create a Fasta file with coding sequences classified as NIPH, NIPHEM, ASM, ALM, PLOT3, PLOT5 and LOTSC. (--output-missing)</option>
+                <option value="output_novel">Create Fasta file with the novel alleles inferred during the allele calling. (--output-novel)</option>
+                <option value="hash_profile">Create TSV file with hashed allelic profiles. (--hash-profile) </option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <!-- Every .tsv file found in the 'output' directory becomes an element of this list. -->
+        <collection name="allelecall_results"
+                    type="list"
+                    label="${tool.name} on ${on_string}: AlleleCall Results">
+            <discover_datasets directory="output" format="tabular" pattern="(?P&lt;name&gt;.+)\.tsv$"/>
+        </collection>
+        <!-- Every .txt file found in the 'output' directory becomes an element of this list. -->
+        <collection name="allelcall_log"
+                    type="list"
+                    label="${tool.name} on ${on_string}: AlleleCall Logs">
+            <discover_datasets directory="output" format="txt" pattern="(?P&lt;name&gt;.+)\.txt$"/>
+        </collection>
+        <!-- The FASTA outputs below are only created when the matching entry of output_selector is selected. -->
+        <data name="unclassified_fasta"
+              format="fasta"
+              label="${tool.name} on ${on_string}: Unclassified fasta"
+              from_work_dir="output/unclassified_sequences.fasta">
+            <filter>output['output_selector'] and 'output_unclassified' in output['output_selector']</filter>
+        </data>
+        <data name="missing_fasta"
+              format="fasta"
+              label="${tool.name} on ${on_string}: Missing fasta"
+              from_work_dir="output/missing_classes.fasta">
+            <filter>output['output_selector'] and 'output_missing' in output['output_selector']</filter>
+        </data>
+        <data name="novel_fasta"
+              format="fasta"
+              label="${tool.name} on ${on_string}: Novel fasta"
+              from_work_dir="output/novel_alleles.fasta">
+            <filter>output['output_selector'] and 'output_novel' in output['output_selector']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Input whose name has no extension; expects both collections plus the unclassified and missing FASTA files. -->
+        <test expect_num_outputs="4">
+            <param name="input_file" value="GCA_000007265"/>
+            <param name="input_schema" value="schema.zip"/>
+            <param name="output_selector" value="output_unclassified,output_missing,hash_profile" />
+            <output_collection name="allelecall_results" type="list">
+                <element name="cds_coordinates" file="cds_coordinates.tsv" compare="diff"/>
+                <element name="loci_summary_stats" file="loci_summary_stats.tsv" compare="diff"/>
+                <element name="paralogous_loci" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Genome.*Loci.*CDS"/>
+                    </assert_contents>
+                </element>
+                <!-- Single declaration for results_alleles: the file comparison and the content assertions were previously split across two elements with the same name. -->
+                <element name="results_alleles" file="results_alleles.tsv" compare="diff" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="1.*1.*NIPHEM.*1.*1"/>
+                        <has_text_matching expression="GCA_000007265.*1"/>
+                    </assert_contents>
+                </element>
+                <element name="results_alleles_hashed" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="FILE.*GCA-000007265-protein1.*GCA-000007265-protein10.*GCA-000007265-protein100"/>
+                        <has_text_matching expression="GCA_000007265.*308e7666834338d0530d925b2737f2c6.*4aece26d201d59a90947e3400c7abf3f.*ebea148832aa2ae2704d37ebd5123169"/>
+                    </assert_contents>
+                </element>
+                <element name="results_statistics" file="results_statistics.tsv" compare="diff"/>
+            </output_collection>
+            <output_collection name="allelcall_log" type="list">
+                <element name="logging_info" ftype="txt">
+                    <assert_contents>
+                        <has_text_matching expression="Used a BSR of: 0.6"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="unclassified_fasta">
+                <assert_contents>
+                    <has_text_matching expression="GCA_000007265-protein15"/>
+                    <has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/>
+                </assert_contents>
+            </output>
+            <output name="missing_fasta">
+                <assert_contents>
+                    <!-- An unescaped '|' is regex alternation, which made the previous pattern match any file containing "1"; assert on a NIPHEM-classified locus header instead. -->
+                    <has_text_matching expression="GCA-000007265-protein\d+&amp;NIPHEM"/>
+                </assert_contents>
+            </output>
+        </test>
+        <!-- Input named with a .fna extension; the sanitized identifier (GCA_000007265_fna) is expected in the outputs. -->
+        <test expect_num_outputs="4">
+            <param name="input_file" value="GCA_000007265.fna"/>
+            <param name="input_schema" value="schema.zip"/>
+            <param name="output_selector" value="output_unclassified,output_missing,hash_profile" />
+            <output_collection name="allelecall_results" type="list">
+                <element name="paralogous_loci" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="Genome.*Loci.*CDS"/>
+                    </assert_contents>
+                </element>
+                <element name="results_alleles" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="1.*1.*NIPHEM.*1.*1"/>
+                        <has_text_matching expression="GCA_000007265.*1"/>
+                    </assert_contents>
+                </element>
+                <element name="results_alleles_hashed" ftype="tabular">
+                    <assert_contents>
+                        <has_text_matching expression="FILE.*GCA-000007265-protein1.*GCA-000007265-protein10.*GCA-000007265-protein100"/>
+                        <has_text_matching expression="GCA_000007265_fna.*308e7666834338d0530d925b2737f2c6.*4aece26d201d59a90947e3400c7abf3f.*ebea148832aa2ae2704d37ebd5123169"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output_collection name="allelcall_log" type="list">
+                <element name="logging_info" ftype="txt">
+                    <assert_contents>
+                        <has_text_matching expression="Used a BSR of: 0.6"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+            <output name="unclassified_fasta">
+                <assert_contents>
+                    <has_text_matching expression="GCA_000007265_fna-protein83"/>
+                    <has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/>
+                </assert_contents>
+            </output>
+            <output name="missing_fasta">
+                <assert_contents>
+                    <!-- An unescaped '|' is regex alternation, which made the previous pattern match any file containing "1"; assert on a NIPHEM-classified locus header instead. -->
+                    <has_text_matching expression="GCA-000007265-protein\d+&amp;NIPHEM"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+chewBBACA is a software suite for the creation and evaluation of core genome and whole genome MultiLocus Sequence Typing (cg/wgMLST) schemas and results.
+
+In chewBBACA, by default, an allele needs to be a CDS defined by Prodigal_. To ensure reproducibility of the CDS prediction, the same Prodigal training file for each bacterial species should be used and provided as input.
+
+.. class:: infomark
+
+**Important**
+
+Although the use of a training file is optional, it is highly recommended to ensure consistent results.
+
+If the schema files are created by chewBBACA v2, please use the PrepExternalSchema module to convert the schema to a format fully compatible with chewBBACA v3.
+
+By default, the AlleleCall module uses the Prodigal training file included in the schema’s directory and it is not necessary to pass a training file to the --ptf parameter.
+
+.. class:: infomark
+
+**Note**
+
+If a text file that contains a list of full paths to loci FASTA files or loci IDs, one per line, is passed to the --genes-list parameter, the process will only perform allele calling for the loci in that list.
+
+.. _Prodigal: https://github.com/hyattpd/Prodigal
+    </help>
+    <expand macro="citations" />
+</tool>
\ No newline at end of file