comparison AlleleCall.xml @ 0:1ac58e449c87 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chewbbaca commit 8bb518e20d68623904232ae28bb8a51ec05c1c4a
author iuc
date Wed, 25 Sep 2024 14:12:27 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1ac58e449c87
<!--
    Galaxy wrapper for "chewBBACA.py AlleleCall".

    The command stages every input assembly under ./input, unpacks the zipped
    schema under ./schema and runs AlleleCall against schema/schema_seed/.
    Every *.tsv and *.txt file written to ./output is collected into the two
    list collections below; the optional FASTA outputs are only created when
    the matching entry is ticked in "Output Options".
-->
<tool id="chewbbaca_allelecall" name="ChewBBACA AlleleCall" version="@CHEW_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Determine the allelic profiles of a set of genomes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <command detect_errors="exit_code"><![CDATA[
        #import re
        mkdir 'input' &&
        mkdir 'schema' &&
        ## Link each assembly under a sanitised name: anything that is not a
        ## word character or a hyphen in the element identifier becomes "_".
        #for $file in $input_file
            #set escaped_element_identifier = re.sub('[^\w\-]', '_', str($file.element_identifier))
            ln -sf '$file' 'input/${escaped_element_identifier}.${file.ext}' &&
        #end for
        unzip '$input_schema' -d 'schema' &&
        chewBBACA.py AlleleCall
        #if $training_file:
            --ptf '$training_file'
        #end if
        $cds_input
        #if $genes_list:
            --gl '$genes_list'
        #end if
        #if str($blast_score_ratio) != ""
            --bsr $blast_score_ratio
        #end if
        #if str($minimum_length) != ""
            --l $minimum_length
        #end if
        #if str($translation_table) != ""
            --t $translation_table
        #end if
        #if str($size_threshold) != ""
            --st $size_threshold
        #end if
        $no_inferred
        ## Both selects are optional: only pass them when a value is set so
        ## that a cleared select falls back to the chewBBACA default instead
        ## of being rendered as a literal "None" argument.
        #if $prodigal_mode:
            --pm $prodigal_mode
        #end if
        #if $mode:
            --mode $mode
        #end if
        --force-continue
        ## output_selector is an optional multi-select and may be empty.
        ## Testing membership on the comma-separated string form works
        ## whether or not anything is selected.
        #set $selected_outputs = str($output_selector).split(',')
        #if 'output_unclassified' in $selected_outputs:
            --output-unclassified
        #end if
        #if 'output_missing' in $selected_outputs:
            --output-missing
        #end if
        #if 'output_novel' in $selected_outputs:
            --output-novel
        #end if
        #if 'hash_profile' in $selected_outputs:
            ## It can use any hashing algorithm from hashlib but for simplicity we set it to md5
            --hash-profile md5
        #end if
        -i 'input' -g 'schema/schema_seed/' -o 'output'
    ]]></command>
    <inputs>
        <param format="fasta" name="input_file" type="data" multiple="true" label="Genome assemblies in FASTA format"/>
        <param format="zip" name="input_schema" type="data" label="Schema Files in zip format" help="The schema directory contains the loci FASTA files and a folder named 'short' that contains the FASTA files with the loci representative alleles."/>
        <section name="advanced" title="Advanced options">
            <param argument="--genes-list" type="data" format="txt" label="Gene list" optional="true" />
            <param argument="--training-file" type="data" format="binary" label="Prodigal training file" optional="true" help="By default, gets the training file from the schema"/>
            <param argument="--cds-input" type="boolean" truevalue="--cds-input" falsevalue="" checked="false" label="CDS input" optional="true"/>
            <param argument="--blast-score-ratio" type="float" min="0.0" max="1.0" value="" optional="true" label="BLAST Score Ratio value" />
            <param argument="--minimum-length" type="integer" min="0" value="" optional="true" label="Minimum sequence length value"/>
            <param argument="--translation-table" type="integer" min="0" value="" optional="true" help="Must match the genetic code used to create the training file (default: uses value defined in schema config)." label="Genetic code used to predict genes and to translate coding sequences"/>
            <param argument="--size-threshold" type="float" min="0" value="" optional="true" label="CDS size variation threshold"/>
            <!-- Enabling this flag stops inferred alleles from being written back to the schema. -->
            <param argument="--no-inferred" type="boolean" truevalue="--no-inferred" falsevalue="" checked="false" optional="true" label="Do not add the sequences of inferred alleles (INF) to the schema" help="Use this parameter if the schema is being accessed by multiple processes/users simultaneously." />
            <param argument="--prodigal-mode" type="select" optional="true" label="Prodigal Mode" help="&quot;single&quot; for finished genomes, reasonable quality draft genomes and big viruses. &quot;meta&quot; for metagenomes, low quality draft genomes, small viruses, and small plasmids">
                <option value="single" selected="true">single</option>
                <option value="meta">meta</option>
            </param>
            <param argument="--mode" type="select" label="Execution mode" optional="true">
                <option value="1">Only exact matches at DNA level</option>
                <option value="2">Exact matches at DNA and Protein level</option>
                <option value="3">Exact matches and minimizer-based clustering to find similar alleles based on BSR+0.1</option>
                <option value="4" selected="true">Full process: exact matches and similar matches based on the BSR value</option>
            </param>
        </section>
        <section name="output" title="Output Options">
            <param name="output_selector" type="select" multiple="true" optional="true" display="checkboxes" label="Select / Deselect all">
                <option value="output_unclassified">Create a Fasta file with unclassified coding sequences. (--output-unclassified)</option>
                <option value="output_missing">Create a Fasta file with coding sequences classified as NIPH, NIPHEM, ASM, ALM, PLOT3, PLOT5 and LOTSC. (--output-missing)</option>
                <option value="output_novel">Create Fasta file with the novel alleles inferred during the allele calling. (--output-novel)</option>
                <option value="hash_profile">Create TSV file with hashed allelic profiles. (--hash-profile)</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <collection name="allelecall_results" type="list" label="${tool.name} on ${on_string}: AlleleCall Results">
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.tsv$" format="tabular" directory="output"/>
        </collection>
        <!-- The name is spelled "allelcall_log" on purpose: it is referenced by the tests and by existing workflows. -->
        <collection name="allelcall_log" type="list" label="${tool.name} on ${on_string}: AlleleCall Logs">
            <discover_datasets pattern="(?P&lt;name&gt;.+)\.txt$" format="txt" directory="output"/>
        </collection>
        <data name="unclassified_fasta" format="fasta" from_work_dir="output/unclassified_sequences.fasta" label="${tool.name} on ${on_string}: Unclassified fasta">
            <filter>output['output_selector'] and 'output_unclassified' in output['output_selector']</filter>
        </data>
        <data name="missing_fasta" format="fasta" from_work_dir="output/missing_classes.fasta" label="${tool.name} on ${on_string}: Missing fasta">
            <filter>output['output_selector'] and 'output_missing' in output['output_selector']</filter>
        </data>
        <data name="novel_fasta" format="fasta" from_work_dir="output/novel_alleles.fasta" label="${tool.name} on ${on_string}: Novel fasta">
            <filter>output['output_selector'] and 'output_novel' in output['output_selector']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Assembly supplied without a file extension; unclassified, missing and hashed outputs requested. -->
        <test expect_num_outputs="4">
            <param name="input_file" value="GCA_000007265"/>
            <param name="input_schema" value="schema.zip"/>
            <param name="output_selector" value="output_unclassified,output_missing,hash_profile" />
            <output_collection name="allelecall_results" type="list">
                <element name="cds_coordinates" file="cds_coordinates.tsv" compare="diff"/>
                <element name="loci_summary_stats" file="loci_summary_stats.tsv" compare="diff"/>
                <element name="paralogous_loci" ftype="tabular">
                    <assert_contents>
                        <has_text_matching expression="Genome.*Loci.*CDS"/>
                    </assert_contents>
                </element>
                <!-- NOTE(review): "results_alleles" is declared twice in this collection (here and below); confirm both checks are actually applied. -->
                <element name="results_alleles" ftype="tabular">
                    <assert_contents>
                        <has_text_matching expression="1.*1.*NIPHEM.*1.*1"/>
                        <has_text_matching expression="GCA_000007265.*1"/>
                    </assert_contents>
                </element>
                <element name="results_alleles" file="results_alleles.tsv" compare="diff"/>
                <element name="results_alleles_hashed" ftype="tabular">
                    <assert_contents>
                        <has_text_matching expression="FILE.*GCA-000007265-protein1.*GCA-000007265-protein10.*GCA-000007265-protein100"/>
                        <has_text_matching expression="GCA_000007265.*308e7666834338d0530d925b2737f2c6.*4aece26d201d59a90947e3400c7abf3f.*ebea148832aa2ae2704d37ebd5123169"/>
                    </assert_contents>
                </element>
                <element name="results_statistics" file="results_statistics.tsv" compare="diff"/>
            </output_collection>
            <output_collection name="allelcall_log" type="list">
                <element name="logging_info" ftype="txt">
                    <assert_contents>
                        <has_text_matching expression="Used a BSR of: 0.6"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="unclassified_fasta">
                <assert_contents>
                    <has_text_matching expression="GCA_000007265-protein15"/>
                    <has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/>
                </assert_contents>
            </output>
            <output name="missing_fasta">
                <assert_contents>
                    <!-- NOTE(review): the unescaped "|" characters are regex alternation, so this passes on any file containing "1". Consider has_text, or escaping the pipes, once the expected header is confirmed against the test data. -->
                    <has_text_matching expression="1|GCA_000007265|GCA-000007265-protein16&amp;NIPHEM|GCA_000007265-protein16&amp;EXC"/>
                </assert_contents>
            </output>
        </test>
        <!-- Same run with a ".fna" file name; the genome identifier in the results becomes "GCA_000007265_fna". -->
        <test expect_num_outputs="4">
            <param name="input_file" value="GCA_000007265.fna"/>
            <param name="input_schema" value="schema.zip"/>
            <param name="output_selector" value="output_unclassified,output_missing,hash_profile" />
            <output_collection name="allelecall_results" type="list">
                <element name="paralogous_loci" ftype="tabular">
                    <assert_contents>
                        <has_text_matching expression="Genome.*Loci.*CDS"/>
                    </assert_contents>
                </element>
                <element name="results_alleles" ftype="tabular">
                    <assert_contents>
                        <has_text_matching expression="1.*1.*NIPHEM.*1.*1"/>
                        <has_text_matching expression="GCA_000007265.*1"/>
                    </assert_contents>
                </element>
                <element name="results_alleles_hashed" ftype="tabular">
                    <assert_contents>
                        <has_text_matching expression="FILE.*GCA-000007265-protein1.*GCA-000007265-protein10.*GCA-000007265-protein100"/>
                        <has_text_matching expression="GCA_000007265_fna.*308e7666834338d0530d925b2737f2c6.*4aece26d201d59a90947e3400c7abf3f.*ebea148832aa2ae2704d37ebd5123169"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output_collection name="allelcall_log" type="list">
                <element name="logging_info" ftype="txt">
                    <assert_contents>
                        <has_text_matching expression="Used a BSR of: 0.6"/>
                    </assert_contents>
                </element>
            </output_collection>
            <output name="unclassified_fasta">
                <assert_contents>
                    <has_text_matching expression="GCA_000007265_fna-protein83"/>
                    <has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/>
                </assert_contents>
            </output>
            <output name="missing_fasta">
                <assert_contents>
                    <!-- NOTE(review): same alternation problem as in the first test. The expected text also looks copied from it: other assertions in this test use the genome identifier "GCA_000007265_fna". Verify against the test data. -->
                    <has_text_matching expression="1|GCA_000007265|GCA-000007265-protein16&amp;NIPHEM|GCA_000007265-protein16&amp;EXC"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
chewBBACA is a software suite for the creation and evaluation of core genome and whole genome MultiLocus Sequence Typing (cg/wgMLST) schemas and results.

In chewBBACA, by default, an allele needs to be a CDS defined by Prodigal_. To ensure reproducibility of the CDS prediction, the same Prodigal training file for each bacterial species should be used and provided as input.

.. class:: infomark

**Important**

Although the use of a training file is optional, it is highly recommended to ensure consistent results.

If the schema files are created by chewBBACA v2, please use the PrepExternalSchema module to convert the schema to a format fully compatible with chewBBACA v3.

By default, the AlleleCall module uses the Prodigal training file included in the schema’s directory and it is not necessary to pass a training file to the --ptf parameter.

.. class:: infomark

**Note**

If a text file that contains a list of full paths to loci FASTA files or loci IDs, one per line, is passed to the --genes-list parameter, the process will only perform allele calling for the loci in that list.

.. _Prodigal: https://github.com/hyattpd/Prodigal
    </help>
    <expand macro="citations" />
</tool>