Mercurial > repos > iuc > chewbbaca_allelecall
comparison AlleleCall.xml @ 0:1ac58e449c87 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chewbbaca commit 8bb518e20d68623904232ae28bb8a51ec05c1c4a
| author | iuc |
|---|---|
| date | Wed, 25 Sep 2024 14:12:27 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:1ac58e449c87 |
|---|---|
| 1 <tool id="chewbbaca_allelecall" name="ChewBBACA AlleleCall" version="@CHEW_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description>Determine the allelic profiles of a set of genomes</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements" /> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 #import re | |
| 9 mkdir 'input' && | |
| 10 mkdir 'schema' && | |
| 11 #for $file in $input_file | |
| 12 #set escaped_element_identifier = re.sub('[^\w\-]', '_', str($file.element_identifier)) | |
| 13 ln -sf '$file' 'input/${escaped_element_identifier}.${file.ext}' && | |
| 14 #end for | |
| 15 unzip '$input_schema' -d 'schema' && | |
| 16 chewBBACA.py AlleleCall | |
| 17 #if $training_file: | |
| 18 --ptf '$training_file' | |
| 19 #end if | |
| 20 $cds_input | |
| 21 #if $genes_list: | |
| 22 --gl '$genes_list' | |
| 23 #end if | |
| 24 #if str($blast_score_ratio) != "" | |
| 25 --bsr $blast_score_ratio | |
| 26 #end if | |
| 27 #if str($minimum_length) != "" | |
| 28 --l $minimum_length | |
| 29 #end if | |
| 30 #if str($translation_table) != "" | |
| 31 --t $translation_table | |
| 32 #end if | |
| 33 #if str($size_threshold) != "" | |
| 34 --st $size_threshold | |
| 35 #end if | |
| 36 $no_inferred | |
| 37 --pm $prodigal_mode | |
| 38 --mode $mode | |
| 39 --force-continue | |
| 40 #if 'output_unclassified' in $output_selector: | |
| 41 --output-unclassified | |
| 42 #end if | |
| 43 #if 'output_missing' in $output_selector: | |
| 44 --output-missing | |
| 45 #end if | |
| 46 #if 'output_novel' in $output_selector: | |
| 47 --output-novel | |
| 48 #end if | |
| 49 #if 'hash_profile' in $output_selector: | |
| 50 ## It can use any hashing algorithm from hashlib but for simplicity we set it to md5 | |
| 51 --hash-profile md5 | |
| 52 #end if | |
| 53 -i 'input' -g 'schema/schema_seed/' -o 'output' | |
| 54 ]]></command> | |
| 55 <inputs> | |
| 56 <param format="fasta" name="input_file" type="data" multiple="true" label="Genome assemblies in FASTA format"/> | |
| 57 <param format="zip" name="input_schema" type="data" label="Schema Files in zip format" help="The schema directory contains the loci FASTA files and a folder named 'short' that contains the FASTA files with the loci representative alleles."/> | |
| 58 <section name="advanced" title="Advanced options"> | |
| 59 <param argument="--genes-list" type="data" format="txt" label="Gene list" optional="true" /> | |
| 60 <param argument="--training-file" type="data" format="binary" label="Prodigal training file" optional="true" help="By default, gets the training file from the schema"/> | |
| 61 <param argument="--cds-input" type="boolean" truevalue="--cds-input" falsevalue="" checked="false" label="CDS input" optional="true"/> | |
| 62 <param argument="--blast-score-ratio" type="float" min="0.0" max="1.0" value="" optional="true" label="BLAST Score Ratio value" /> | |
| 63 <param argument="--minimum-length" type="integer" min="0" value="" optional="true" label="Minimum sequence length value"/> | |
| 64 <param argument="--translation-table" type="integer" min="0" value="" optional="true" help="Must match the genetic code used to create the training file (default: uses value defined in schema config)." label="Genetic code used to predict genes and to translate coding sequences"/> | |
| 65 <param argument="--size-threshold" type="float" min="0" value="" optional="true" label="CDS size variation threshold"/> | |
| 66 <param argument="--no-inferred" type="boolean" truevalue="--no-inferred" falsevalue="" checked="false" optional="true" label="Do not add the sequences of inferred alleles (INF) to the schema" help="When enabled, inferred alleles are not added to the schema. Use this parameter if the schema is being accessed by multiple processes/users simultaneously." /> | |
| 67 <param argument="--prodigal-mode" type="select" optional="true" label="Prodigal Mode" help="&quot;single&quot; for finished genomes, reasonable quality draft genomes and big viruses. &quot;meta&quot; for metagenomes, low quality draft genomes, small viruses, and small plasmids"> | |
| 68 <option value="single" selected="true"> | |
| 69 single | |
| 70 </option> | |
| 71 <option value="meta"> | |
| 72 meta | |
| 73 </option> | |
| 74 </param> | |
| 75 <param argument="--mode" type="select" label="Execution mode" optional="true" help="The selected value is passed to chewBBACA's --mode option; mode 4 is preselected."> | |
| 76 <option value="1">Only exact matches at DNA level</option> | |
| 77 <option value="2">Exact matches at DNA and Protein level </option> | |
| 78 <option value="3">Exact matches and minimizer-based clustering to find similar alleles based on BSR+0.1 </option> | |
| 79 <option value="4" selected="true">Run the full process to find exact matches and similar matches based on the BSR value </option> | |
| 80 </param> | |
| 81 </section> | |
| 82 <section name="output" title="Output Options"> | |
| 83 <param name="output_selector" type="select" multiple="true" optional="true" display="checkboxes" label="Select / Deselect all"> | |
| 84 <option value="output_unclassified">Create a Fasta file with unclassified coding sequences. (--output-unclassified)</option> | |
| 85 <option value="output_missing">Create a Fasta file with coding sequences classified as NIPH, NIPHEM, ASM, ALM, PLOT3, PLOT5 and LOTSC. (--output-missing)</option> | |
| 86 <option value="output_novel">Create Fasta file with the novel alleles inferred during the allele calling. (--output-novel)</option> | |
| 87 <option value="hash_profile">Create TSV file with hashed allelic profiles. (--hash-profile) </option> | |
| 88 </param> | |
| 89 </section> | |
| 90 </inputs> | |
| 91 <outputs> | |
| 92 <collection name="allelecall_results" type="list" label="${tool.name} on ${on_string}: AlleleCall Results"> | |
| 93 <discover_datasets pattern="(?P&lt;name&gt;.+)\.tsv$" format="tabular" directory="output"/> | |
| 94 </collection> | |
| 95 <collection name="allelcall_log" type="list" label="${tool.name} on ${on_string}: AlleleCall Logs"> | |
| 96 <discover_datasets pattern="(?P&lt;name&gt;.+)\.txt$" format="txt" directory="output"/> | |
| 97 </collection> | |
| 98 <data name="unclassified_fasta" format="fasta" from_work_dir="output/unclassified_sequences.fasta" label="${tool.name} on ${on_string}: Unclassified fasta"> | |
| 99 <filter>output['output_selector'] and 'output_unclassified' in output['output_selector']</filter> | |
| 100 </data> | |
| 101 <data name="missing_fasta" format="fasta" from_work_dir="output/missing_classes.fasta" label="${tool.name} on ${on_string}: Missing fasta"> | |
| 102 <filter>output['output_selector'] and 'output_missing' in output['output_selector']</filter> | |
| 103 </data> | |
| 104 <data name="novel_fasta" format="fasta" from_work_dir="output/novel_alleles.fasta" label="${tool.name} on ${on_string}: Novel fasta"> | |
| 105 <filter>output['output_selector'] and 'output_novel' in output['output_selector']</filter> | |
| 106 </data> | |
| 107 </outputs> | |
| 108 <tests> | |
| 109 <test expect_num_outputs="4"> | |
| 110 <param name="input_file" value="GCA_000007265"/> | |
| 111 <param name="input_schema" value="schema.zip"/> | |
| 112 <param name="output_selector" value="output_unclassified,output_missing,hash_profile" /> | |
| 113 <output_collection name="allelecall_results" type="list"> | |
| 114 <element name="cds_coordinates" file="cds_coordinates.tsv" compare="diff"/> | |
| 115 <element name="loci_summary_stats" file="loci_summary_stats.tsv" compare="diff"/> | |
| 116 <element name="paralogous_loci" ftype="tabular"> | |
| 117 <assert_contents> | |
| 118 <has_text_matching expression="Genome.*Loci.*CDS"/> | |
| 119 </assert_contents> | |
| 120 </element> | |
| 121 <element name="results_alleles" ftype="tabular"> | |
| 122 <assert_contents> | |
| 123 <has_text_matching expression="1.*1.*NIPHEM.*1.*1"/> | |
| 124 <has_text_matching expression="GCA_000007265.*1"/> | |
| 125 </assert_contents> | |
| 126 </element> | |
| 127 <element name="results_alleles" file="results_alleles.tsv" compare="diff"/> | |
| 128 <element name="results_alleles_hashed" ftype="tabular"> | |
| 129 <assert_contents> | |
| 130 <has_text_matching expression="FILE.*GCA-000007265-protein1.*GCA-000007265-protein10.*GCA-000007265-protein100"/> | |
| 131 <has_text_matching expression="GCA_000007265.*308e7666834338d0530d925b2737f2c6.*4aece26d201d59a90947e3400c7abf3f.*ebea148832aa2ae2704d37ebd5123169"/> | |
| 132 </assert_contents> | |
| 133 </element> | |
| 134 <element name="results_statistics" file="results_statistics.tsv" compare="diff"/> | |
| 135 </output_collection> | |
| 136 <output_collection name="allelcall_log" type="list"> | |
| 137 <element name="logging_info" ftype="txt"> | |
| 138 <assert_contents> | |
| 139 <has_text_matching expression="Used a BSR of: 0.6"/> | |
| 140 </assert_contents> | |
| 141 </element> | |
| 142 </output_collection> | |
| 143 <output name="unclassified_fasta"> | |
| 144 <assert_contents> | |
| 145 <has_text_matching expression="GCA_000007265-protein15"/> | |
| 146 <has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/> | |
| 147 </assert_contents> | |
| 148 </output> | |
| 149 <output name="missing_fasta"> | |
| 150 <assert_contents> | |
| 151 <has_text_matching expression="1|GCA_000007265|GCA-000007265-protein16&amp;NIPHEM|GCA_000007265-protein16&amp;EXC"/> | |
| 152 </assert_contents> | |
| 153 </output> | |
| 154 </test> | |
| 155 <test expect_num_outputs="4"> | |
| 156 <param name="input_file" value="GCA_000007265.fna"/> | |
| 157 <param name="input_schema" value="schema.zip"/> | |
| 158 <param name="output_selector" value="output_unclassified,output_missing,hash_profile" /> | |
| 159 <output_collection name="allelecall_results" type="list"> | |
| 160 <element name="paralogous_loci" ftype="tabular"> | |
| 161 <assert_contents> | |
| 162 <has_text_matching expression="Genome.*Loci.*CDS"/> | |
| 163 </assert_contents> | |
| 164 </element> | |
| 165 <element name="results_alleles" ftype="tabular"> | |
| 166 <assert_contents> | |
| 167 <has_text_matching expression="1.*1.*NIPHEM.*1.*1"/> | |
| 168 <has_text_matching expression="GCA_000007265.*1"/> | |
| 169 </assert_contents> | |
| 170 </element> | |
| 171 <element name="results_alleles_hashed" ftype="tabular"> | |
| 172 <assert_contents> | |
| 173 <has_text_matching expression="FILE.*GCA-000007265-protein1.*GCA-000007265-protein10.*GCA-000007265-protein100"/> | |
| 174 <has_text_matching expression="GCA_000007265_fna.*308e7666834338d0530d925b2737f2c6.*4aece26d201d59a90947e3400c7abf3f.*ebea148832aa2ae2704d37ebd5123169"/> | |
| 175 </assert_contents> | |
| 176 </element> | |
| 177 </output_collection> | |
| 178 <output_collection name="allelcall_log" type="list"> | |
| 179 <element name="logging_info" ftype="txt"> | |
| 180 <assert_contents> | |
| 181 <has_text_matching expression="Used a BSR of: 0.6"/> | |
| 182 </assert_contents> | |
| 183 </element> | |
| 184 </output_collection> | |
| 185 <output name="unclassified_fasta"> | |
| 186 <assert_contents> | |
| 187 <has_text_matching expression="GCA_000007265_fna-protein83"/> | |
| 188 <has_text_matching expression="ATGCACCACCTGTCACTTCTGCTCCGAAGAGAAAGCCTATCTCTAGGCCGGTCAGAAGGATGTCAAGACCTGGTAAGGTTCTTCGCGTTGCTTCGAATTAAACCACATGCTCCACCGCTTGTGCGGGCCCCCGTCAATTCCTTTGAGTTTCAACCTTGCGGTCGTACTCCCCAGGCGGAGTGCTTAATGCGTTAG"/> | |
| 189 </assert_contents> | |
| 190 </output> | |
| 191 <output name="missing_fasta"> | |
| 192 <assert_contents> | |
| 193 <has_text_matching expression="1|GCA_000007265|GCA-000007265-protein16&amp;NIPHEM|GCA_000007265-protein16&amp;EXC"/> | |
| 194 </assert_contents> | |
| 195 </output> | |
| 196 </test> | |
| 197 </tests> | |
| 198 <help> | |
| 199 chewBBACA is a software suite for the creation and evaluation of core genome and whole genome MultiLocus Sequence Typing (cg/wgMLST) schemas and results. | |
| 200 | |
| 201 In chewBBACA, by default, an allele needs to be a CDS defined by Prodigal_. To ensure reproducibility of the CDS prediction, the same Prodigal training file for each bacterial species should be used and provided as input. | |
| 202 | |
| 203 .. class:: infomark | |
| 204 | |
| 205 **Important** | |
| 206 | |
| 207 Although the use of a training file is optional, it is highly recommended to ensure consistent results. | |
| 208 | |
| 209 If the schema files were created by chewBBACA v2, please use the PrepExternalSchema module to convert the schema to a format fully compatible with chewBBACA v3. | |
| 210 | |
| 211 By default, the AlleleCall module uses the Prodigal training file included in the schema’s directory and it is not necessary to pass a training file to the --ptf parameter. | |
| 212 | |
| 213 .. class:: infomark | |
| 214 | |
| 215 **Note** | |
| 216 | |
| 217 If a text file that contains a list of full paths to loci FASTA files or loci IDs, one per line, is passed to the --genes-list parameter, the process will only perform allele calling for the loci in that list. | |
| 218 | |
| 219 .. _Prodigal: https://github.com/hyattpd/Prodigal | |
| 220 </help> | |
| 221 <expand macro="citations" /> | |
| 222 </tool> |
