comparison CreateSchema.xml @ 0:1fc5ac7a8dce draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/chewbbaca commit 8bb518e20d68623904232ae28bb8a51ec05c1c4a
author iuc
date Wed, 25 Sep 2024 14:13:09 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:1fc5ac7a8dce
1 <tool id="chewbbaca_createschema" name="chewBBACA CreateSchema" version="@CHEW_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Create a gene-by-gene schema</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[
## Stage every selected dataset under input/ using a sanitised file name.
8 #import re
9 mkdir 'input' &&
10 #for $file in $input_file
## Any character other than a word character or '-' in the element identifier becomes '_'.
11 #set escaped_element_identifier = re.sub('[^\w\-]', '_', str($file.element_identifier))
12 ln -sf '$file' 'input/${escaped_element_identifier}.${file.ext}' &&
13 #end for
## training_file, cds_input and prodigal_mode are declared inside the "advanced" section,
## so they are addressed through that section rather than as bare top-level names.
14 chewBBACA.py CreateSchema
15 #if $advanced.training_file:
16 --ptf '$advanced.training_file'
17 #end if
18 $advanced.cds_input
19 @COMMON_INPUT@
20 --pm $advanced.prodigal_mode
21 -i 'input' -o 'output' &&
## Archive the generated schema_seed directory; the zip is collected from output/schema_seed.zip.
22 cd 'output/' &&
23 zip -r schema_seed.zip 'schema_seed'
24 ]]></command>
25 <inputs>
<!-- One or more FASTA datasets; each one is linked into the input directory by the command template. -->
26 <param name="input_file" type="data" format="fasta" multiple="True" label="Genome assemblies in FASTA format"/>
27 <section name="advanced" title="Advanced options">
<!-- Optional training file; the command template only adds the ptf option when a dataset is selected. -->
28 <param argument="--training-file" type="data" format="binary" optional="true" label="Prodigal training file"/>
<!-- Boolean that expands to its truevalue (the flag itself) when checked and to an empty string otherwise. -->
29 <param argument="--cds-input" type="boolean" optional="true" checked="false" truevalue="--cds-input" falsevalue="" label="CDS input"/>
30 <param argument="--minimum-length" type="integer" value="201" min="0" label="Minimum sequence length value"/>
31 <expand macro="common_param" />
32 <param argument="--prodigal-mode" type="select" help="&quot;single&quot; for finished genomes, reasonable quality draft genomes and big viruses. &quot;meta&quot; for metagenomes, low quality draft genomes, small viruses, and small plasmids" label="Prodigal Mode">
33 <option value="single" selected="true">
34 single
35 </option>
36 <option value="meta">
37 meta
38 </option>
39 </param>
40 </section>
<!-- These two switches are read by the filter expressions of the optional outputs. -->
41 <section name="output" title="Output options">
42 <param name="show_cds_invalid" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Output invalid CDS file?"/>
43 <param name="show_cds_coord" type="boolean" checked="false" truevalue="true" falsevalue="false" label="Output CDS coordinates File?"/>
44 </section>
45 </inputs>
46 <outputs>
<!-- Always produced: the zipped schema_seed directory created at the end of the command. -->
47 <data name="schema" format="zip" from_work_dir="output/schema_seed.zip" label="${tool.name} on ${on_string}: Schema files"/>
<!-- Optional: kept only when "show_cds_invalid" in the "output" section is enabled. -->
48 <data name="txt_file" format="txt" from_work_dir="output/invalid_cds.txt" label="${tool.name} on ${on_string}: Invalid CDS">
49 <filter>output['show_cds_invalid']</filter>
50 </data>
<!-- Optional: kept only when "show_cds_coord" in the "output" section is enabled. -->
51 <data name="tsv_file" format="tabular" from_work_dir="output/cds_coordinates.tsv" label="${tool.name} on ${on_string}: CDS coordinates">
52 <filter>output['show_cds_coord']</filter>
53 </data>
54 </outputs>
55 <tests>
<!-- NOTE(review): every test expects a single output; no test enables show_cds_invalid or show_cds_coord, so the two filtered outputs are not exercised here. -->
<!-- Test 1: default settings on one genome assembly. Expects 204 archive members matching schema_seed/.*\.fasta, 102 matching the short/ sub-directory pattern, and the .schema_config file. -->
56 <test expect_num_outputs="1">
57 <param name="input_file" value="GCA_000007265.fna"/>
58 <output name="schema">
59 <assert_contents>
60 <has_archive_member path="schema_seed/.*\.fasta" n="204"/>
61 <has_archive_member path="schema_seed/short/.*\.fasta" n="102"/>
62 <has_archive_member path="schema_seed/\.schema_config"/>
63 </assert_contents>
64 </output>
65 </test>
<!-- Test 2: same assertions as test 1, with the input value given without a file extension. Presumably this covers extension-less dataset names; TODO confirm against the test-data directory. -->
66 <test expect_num_outputs="1">
67 <param name="input_file" value="GCA_000007265"/>
68 <output name="schema">
69 <assert_contents>
70 <has_archive_member path="schema_seed/.*\.fasta" n="204"/>
71 <has_archive_member path="schema_seed/short/.*\.fasta" n="102"/>
72 <has_archive_member path="schema_seed/\.schema_config"/>
73 </assert_contents>
74 </output>
75 </test>
<!-- Test 3: same assembly as test 1 plus a Prodigal training file; the expected member counts are 198 and 99. -->
76 <test expect_num_outputs="1">
77 <param name="input_file" value="GCA_000007265.fna"/>
78 <param name="training_file" value="Streptococcus_agalactiae.trn"/>
79 <output name="schema">
80 <assert_contents>
81 <has_archive_member path="schema_seed/.*\.fasta" n="198"/>
82 <has_archive_member path="schema_seed/short/.*\.fasta" n="99"/>
83 <has_archive_member path="schema_seed/\.schema_config"/>
84 </assert_contents>
85 </output>
86 </test>
<!-- Test 4: CDS input mode enabled on a FASTA of coding sequences; checks that the archive holds a member named CDS-Str-agalactiae-fasta-protein1.fasta. -->
87 <test expect_num_outputs="1">
88 <param name="input_file" value="CDS_Str_agalactiae.fasta"/>
89 <param name="cds_input" value="true"/>
90 <output name="schema">
91 <assert_contents>
92 <has_archive_member path="schema_seed/CDS-Str-agalactiae-fasta-protein1.fasta"/>
93 </assert_contents>
94 </output>
95 </test>
96 </tests>
97 <help>
98
99 chewBBACA is a software suite for the creation and evaluation of core genome and whole genome MultiLocus Sequence Typing (cg/wgMLST) schemas and results.
100
101 A Schema is a pre-defined set of loci that is used in MLST analyses. Traditional MLST schemas relied on 7 loci that were internal fragments of housekeeping genes and each locus was defined by its amplification by a pair of primers yielding a fragment of a defined size.
102
103 In genomic analyses, schemas are a set of loci that are:
104
105 - Present in the majority of strains for core genome (cg) MLST schemas, typically a threshold of presence in 95% of the strains is used in schema creation. The assumption is that in each strain up to 5% of loci may not be identified due to sequencing coverage problems, assembly problems or other issues related to the use of draft genome assemblies.
106 - Present in at least one of the analyzed strains in the schema creation for pan genome/whole genome (pg/wg) MLST schemas.
107 - Present in less than 95% of the strains for accessory genome (ag) MLST schemas.
108
109 .. class:: infomark
110
111 **Note**
112
113 These definitions are always operational in nature, in the sense that the analyses are performed on a limited number of strains representing part of the biological diversity of a given species or genus and are always dependent on the definition of thresholds.
114
115 .. class:: infomark
116
117 **Important**
118
119 The use of a Prodigal training file for schema creation is highly recommended.
120
121 .. class:: infomark
122
123 **Important**
124
125 If you provide the **--cds-input** parameter, chewBBACA assumes that the input FASTA files contain coding sequences and skips the gene prediction step with Prodigal. To avoid issues related to the format of the sequence headers, chewBBACA renames the sequence headers based on the unique basename prefix determined for each input file and on the order of the coding sequences (e.g.: coding sequences inside a file named GCF_000007125.1_ASM712v1_cds_from_genomic.fna are renamed to GCF_000007125-protein1, GCF_000007125-protein2, …, GCF_000007125-proteinN).
126
127 </help>
128 <expand macro="citations" />
129 </tool>