comparison binette.xml @ 0:8faabc0f7f46 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/binette/ commit 59b031eff1c156122720281e42b0eaa8d3724c57
author iuc
date Mon, 20 Jan 2025 16:19:23 +0000
parents
children 6108dc80d9b2
comparison
equal deleted inserted replaced
-1:000000000000 0:8faabc0f7f46
1 <tool id="binette" name="Binette" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
2 <description>Binning refinement tool</description>
3 <macros> <!-- Tokens referenced by the tool element's version and profile attributes and by the requirement version. -->
4 <token name="@TOOL_VERSION@">1.0.5</token> <!-- Version of the wrapped binette package. -->
5 <token name="@VERSION_SUFFIX@">0</token> <!-- Wrapper revision, appended after "+galaxy" in the tool version. -->
6 <token name="@PROFILE@">24.1</token> <!-- Value substituted into the tool's profile attribute. -->
7 </macros>
8 <requirements>
9 <requirement type="package" version="@TOOL_VERSION@">binette</requirement> <!-- Package version follows the tool version token. -->
10 </requirements>
11 <command detect_errors="exit_code">
12 <![CDATA[
13 mkdir -p 'input' 'output' &&
14 ## Stage every contig-to-bin table under input/ so the input/*.tsv glob given to -b finds them all.
15 #for $i, $file in enumerate($contig2bin_tables):
16 ln -s '$file' 'input/bin_table_${i}.tsv' &&
17 #end for
18 ## Link the contigs and, for the history option, the CheckM2 database to fixed file names.
19 ln -s '$contigs' 'input_contigs.fasta' &&
20 #if $database_type.is_select == 'his':
21 ln -s '$database_type.checkm2_db' 'input_database.dmnd' &&
22 #end if
23 ## The protein FASTA is optional, so it is linked (and passed via -p) only when supplied.
24 #if $proteins:
25 ln -s '$proteins' 'input_proteins.fasta' &&
26 #end if
27 ## Run Binette on the staged inputs; nested parameters are referenced through their database_type conditional.
28 binette
29 -b input/*.tsv
30 -c 'input_contigs.fasta'
31 #if $proteins:
32 -p 'input_proteins.fasta'
33 #end if
34 -m ${min_completeness}
35 -t "\${GALAXY_SLOTS:-1}"
36 -o 'output/'
37 -w ${contamination_weight}
38 #if $database_type.is_select == 'his':
39 --checkm2_db 'input_database.dmnd'
40 #else
41 --checkm2_db '$database_type.datamanager.fields.path'
42 #end if
43
44 ]]>
45 </command>
46 <inputs> <!-- Parameters declared with an argument attribute take their name from it, without the leading dashes. -->
47 <param argument="--contig2bin_tables" type="data" multiple="true" min="2" format="tabular" label="Input contig table"
48 help="Input at least 2 different contig tables. Look into the help section at the bottom for more information!"/>
49 <param argument="--contigs" type="data" format="fasta,fasta.gz" label="Input contig file"/>
50 <param argument="--proteins" type="data" format="fasta,fasta.gz" optional="true" label="Input FASTA file in Prodigal format (>contigID_geneID)"
51 help="If this file is provided, the gene prediction step is skipped and the genes in this file are used instead. An example of this format is in the help section"/>
52 <param argument="--min_completeness" type="integer" min="0" max="100" value="40" label="Set minimum completeness"
53 help="Threshold for bins for the final bin selection"/>
54 <param argument="--contamination_weight" type="integer" value="2" label="Set contamination weight"
55 help="This weight is used for scoring the bins. A low weight favors complete bins over low contaminated bins"/>
56 <conditional name="database_type"> <!-- Chooses the CheckM2 database source: a history dataset or a cached data table entry. -->
57 <param name="is_select" type="select" label="Select if database should be used either via file or cached database">
58 <option value="cached">cached database</option>
59 <option value="his">History</option>
60 </param>
61 <when value="his">
62 <param argument="--checkm2_db" type="data" format="dmnd" label="Input CheckM2 diamond database"
63 help="When a CheckM2 diamond database should be used download and input it here."/>
64 </when>
65 <when value="cached">
66 <param name="datamanager" type="select" label="Select CheckM2 database" help="CheckM2 Diamond database">
67 <options from_data_table="checkm2"> <!-- Entries come from the checkm2 data table, sorted by column 2. -->
68 <filter type="sort_by" column="2"/>
69 </options>
70 <validator type="no_options" message="No databases are available for this version of Checkm2. Please contact the Galaxy administrators to request one be installed."/>
71 </param>
72 </when>
73 </conditional>
74 </inputs>
75 <outputs> <!-- Two discovered list collections plus one fixed tabular report, all read from below output/. -->
76 <collection name="bins" type="list" label="${tool.name} on ${on_string}: Bins"> <!-- One fasta element per file in output/final_bins matching the pattern; the text before ".fa" is captured as the designation. -->
77 <discover_datasets pattern="((?P&lt;designation&gt;.*)\.fa)" format="fasta" directory="output/final_bins"/>
78 </collection>
79 <collection name="quality" type="list" label="${tool.name} on ${on_string}: Quality Report"> <!-- Same scheme for the ".tsv" files in output/input_bins_quality_reports. -->
80 <discover_datasets pattern="((?P&lt;designation&gt;.*)\.tsv)" format="tabular" directory="output/input_bins_quality_reports"/>
81 </collection>
82 <data name="final" format="tabular" from_work_dir="output/final_bins_quality_reports.tsv" label="${tool.name} on ${on_string}: Final Quality Report"/> <!-- Single report taken from the named file in the working directory. -->
83 </outputs>
84 <tests>
85 <test expect_num_outputs="3"> <!-- History case: CheckM2 database supplied as a dataset, no protein file. -->
86 <param name="contig2bin_tables" ftype="tabular" value="A.binning,B.binning,C.binning"/>
87 <param name="contigs" value="all_contig.fasta.gz" ftype="fasta.gz"/>
88 <param name="min_completeness" value="5"/>
89 <param name="contamination_weight" value="0"/>
90 <conditional name="database_type">
91 <param name="is_select" value="his"/>
92 <param name="checkm2_db" value="checkm2_tiny_db.dmnd"/>
93 </conditional>
94 <output name="final" ftype="tabular">
95 <assert_contents> <!-- Substring checks only; they do not pin specific rows or columns of the report. -->
96 <has_text text="binC"/>
97 <has_text text="50"/>
98 <has_text text="9"/>
99 </assert_contents>
100 </output>
101 <output_collection name="bins" count="4"/>
102 </test>
103 <test expect_num_outputs="3"> <!-- Cached case: database chosen from the data table (test_db), protein file supplied, default thresholds. -->
104 <param name="contig2bin_tables" ftype="tabular" value="A.binning,B.binning,C.binning"/>
105 <param name="contigs" value="all_contig.fasta.gz" ftype="fasta.gz"/>
106 <param name="min_completeness" value="40"/>
107 <param name="contamination_weight" value="2"/>
108 <conditional name="database_type">
109 <param name="is_select" value="cached"/>
110 <param name="datamanager" value="test_db"/>
111 </conditional>
112 <param name="proteins" ftype="fasta.gz" value="proteins.fasta.gz"/>
113 <output name="final" ftype="tabular">
114 <assert_contents> <!-- Substring checks only; they do not pin specific rows or columns of the report. -->
115 <has_text text="binC"/>
116 <has_text text="50"/>
117 <has_text text="40"/>
118 </assert_contents>
119 </output>
120 <output_collection name="bins" count="4"/>
121 </test>
122 </tests>
123 <help>
124 <![CDATA[
125
126 .. class:: infomark
127
128 **What Binette does**
129
130 Binette is a fast and accurate binning refinement tool that constructs high-quality MAGs from the output of multiple binning tools.
131
132 **Inputs**
133
134 - At least 2 different contig tables.
135
136 .. class:: infomark
137
138 The contig tables can be generated by the tool *Converts genome bins in fasta format*. This tool only needs the bins that were created by any binner as input.
139
140 - The contig file
141
142 .. class:: infomark
143
144 This file should contain all contigs used to create the bins. The format of this file should be either fasta or fasta.gz.
145
146 - A CheckM2 diamond database
147
148 .. class:: infomark
149
150 This database can be downloaded using the CheckM2 package with the following command: *checkm2 database --download --path <checkm2/database/>*. Alternatively, it is possible to use a database cached on Galaxy.
151
152
153 - An optional (fasta/fasta.gz) file with predicted genes
154
155 .. class:: infomark
156
157 This file, in FASTA format, is generated with the tool *Prodigal*.
158
159 Example:
160
161 ::
162
163 >Chlamydia_trachomatis_part1_1 # 1 # 1776 # 1 # ID=1_1;partial=10;start_type=Edge;rbs_motif=None;rbs_spacer=None;gc_cont=0.466
164 MSIRGVGGNGNSRIPSHNGDGSNRRSQNTKGNNKVEDRVCSLYSSRSNENRESPYAVVDV
165 SSMIESTPTSGETTRASRGVFSRFQRGLVRIADKVRRAVQCAWSSVSTSRSSATRAAESG
166 SSSRTARGASSGYREYSPSAARGLRLMFTDFWRTRVLRQTSPMAGVFGNLDVNEARLMAA
167 YTSECADHLEAKELAGPDGVAAAREIAKRWEKRVRDLQDKGAARKLLNDPLGRRTPNYQS
168 KNPGEYTVGNSMFYDGPQVANLQNVDTGFWLDMSNLSDVVLSREIQTGLRARATLEESMP
169 MLENLEERFRRLQETCDAARTEIEESGWTRESASRMEGDEAQGPSRAQQAFQSFVNECNS
170 IEFSFGSFGEHVRVLCARVSRGLAAAGEAIRRCFSCCKGSTHRYAPRDDLSPEGASLAET
171 LARFADDMGIERGADGTYDIPLVDDWRRGVPSIEGEGSDSIYEIMMPIYEVMNMDLETRR
172 SFAVQQGHYQDPRASDYDLPRASDYDLPRSPYPTPPLPPRYQLQNMDVEAGFREAVYASF
173 VAGMYNYVVTQPQERIPNSQQVEGILRDMLTNGSQTFRDLMKRWNREVDRE*
174
175 **Outputs**
176
177 - A collection (list) with all the selected bins in fasta format.
178
179 - A final quality report file containing quality information about the final selected bins.
180
181 - A collection (list) storing quality reports for the input bin sets, with files following the same structure as the final quality report file.
182
183 ]]>
184 </help>
185 <citations> <!-- Reference to cite when using this tool, given as a DOI. -->
186 <citation type="doi">10.21105/joss.06782</citation>
187 </citations>
188 </tool>