comparison busco.xml @ 22:5dd4f18b0d96 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/busco/ commit 1696bfc7f5bade0686ad6731da06a436c43b59e5
author iuc
date Sat, 04 Oct 2025 17:47:40 +0000
parents c209d02621e7
children
comparison
equal deleted inserted replaced
21:c209d02621e7 22:5dd4f18b0d96
23 ## a more thorough test can be executed as follows 23 ## a more thorough test can be executed as follows
24 ## - set the `test` parameters to `""` 24 ## - set the `test` parameters to `""`
25 ## - download complete reference DB (~200G, final 105G) to tools/busco/test-data/test-db/busco_downloads 25 ## - download complete reference DB (~200G, final 105G) to tools/busco/test-data/test-db/busco_downloads
26 ## ``` 26 ## ```
27 ## busco --download_path tools/busco/test-data/test-db/busco_downloads/ --download all 27 ## busco --download_path tools/busco/test-data/test-db/busco_downloads/ --download all
28 ## find tools/busco/test-data/test-db/busco_downloads/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -exec rm -rf {} \; 28 ## find tools/busco/test-data/test-db/busco_downloads/lineages/ -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -exec rm -rf {} \;
29 ## find tools/busco/test-data/test-db/busco_downloads/placement_files -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -delete 29 ## find tools/busco/test-data/test-db/busco_downloads/placement_files -mindepth 1 -maxdepth 1 ! -name '*_odb10*' -delete
30 ## find tools/busco/test-data/test-db/busco_downloads/lineages/ -name "*.faa.gz" -exec gunzip {} \;; 30 ## find tools/busco/test-data/test-db/busco_downloads/lineages/ -name "*.faa.gz" -exec gunzip {} \;;
31 ## ``` 31 ## ```
32 ## - test containerized (note: test-data is mounted ro in containerized tests) 32 ## - test containerized (note: test-data is mounted ro in containerized tests)
33 ## 33 ##
34 ## alternatively .. a bit weaker 34 ## alternatively .. a bit weaker
35 ## - set the `test` parameters to `""` comment --offline 35 ## - set the `test` parameters to `""` comment --offline
94 #else: 94 #else:
95 --metaeuk 95 --metaeuk
96 #end if 96 #end if
97 #end if 97 #end if
98 98
99 #if $outputs and 'image' in $outputs: 99 #if $lineage.lineage_mode != "auto_detect":
100 && mkdir BUSCO_summaries 100 && cp busco_galaxy/run_*/full_table.tsv busco_galaxy/full_table_specific_lineage.tsv
101 && cp busco_galaxy/short_summary.*.txt BUSCO_summaries/ 101 && cp busco_galaxy/run_*/missing_busco_list.tsv busco_galaxy/missing_busco_list_specific_lineage.tsv
102 && generate_plot.py -wd BUSCO_summaries -rt specific 102 #if $outputs and 'image' in $outputs:
103 #end if 103 && mkdir BUSCO_summaries
104 104 && cp busco_galaxy/short_summary.*.txt BUSCO_summaries/
105 #if $outputs and 'gff' in $outputs: 105 && generate_plot.py -wd BUSCO_summaries -rt specific
106 && echo "\##gff-version 3" > busco_output.gff 106 && cp BUSCO_summaries/busco_figure.png busco_galaxy/busco_figure_specific_lineage.png
107 ## gff files can be absent 107 #end if
108 && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.gff >> busco_output.gff 2> /dev/null || true) 108
109 #end if 109 #if $outputs and 'gff' in $outputs:
110 #if $outputs and 'faa' in $outputs: 110 && echo "\##gff-version 3" > busco_galaxy/busco_output_specific.gff
111 && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.faa >> busco_output.faa 2> /dev/null || true) 111 ## gff files can be absent
112 #end if 112 && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.gff >> busco_galaxy/busco_output_specific.gff 2> /dev/null || true)
113 #if $outputs and 'fna' in $outputs: 113 #end if
114 && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.fna >> busco_output.fna 2> /dev/null || true) 114 #if $outputs and 'faa' in $outputs:
115 && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.faa >> busco_galaxy/busco_output_specific.faa 2> /dev/null || true)
116 #end if
117 #if $outputs and 'fna' in $outputs:
118 && (cat busco_galaxy/run_*/busco_sequences/*busco_sequences/*.fna >> busco_galaxy/busco_output_specific.fna 2> /dev/null || true)
119 #end if
120 #end if
121
122 ## if $lineage.lineage_mode == "auto_detect", BUSCO outputs several files with the same name, which causes an issue, so we need to rename the files
123 #if $lineage.lineage_mode == "auto_detect":
124 && specific_lineage=\$(find ./busco_galaxy -name "short_summary.specific.*.txt" | head -n 1 | cut -d'.' -f4)
125 && generic_lineage=\$(find ./busco_galaxy -name "short_summary.generic.*.txt" | head -n 1 | cut -d'.' -f4)
126
127 ## Rename the output files to differentiate specific and generic lineage results
128 ## Full table
129 && cp busco_galaxy/run_\${specific_lineage}/full_table.tsv busco_galaxy/full_table_specific_lineage.tsv
130 && cp busco_galaxy/run_\${generic_lineage}/full_table.tsv busco_galaxy/full_table_generic_lineage.tsv
131 ##Missing busco list
132 && cp busco_galaxy/run_\${specific_lineage}/missing_busco_list.tsv busco_galaxy/missing_busco_list_specific_lineage.tsv
133 && cp busco_galaxy/run_\${generic_lineage}/missing_busco_list.tsv busco_galaxy/missing_busco_list_generic_lineage.tsv
134 #if $outputs and 'image' in $outputs:
135 && mkdir BUSCO_summaries_specific
136 && cp busco_galaxy/short_summary.specific.*.txt BUSCO_summaries_specific/
137 && generate_plot.py -wd BUSCO_summaries_specific -rt specific
138 && cp BUSCO_summaries_specific/busco_figure.png busco_galaxy/busco_figure_specific_lineage.png
139 && mkdir BUSCO_summaries_generic
140 && cp busco_galaxy/short_summary.generic.*.txt BUSCO_summaries_generic/
141 && generate_plot.py -wd BUSCO_summaries_generic -rt generic
142 && cp BUSCO_summaries_generic/busco_figure.png busco_galaxy/busco_figure_generic_lineage.png
143 #end if
144 #if $outputs and 'gff' in $outputs:
145 ## gff files can be absent
146 && echo "\##gff-version 3" > busco_galaxy/busco_output_specific.gff
147 && (cat busco_galaxy/run_\${specific_lineage}/busco_sequences/*busco_sequences/*.gff >> busco_galaxy/busco_output_specific.gff 2> /dev/null || true)
148 && echo "\##gff-version 3" > busco_galaxy/busco_output_generic.gff
149 && (cat busco_galaxy/run_\${generic_lineage}/busco_sequences/*busco_sequences/*.gff >> busco_galaxy/busco_output_generic.gff 2> /dev/null || true)
150 #end if
151 #if $outputs and 'faa' in $outputs:
152 && (cat busco_galaxy/run_\${specific_lineage}/busco_sequences/*busco_sequences/*.faa >> busco_galaxy/busco_output_specific.faa 2> /dev/null || true)
153 && (cat busco_galaxy/run_\${generic_lineage}/busco_sequences/*busco_sequences/*.faa >> busco_galaxy/busco_output_generic.faa 2> /dev/null || true)
154 #end if
155 #if $outputs and 'fna' in $outputs:
156 && (cat busco_galaxy/run_\${specific_lineage}/busco_sequences/*busco_sequences/*.fna >> busco_galaxy/busco_output_specific.fna 2> /dev/null || true)
157 && (cat busco_galaxy/run_\${generic_lineage}/busco_sequences/*busco_sequences/*.fna >> busco_galaxy/busco_output_generic.fna 2> /dev/null || true)
158 #end if
115 #end if 159 #end if
116 ]]></command> 160 ]]></command>
117 <inputs> 161 <inputs>
118 <param name="test" type="hidden"/> 162 <param name="test" type="hidden"/>
119 <param type="data" name="input" format="fasta,fasta.gz" label="Sequences to analyse" help="Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set."/> 163 <param type="data" name="input" format="fasta,fasta.gz" label="Sequences to analyse" help="Can be an assembled genome or transcriptome (DNA), or protein sequences from an annotated gene set."/>
170 <param name="auto_lineage" type="select" label="auto-lineage group" help="Taxonomic group to run with auto-lineage."> 214 <param name="auto_lineage" type="select" label="auto-lineage group" help="Taxonomic group to run with auto-lineage.">
171 <option value="--auto-lineage">All taxonomic groups (--auto-lineage)</option> 215 <option value="--auto-lineage">All taxonomic groups (--auto-lineage)</option>
172 <option value="--auto-lineage-prok">Prokaryotes (--auto-lineage-prok)</option> 216 <option value="--auto-lineage-prok">Prokaryotes (--auto-lineage-prok)</option>
173 <option value="--auto-lineage-euk">Eukaryotes (--auto-lineage-euk)</option> 217 <option value="--auto-lineage-euk">Eukaryotes (--auto-lineage-euk)</option>
174 </param> 218 </param>
219 <param name="generic_results" type="boolean" checked="false" label="Do you want to display the results from the generic lineage?" help="BUSCO generates files for the parent lineage (generic) and the final selected lineage (specific). Generally, the lineage to select for your assessments should be the most specific lineage available. The generic lineage datasets are those for the domains archaea, bacteria and eukaryota."/>
175 </when> 220 </when>
176 <when value="select_lineage"> 221 <when value="select_lineage">
177 <param argument="--lineage_dataset" type="select" label="Lineage"> 222 <param argument="--lineage_dataset" type="select" label="Lineage">
178 <options from_data_table="busco_database_options"> 223 <options from_data_table="busco_database_options">
179 <filter type="param_value" column="2" ref="cached_db"/> 224 <filter type="param_value" column="2" ref="cached_db"/>
181 </param> 226 </param>
182 </when> 227 </when>
183 </conditional> 228 </conditional>
184 229
185 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated"> 230 <param name="outputs" type="select" optional="true" multiple="true" label="Which outputs should be generated">
186 <option value="short_summary" selected="true">short summary text</option> 231 <option value="short_summary" selected="true">Short summary text</option>
187 <option value="missing">list with missing IDs</option> 232 <option value="missing">List with missing IDs</option>
188 <option value="image">summary image</option> 233 <option value="image">Summary image</option>
189 <option value="gff">gff</option> 234 <option value="gff">gff</option>
190 <option value="faa">Protein sequences</option> 235 <option value="faa">Protein sequences</option>
191 <option value="fna">Nucleotide sequences</option> 236 <option value="fna">Nucleotide sequences</option>
192 </param> 237 </param>
193 238
197 <param argument="--contig_break" type="integer" value="10" label="Number of contiguous Ns to signify a break between contigs"/> 242 <param argument="--contig_break" type="integer" value="10" label="Number of contiguous Ns to signify a break between contigs"/>
198 </section> 243 </section>
199 </inputs> 244 </inputs>
200 245
201 <outputs> 246 <outputs>
202 <data name='busco_sum' format='txt' label="${tool.name} on ${on_string}: short summary" from_work_dir="busco_galaxy/run_*/short_summary.txt"> 247 <data name='busco_sum' format='txt' label="${tool.name} on ${on_string}: Short summary - Specific lineage" from_work_dir="busco_galaxy/short_summary.specific.*.txt">
203 <filter>outputs and 'short_summary' in outputs</filter> 248 <filter>outputs and 'short_summary' in outputs</filter>
204 </data> 249 </data>
205 <data name='busco_table' format='tabular' label="${tool.name} on ${on_string}: full table" from_work_dir="busco_galaxy/run_*/full_table.tsv"/> 250 <data name='busco_sum_generic' format='txt' label="${tool.name} on ${on_string}: Short summary - Generic lineage" from_work_dir="busco_galaxy/short_summary.generic.*.txt">
206 <data name='busco_missing' format='tabular' label="${tool.name} on ${on_string}: missing buscos" from_work_dir="busco_galaxy/run_*/missing_busco_list.tsv"> 251 <filter>(outputs and 'short_summary' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
252 </data>
253 <data name='busco_table' format='tabular' label="${tool.name} on ${on_string}: Full table - Specific lineage" from_work_dir="busco_galaxy/full_table_specific_lineage.tsv">
254 </data>
255 <data name='busco_table_generic' format='tabular' label="${tool.name} on ${on_string}: Full table - Generic lineage" from_work_dir="busco_galaxy/full_table_generic_lineage.tsv">
256 <filter>lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
257 </data>
258 <data name='busco_missing' format='tabular' label="${tool.name} on ${on_string}: Missing buscos - Specific lineage" from_work_dir="busco_galaxy/missing_busco_list_specific_lineage.tsv">
207 <filter>outputs and 'missing' in outputs</filter> 259 <filter>outputs and 'missing' in outputs</filter>
208 </data> 260 </data>
209 <data name='summary_image' format='png' label="${tool.name} on ${on_string}: summary image" from_work_dir="BUSCO_summaries/busco_figure.png"> 261 <data name='busco_missing_generic' format='tabular' label="${tool.name} on ${on_string}: Missing buscos - Generic lineage" from_work_dir="busco_galaxy/missing_busco_list_generic_lineage.tsv">
262 <filter>(outputs and 'missing' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
263 </data>
264 <data name='summary_image' format='png' label="${tool.name} on ${on_string}: Summary image - Specific lineage" from_work_dir="busco_galaxy/busco_figure_specific_lineage.png">
210 <filter>outputs and 'image' in outputs</filter> 265 <filter>outputs and 'image' in outputs</filter>
211 </data> 266 </data>
212 <data name='busco_gff' format='gff3' label="${tool.name} on ${on_string}: GFF" from_work_dir="busco_output.gff"> 267 <data name='summary_image_generic' format='png' label="${tool.name} on ${on_string}: Summary image - Generic lineage" from_work_dir="busco_galaxy/busco_figure_generic_lineage.png">
268 <filter>(outputs and 'image' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
269 </data>
270 <data name='busco_gff' format='gff3' label="${tool.name} on ${on_string}: GFF - Specific lineage" from_work_dir="busco_galaxy/busco_output_specific.gff">
213 <filter>outputs and 'gff' in outputs</filter> 271 <filter>outputs and 'gff' in outputs</filter>
214 </data> 272 </data>
215 <data name='busco_faa' format='fasta' label="${tool.name} on ${on_string}: Protein sequences" from_work_dir="busco_output.faa"> 273 <data name='busco_gff_generic' format='gff3' label="${tool.name} on ${on_string}: GFF - Generic lineage" from_work_dir="busco_galaxy/busco_output_generic.gff">
274 <filter>(outputs and 'gff' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
275 </data>
276 <data name='busco_faa' format='fasta' label="${tool.name} on ${on_string}: Protein sequences - Specific lineage" from_work_dir="busco_galaxy/busco_output_specific.faa">
216 <filter>outputs and 'faa' in outputs</filter> 277 <filter>outputs and 'faa' in outputs</filter>
217 </data> 278 </data>
218 <data name='busco_fna' format='fasta' label="${tool.name} on ${on_string}: Nucleotide sequences" from_work_dir="busco_output.fna"> 279 <data name='busco_faa_generic' format='fasta' label="${tool.name} on ${on_string}: Protein sequences - Generic lineage" from_work_dir="busco_galaxy/busco_output_generic.faa">
280 <filter>(outputs and 'faa' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
281 </data>
282 <data name='busco_fna' format='fasta' label="${tool.name} on ${on_string}: Nucleotide sequences - Specific lineage" from_work_dir="busco_galaxy/busco_output_specific.fna">
219 <filter>outputs and 'fna' in outputs</filter> 283 <filter>outputs and 'fna' in outputs</filter>
284 </data>
285 <data name='busco_fna_generic' format='fasta' label="${tool.name} on ${on_string}: Nucleotide sequences - Generic lineage" from_work_dir="busco_galaxy/busco_output_generic.fna">
286 <filter>(outputs and 'fna' in outputs) and lineage['lineage_mode'] == 'auto_detect' and lineage['generic_results']</filter>
220 </data> 287 </data>
221 </outputs> 288 </outputs>
289
222 290
223 <tests> 291 <tests>
224 <!-- <test expect_num_outputs="6"> 292 <!-- <test expect_num_outputs="6">
225 <param name="test" value="true"/> 293 <param name="test" value="true"/>
226 <param name="input" value="genome.fa.gz" ftype="fasta.gz"/> 294 <param name="input" value="genome.fa.gz" ftype="fasta.gz"/>
416 <output name="busco_gff" file="genome_results_metaeuk/out.gff3" compare="diff" lines_diff="6"/> 484 <output name="busco_gff" file="genome_results_metaeuk/out.gff3" compare="diff" lines_diff="6"/>
417 <assert_stdout> 485 <assert_stdout>
418 <has_text text="BUSCO analysis done"/> 486 <has_text text="BUSCO analysis done"/>
419 </assert_stdout> 487 </assert_stdout>
420 </test> --> 488 </test> -->
421 <test expect_num_outputs="5"> 489 <test expect_num_outputs="2">
490 <param name="test" value="true"/>
491 <param name="input" value="bacilli_odb_test.fasta"/>
492 <conditional name="lineage">
493 <param name="lineage_mode" value="auto_detect"/>
494 <param name="auto_lineage" value="--auto-lineage-prok"/>
495 </conditional>
496 <conditional name="busco_mode">
497 <param name="mode" value="geno"/>
498 </conditional>
499 <param name="outputs" value="short_summary"/>
500 <output name="busco_sum">
501 <assert_contents>
502 <has_text text="Gene predictor used: prodigal"/>
503 </assert_contents>
504 </output>
505 <output name="busco_table">
506 <assert_contents>
507 <has_text text="# BUSCO version is: @TOOL_VERSION@"/>
508 <has_text text="The lineage dataset is: bacilli_odb10"/>
509 </assert_contents>
510 </output>
511 </test>
512 <test expect_num_outputs="10">
422 <param name="test" value="true"/> 513 <param name="test" value="true"/>
423 <param name="input" value="genome.fa"/> 514 <param name="input" value="genome.fa"/>
424 <conditional name="lineage"> 515 <conditional name="lineage">
425 <param name="lineage_mode" value="auto_detect"/> 516 <param name="lineage_mode" value="auto_detect"/>
426 <param name="auto_lineage" value="--auto-lineage"/> 517 <param name="auto_lineage" value="--auto-lineage"/>
518 <param name="generic_results" value="true"/>
427 </conditional> 519 </conditional>
428 <conditional name="busco_mode"> 520 <conditional name="busco_mode">
429 <param name="mode" value="geno"/> 521 <param name="mode" value="geno"/>
430 <conditional name="use_augustus"> 522 <conditional name="use_augustus">
431 <param name="use_augustus_selector" value="metaeuk"/> 523 <param name="use_augustus_selector" value="metaeuk"/>
432 </conditional> 524 </conditional>
433 </conditional> 525 </conditional>
434 <param name="outputs" value="short_summary,missing,image,gff"/> 526 <param name="outputs" value="short_summary,missing,image,gff"/>
435 <output name="busco_sum"> 527 <output name="busco_sum">
436 <assert_contents> 528 <assert_contents>
437 <has_text text="Gene predictor used: metaeuk"/> 529 <has_text text="Gene predictor used: prodigal"/>
530 </assert_contents>
531 </output>
532 <output name="busco_sum_generic">
533 <assert_contents>
534 <has_text text="Gene predictor used: prodigal"/>
438 </assert_contents> 535 </assert_contents>
439 </output> 536 </output>
440 <output name="busco_table"> 537 <output name="busco_table">
441 <assert_contents> 538 <assert_contents>
442 <has_text text="# BUSCO version is: @TOOL_VERSION@"/> 539 <has_text text="# BUSCO version is: @TOOL_VERSION@"/>
443 <has_text text="The lineage dataset is: eukaryota_odb10"/> 540 <has_text text="The lineage dataset is: "/>
444 </assert_contents> 541 </assert_contents>
445 </output> 542 </output>
446 <output name="busco_missing"> 543 <output name="busco_table_generic">
447 <assert_contents> 544 <assert_contents>
448 <has_text text="# BUSCO version is: @TOOL_VERSION@"/> 545 <has_text text="# BUSCO version is: @TOOL_VERSION@"/>
449 <has_text text="The lineage dataset is: eukaryota_odb10"/> 546 <has_text text="The lineage dataset is: "/>
547 </assert_contents>
548 </output>
549 <output name="busco_missing">
550 <assert_contents>
551 <has_text text="# BUSCO version is: @TOOL_VERSION@"/>
552 <has_text text="The lineage dataset is: "/>
553 </assert_contents>
554 </output>
555 <output name="busco_missing_generic">
556 <assert_contents>
557 <has_text text="# BUSCO version is: @TOOL_VERSION@"/>
558 <has_text text="The lineage dataset is: "/>
450 </assert_contents> 559 </assert_contents>
451 </output> 560 </output>
452 <output name="summary_image" file="genome_results_metaeuk_auto/summary.png" compare="sim_size"/> 561 <output name="summary_image" file="genome_results_metaeuk_auto/summary.png" compare="sim_size"/>
453 <output name="busco_gff" file="genome_results_metaeuk_auto/out.gff" compare="diff"/> 562 <output name="summary_image_generic" file="genome_results_metaeuk_auto/summary.png" compare="sim_size"/>
563 <output name="busco_gff">
564 <assert_contents>
565 <has_text text="##gff-version 3"/>
566 </assert_contents>
567 </output>
568 <output name="busco_gff_generic">
569 <assert_contents>
570 <has_text text="##gff-version 3"/>
571 </assert_contents>
572 </output>
454 <assert_stdout> 573 <assert_stdout>
455 <has_text text="BUSCO analysis done"/> 574 <has_text text="BUSCO analysis done"/>
456 </assert_stdout> 575 </assert_stdout>
457 </test> 576 </test>
458 <!-- <test expect_num_outputs="3"> 577 <!-- <test expect_num_outputs="3">