Mercurial > repos > iuc > semibin
diff semibin.xml @ 3:8673617e7e09 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/semibin commit 2c08a2e49a2844efe92340c5a9e9c8323e4a33d6
| author | iuc |
|---|---|
| date | Tue, 28 Oct 2025 08:15:27 +0000 |
| parents | 9de6b5e570df |
| children |
line wrap: on
line diff
--- a/semibin.xml Tue Mar 25 15:52:22 2025 +0000 +++ b/semibin.xml Tue Oct 28 08:15:27 2025 +0000 @@ -10,7 +10,15 @@ <expand macro="version"/> <command detect_errors="exit_code"><![CDATA[ #import re -@BAM_FILES@ +#if $mode.select != "single": + #if $mode.align_select.align_select == "bam": + @BAM_FILES@ + #else: + @STROBEALIGN_FILES@ + #end if +#else: + @BAM_FILES@ +#end if @FASTA_FILES@ SemiBin2 #if $mode.select == 'single' or $mode.select == 'co' @@ -37,8 +45,16 @@ #end for #end if #end if - --input-fasta 'contigs.fasta' - --input-bam *.bam + --input-fasta 'contigs.$input_fasta.ext' + #if $mode.select == "single": + --input-bam *.bam + #else: + #if $mode.align_select.align_select == "bam": + --input-bam *.bam + #else: + -a *.txt + #end if + #end if --output 'output' --cannot-name 'cannot' @MIN_LEN@ @@ -68,18 +84,34 @@ <expand macro="mode_select"/> <when value="single"> <expand macro="input-fasta-single"/> - <expand macro="input-bam-single"/> + <expand macro="input-bam-single"/> <expand macro="ref-single"/> <expand macro="environment"/> </when> <when value="co"> <expand macro="input-fasta-single"/> - <expand macro="input-bam-multi"/> + <conditional name="align_select"> + <expand macro="bam_or_strobealign"/> + <when value="bam"> + <expand macro="input-bam-multi"/> + </when> + <when value="txt"> + <expand macro="input-txt"/> + </when> + </conditional> <expand macro="ref-single"/> </when> <when value="multi"> <expand macro="input-fasta-multi"/> - <expand macro="input-bam-multi"/> + <conditional name="align_select"> + <expand macro="bam_or_strobealign"/> + <when value="bam"> + <expand macro="input-bam-multi"/> + </when> + <when value="txt"> + <expand macro="input-txt"/> + </when> + </conditional> <expand macro="ref-multi"/> </when> </conditional> @@ -107,42 +139,42 @@ </inputs> <outputs> <collection name="output_pre_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> - <filter>mode["select"]!="multi" and 
extra_output and "pre_reclustering_bins" in extra_output</filter> + <filter>mode['select']!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter> <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_prerecluster_bins"/> </collection> <collection name="output_after_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering"> - <filter>mode["select"]!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter> + <filter>mode['select']!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter> <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_recluster_bins"/> </collection> <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins"> - <filter>mode["select"]!="multi" and not "pre_reclustering_bins" in extra_output</filter> + <filter>mode['select']!="multi" and not "pre_reclustering_bins" in extra_output</filter> <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_bins"/> </collection> <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering (multi_bins)"> - <filter>mode["select"]=="multi"</filter> + <filter>mode['select']=="multi"</filter> <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/bins"/> </collection> <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data"> - <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> + <filter>(mode['select']=="single" or mode['select']=="co") and extra_output and "data" in extra_output</filter> </data> <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data"> - 
<filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> + <filter>(mode['select']=="single" or mode['select']=="co") and extra_output and "data" in extra_output</filter> </data> <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample"> - <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter> + <filter>mode['select']=="multi" and extra_output and "data" in extra_output</filter> <discover_datasets pattern="(?P<designation>.*)\/data.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> </collection> <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample"> - <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter> + <filter>mode['select']=="multi" and extra_output and "data" in extra_output</filter> <discover_datasets pattern="(?P<designation>.*)\/data_split.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> </collection> - <expand macro="generate_sequence_features_extra_outputs"/> + <expand macro="generate_sequence_features_extra_outputs_main"/> </outputs> <tests> <test expect_num_outputs="5"> <conditional name="mode"> <param name="select" value="single"/> - <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="input_fasta" ftype="fasta.gz" value="input_single.fasta.gz"/> <param name="input_bam" ftype="bam" value="input_single.bam"/> <conditional name="ref"> <param name="select" value="taxonomy"/> @@ -166,7 +198,6 @@ <param name="minfasta_kbs" value="200"/> </section> <param name="extra_output" value="data,coverage,contigs"/> - <output_collection name="output_bins" count="0"/> <output name="single_data" ftype="csv"> <assert_contents> <has_text text="g1k_0"/> @@ -194,7 +225,7 @@ <test expect_num_outputs="5"> <conditional name="mode"> <param 
name="select" value="single"/> - <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="input_fasta" ftype="fasta.bz2" value="input_single.fasta.bz2"/> <param name="input_bam" ftype="bam" value="input_single.bam"/> <conditional name="ref"> <param name="select" value="ml"/> @@ -217,7 +248,6 @@ <param name="minfasta_kbs" value="200"/> </section> <param name="extra_output" value="data,coverage,contigs"/> - <output_collection name="output_bins" count="0"/> <output name="single_data" ftype="csv"> <assert_contents> <has_text text="g1k_0"/> @@ -246,7 +276,10 @@ <conditional name="mode"> <param name="select" value="co"/> <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> - <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + <conditional name="align_select"> + <param name="align_select" value="bam"/> + <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + </conditional> <conditional name="ref"> <param name="select" value="ml"/> </conditional> @@ -267,8 +300,7 @@ <param name="minfasta_kbs" value="200"/> </section> <param name="extra_output" value="coverage"/> - <output_collection name="output_bins" count="0"/> - <output_collection name="co_cov" count="5"> + <output_collection name="co_cov_bam" count="5"> <element name="0" ftype="csv"> <assert_contents> <has_text text="g1k_0"/> @@ -288,7 +320,7 @@ </assert_contents> </element> </output_collection> - <output_collection name="co_split_cov" count="5"> + <output_collection name="co_split_cov_bam" count="5"> <element name="0" ftype="csv"> <assert_contents> <has_text text="g1k_0_1"/> @@ -313,7 +345,10 @@ <conditional name="mode"> <param name="select" value="co"/> <param name="input_fasta" ftype="fasta" 
value="input_single.fasta"/> - <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + <conditional name="align_select"> + <param name="align_select" value="bam"/> + <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + </conditional> <conditional name="ref"> <param name="select" value="taxonomy"/> <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> @@ -335,8 +370,7 @@ <param name="minfasta_kbs" value="200"/> </section> <param name="extra_output" value="coverage"/> - <output_collection name="output_bins" count="0"/> - <output_collection name="co_cov" count="5"> + <output_collection name="co_cov_bam" count="5"> <element name="0" ftype="csv"> <assert_contents> <has_text text="g1k_0"/> @@ -356,7 +390,7 @@ </assert_contents> </element> </output_collection> - <output_collection name="co_split_cov" count="5"> + <output_collection name="co_split_cov_bam" count="5"> <element name="0" ftype="csv"> <assert_contents> <has_text text="g1k_0_1"/> @@ -381,7 +415,10 @@ <conditional name="mode"> <param name="select" value="co"/> <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> - <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + <conditional name="align_select"> + <param name="align_select" value="bam"/> + <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + </conditional> <conditional name="ref"> <param name="select" value="taxonomy"/> <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> @@ 
-391,8 +428,11 @@ <param name="method" value="ratio"/> <param name="ratio" value="0.05"/> </conditional> - <param name="orf_finder" value="fraggenescan"/> + <param name="orf_finder" value="fast-naive"/> <param name="random_seed" value="0"/> + <section name="annot"> + <param name="ml_threshold" value="0"/> + </section> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -403,8 +443,7 @@ <param name="minfasta_kbs" value="200"/> </section> <param name="extra_output" value="coverage"/> - <output_collection name="output_bins" count="0"/> - <output_collection name="co_cov" count="5"> + <output_collection name="co_cov_bam" count="5"> <element name="0" ftype="csv"> <assert_contents> <has_text text="g1k_0"/> @@ -424,7 +463,7 @@ </assert_contents> </element> </output_collection> - <output_collection name="co_split_cov" count="5"> + <output_collection name="co_split_cov_bam" count="5"> <element name="0" ftype="csv"> <assert_contents> <has_text text="g1k_0_1"/> @@ -459,8 +498,11 @@ <param name="method" value="ratio"/> <param name="ratio" value="0.05"/> </conditional> - <param name="orf_finder" value="fraggenescan"/> + <param name="orf_finder" value="fast-naive"/> <param name="random_seed" value="0"/> + <section name="annot"> + <param name="ml_threshold" value="0"/> + </section> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -470,13 +512,6 @@ <param name="max_edges" value="200"/> <param name="minfasta_kbs" value="200"/> </section> - <output_collection name="output_bins" count="1"> - <element name="SemiBin_30" ftype="fasta"> - <assert_contents> - <has_text text=">g3k_0"/> - </assert_contents> - </element> - </output_collection> </test> <test expect_num_outputs="2"> <conditional name="mode"> @@ -492,8 +527,11 @@ <param name="method" value="ratio"/> <param name="ratio" value="0.05"/> </conditional> - <param name="orf_finder" value="fraggenescan"/> + <param name="orf_finder" 
value="fast-naive"/> <param name="random_seed" value="0"/> + <section name="annot"> + <param name="ml_threshold" value="0"/> + </section> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -504,7 +542,7 @@ <param name="minfasta_kbs" value="200"/> </section> <param name="extra_output" value="pre_reclustering_bins"/> - <output_collection name="output_pre_recluster_bins" count="3"> + <output_collection name="output_pre_recluster_bins"> <element name="SemiBin_0" ftype="fasta"> <assert_contents> <has_text text="g1k_0"/> @@ -536,7 +574,10 @@ <param name="select" value="concatenated"/> <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/> </conditional> - <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/> + <conditional name="align_select"> + <param name="align_select" value="bam"/> + <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/> + </conditional> <conditional name="ref"> <param name="select" value="taxonomy"/> <param name="taxonomy_annotation_table" value="taxonomy.tsv,taxonomy_2.tsv,taxonomy_3.tsv,taxonomy_4.tsv,taxonomy_5.tsv,taxonomy_6.tsv,taxonomy_7.tsv,taxonomy_8.tsv,taxonomy_9.tsv,taxonomy_10.tsv"/> @@ -546,7 +587,7 @@ <param name="method" value="ratio"/> <param name="ratio" value="0.05"/> </conditional> - <param name="orf_finder" value="fraggenescan"/> + <param name="orf_finder" value="fast-naive"/> <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> @@ -573,29 +614,29 @@ 
</assert_contents> </element> </output_collection> - <output_collection name="multi_cov" count="10"> + <output_collection name="multi_cov_bam" count="10"> <element name="8" ftype="csv"> <assert_contents> <has_text text="S1:g1k_5,"/> </assert_contents> </element> </output_collection> - <output_collection name="multi_cov_sample" count="10"> + <output_collection name="multi_cov_sample_bam" count="10"> <element name="S8" ftype="csv"> <assert_contents> <has_text text="g1k_3"/> </assert_contents> </element> </output_collection> - <output_collection name="multi_split_cov" count="10"> + <output_collection name="multi_split_cov_bam" count="10"> <element name="8" ftype="csv"> <assert_contents> <has_text text="S1:g1k_5_1,0."/> </assert_contents> </element> </output_collection> - <output_collection name="multi_split_cov_sample" count="10"> - <element name="S8" ftype="csv"> + <output_collection name="multi_split_cov_sample_bam" count="10"> + <element name="8" ftype="csv"> <assert_contents> <has_text text="g1k_3_1"/> </assert_contents> @@ -609,8 +650,116 @@ </element> </output_collection> </test> + <test expect_num_outputs="6"> + <conditional name="mode"> + <param name="select" value="co"/> + <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/> + <conditional name="align_select"> + <param name="align_select" value="txt"/> + <param name="abundance" ftype="txt" value="strobealign_1.txt,strobealign_2.txt,strobealign_3.txt,strobealign_4.txt,strobealign_5.txt"/> + </conditional> + <conditional name="ref"> + <param name="select" value="taxonomy"/> + <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> + </conditional> + </conditional> + <conditional name="min_len"> + <param name="method" value="ratio"/> + <param name="ratio" value="0.05"/> + </conditional> + <param name="orf_finder" value="fast-naive"/> + <param name="random_seed" value="0"/> + <section name="annot"> + <param name="ml_threshold" value="0"/> + </section> + <section name="training"> + <param 
name="epoches" value="20"/> + <param name="batch_size" value="2048"/> + </section> + <section name="bin"> + <param name="max_node" value="0.15"/> + <param name="max_edges" value="20"/> + <param name="minfasta_kbs" value="20"/> + </section> + <param name="extra_output" value="data,coverage,contigs,pre_reclustering_bins"/> + </test> + <test expect_num_outputs="6"> + <conditional name="mode"> + <param name="select" value="multi"/> + <conditional name="multi_fasta"> + <param name="select" value="concatenated"/> + <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/> + </conditional> + <conditional name="align_select"> + <param name="align_select" value="txt"/> + <param name="abundance" ftype="txt" value="strobealign_1.txt,strobealign_2.txt,strobealign_3.txt,strobealign_4.txt,strobealign_5.txt"/> + </conditional> + <conditional name="ref"> + <param name="select" value="taxonomy"/> + <param name="taxonomy_annotation_table" value="taxonomy.tsv,taxonomy_2.tsv,taxonomy_3.tsv,taxonomy_4.tsv,taxonomy_5.tsv,taxonomy_6.tsv,taxonomy_7.tsv,taxonomy_8.tsv,taxonomy_9.tsv,taxonomy_10.tsv"/> + </conditional> + </conditional> + <conditional name="min_len"> + <param name="method" value="ratio"/> + <param name="ratio" value="0.05"/> + </conditional> + <param name="orf_finder" value="fast-naive"/> + <param name="random_seed" value="0"/> + <section name="annot"> + <param name="ml_threshold" value="0"/> + </section> + <section name="training"> + <param name="epoches" value="20"/> + <param name="batch_size" value="2048"/> + </section> + <section name="bin"> + <param name="max_node" value="0.15"/> + <param name="max_edges" value="30"/> + <param name="minfasta_kbs" value="30"/> + </section> + <param name="extra_output" value="data,coverage,contigs"/> + <output_collection name="multi_bins" count="10"/> + <output_collection name="multi_data" count="10"> + <element name="S8" ftype="csv"> + <assert_contents> + <has_text text="g1k_0,"/> + </assert_contents> + </element> + 
</output_collection> + <output_collection name="multi_data_split" count="10"> + <element name="S8" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1,"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_cov_txt" count="10"> + <element name="S8" ftype="csv"> + <assert_contents> + <has_text text="g1k_5,"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_split_cov_txt" count="10"> + <element name="S8" ftype="csv"> + <assert_contents> + <has_text text="g1k_5_1,1."/> + </assert_contents> + </element> + </output_collection> + <output_collection name="multi_contigs" count="10"> + <element name="S8" ftype="fasta"> + <assert_contents> + <has_text text=">g1k_0"/> + </assert_contents> + </element> + </output_collection> + </test> </tests> <help><![CDATA[ +**Please note that there is a known issue with SemiBin2 where results may be inconsistent across runs on different systems, despite a set seed. This may cause issues with reproducibility.** +For more information, see this `issue <https://github.com/BigDataBiology/SemiBin/issues/186>`_ on their repository: https://github.com/BigDataBiology/SemiBin/issues/186 + @HELP_HEADER@ Inputs
