Mercurial > repos > iuc > semibin
diff semibin.xml @ 2:9de6b5e570df draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit a9fc83e0029266f910b549d5d1eef6a9bc3e3f7b
| author | iuc |
|---|---|
| date | Tue, 25 Mar 2025 15:52:22 +0000 |
| parents | 2eeff5d4a5de |
| children | 8673617e7e09 |
line wrap: on
line diff
--- a/semibin.xml Mon Mar 27 08:22:16 2023 +0000 +++ b/semibin.xml Tue Mar 25 15:52:22 2025 +0000 @@ -12,24 +12,25 @@ #import re @BAM_FILES@ @FASTA_FILES@ - -SemiBin +SemiBin2 #if $mode.select == 'single' or $mode.select == 'co' single_easy_bin #if $mode.select == 'single' and str($mode.environment) != '' --environment '$mode.environment' #end if - #if $mode.ref.select == "cached" + #if $mode.ref.select == "cached": --reference-db-data-dir '$mode.ref.cached_db.fields.path' - #else + #end if + #if $mode.ref.select == "taxonomy" --taxonomy-annotation-table '$mode.ref.taxonomy_annotation_table' #end if #else multi_easy_bin --separator '$separator' - #if $mode.ref.select == "cached" + #if $mode.ref.select == "cached": --reference-db-data-dir '$mode.ref.cached_db.fields.path' - #else + #end if + #if $mode.ref.select == "taxonomy" --taxonomy-annotation-table #for $e in $mode.ref.taxonomy_annotation_table '$e' @@ -44,7 +45,7 @@ --orf-finder '$orf_finder' --random-seed $random_seed -#if str($annot.ml_threshold) != '' +#if $annot.ml_threshold: --ml-threshold $annot.ml_threshold #end if --epoches $training.epoches @@ -52,7 +53,10 @@ --max-node $bin.max_node --max-edges $bin.max_edges --minfasta-kbs $bin.minfasta_kbs - $bin.no_recluster +#if ($mode.select == 'single' or $mode.select == 'co') and $extra_output and "pre_reclustering_bins" in $extra_output + --write-pre-reclustering-bins +#end if + --compression none --threads \${GALAXY_SLOTS:-1} --processes \${GALAXY_SLOTS:-1} && @@ -93,26 +97,30 @@ <expand macro="max-node"/> <expand macro="max-edges"/> <expand macro="minfasta-kbs"/> - <expand macro="no-recluster"/> </section> <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the training data"> <option value="data">Training data</option> <option value="coverage">Coverage files</option> <option value="contigs">Contigs (if multiple sample)</option> + <option value="pre_reclustering_bins">Pre-reclustering bins (only single sample and co-assembly)</option> </param> </inputs> <outputs> - <collection name="output_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering"> - <filter>not bin["no_recluster"]</filter> - <discover_datasets pattern=".*?\.(?P<designation>.*).fa" format="fasta" directory="output/output_recluster_bins" /> + <collection name="output_pre_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> + <filter>mode["select"]!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter> + <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_prerecluster_bins"/> </collection> - <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> - <filter>mode["select"]!="multi"</filter> - <discover_datasets pattern=".*?\.(?P<designation>.*).fa" format="fasta" directory="output/output_bins" /> + <collection name="output_after_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering"> + <filter>mode["select"]!="multi" and extra_output and "pre_reclustering_bins" in extra_output</filter> + <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_recluster_bins"/> + </collection> + <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins"> + <filter>mode["select"]!="multi" and not "pre_reclustering_bins" in extra_output</filter> + <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/output_bins"/> </collection> <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering (multi_bins)"> <filter>mode["select"]=="multi"</filter> - <discover_datasets pattern=".*?\.(?P<designation>.*).fa" format="fasta" directory="output/bins" /> + <discover_datasets pattern="(?P<designation>.*).fa" format="fasta" directory="output/bins"/> </collection> <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data"> <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> @@ -131,7 +139,7 @@ <expand macro="generate_sequence_features_extra_outputs"/> </outputs> <tests> - <test expect_num_outputs="6"> + <test expect_num_outputs="5"> <conditional name="mode"> <param name="select" value="single"/> <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> @@ -147,10 +155,7 @@ <param name="min_len" value="0" /> </conditional> <param name="orf_finder" value="prodigal"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -158,49 +163,61 @@ <section name="bin"> <param name="max_node" value="1"/> <param name="max_edges" value="200"/> - <param name="minfasta_kbs" value="2"/> - <param name="no_recluster" value="false"/> + <param name="minfasta_kbs" value="200"/> </section> <param name="extra_output" value="data,coverage,contigs"/> - <output_collection name="output_recluster_bins" count="39"> - <element name="0" ftype="fasta"> - <assert_contents> - <has_text text=">g1k_0"/> - </assert_contents> - </element> - <element name="1" ftype="fasta"> - <assert_contents> - <has_text text=">g1k_1"/> - </assert_contents> - </element> - <element name="2" ftype="fasta"> - <assert_contents> - <has_text text=">g1k_2"/> - </assert_contents> - </element> - <element name="39" ftype="fasta"> - <assert_contents> - <has_text text=">g3k_9"/> - </assert_contents> - </element> - </output_collection> - <output_collection name="output_bins" count="0"> - <!--<element name="0" ftype="fasta"> - <assert_contents> - <has_text text=">g1k_0"/> - </assert_contents> - </element> - <element name="1" ftype="fasta"> - <assert_contents> - <has_text text=">g2k_0"/> - </assert_contents> - </element> - <element name="2" ftype="fasta"> - <assert_contents> - <has_text text=">g3k_0"/> - </assert_contents> - </element>--> - </output_collection> + <output_collection name="output_bins" count="0"/> + <output name="single_data" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="g4k_7"/> + </assert_contents> + </output> + <output name="single_data_split" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1"/> + <has_text text="g1k_6_2"/> + </assert_contents> + </output> + <output name="single_cov" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="0.027"/> + </assert_contents> + </output> + <output name="single_split_cov" ftype="csv"> + <assert_contents> + <has_size value="1" delta="1"/> + </assert_contents> + </output> + </test> + <test expect_num_outputs="5"> + <conditional name="mode"> + <param name="select" value="single"/> + <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="input_bam" ftype="bam" value="input_single.bam"/> + <conditional name="ref"> + <param name="select" value="ml"/> + </conditional> + <param name="environment" value="human_gut"/> + </conditional> + <conditional name="min_len"> + <param name="method" value="min-len"/> + <param name="min_len" value="0" /> + </conditional> + <param name="orf_finder" value="prodigal"/> + <param name="random_seed" value="0"/> + <section name="training"> + <param name="epoches" value="20"/> + <param name="batch_size" value="2048"/> + </section> + <section name="bin"> + <param name="max_node" value="1"/> + <param name="max_edges" value="200"/> + <param name="minfasta_kbs" value="200"/> + </section> + <param name="extra_output" value="data,coverage,contigs"/> + <output_collection name="output_bins" count="0"/> <output name="single_data" ftype="csv"> <assert_contents> <has_text text="g1k_0"/> @@ -231,19 +248,15 @@ <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> <conditional name="ref"> - <param name="select" value="taxonomy"/> - <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> + <param name="select" value="ml"/> </conditional> </conditional> <conditional name="min_len"> <param name="method" value="ratio"/> <param name="ratio" value="0.05"/> </conditional> - <param name="orf_finder" value="fraggenescan"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="orf_finder" value="fast-naive"/> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -252,26 +265,9 @@ <param name="max_node" value="1"/> <param name="max_edges" value="200"/> <param name="minfasta_kbs" value="200"/> - <param name="no_recluster" value="true"/> </section> <param name="extra_output" value="coverage"/> - <output_collection name="output_bins" count="3"> - <element name="0" ftype="fasta"> - <assert_contents> - <has_text text=">g1k_0"/> - </assert_contents> - </element> - <element name="1" ftype="fasta"> - <assert_contents> - <has_text text=">g2k_0"/> - </assert_contents> - </element> - <element name="2" ftype="fasta"> - <assert_contents> - <has_text text=">g3k_0"/> - </assert_contents> - </element> - </output_collection> + <output_collection name="output_bins" count="0"/> <output_collection name="co_cov" count="5"> <element name="0" ftype="csv"> <assert_contents> @@ -313,7 +309,75 @@ </element> </output_collection> </test> - <test expect_num_outputs="4"> + <test expect_num_outputs="3"> + <conditional name="mode"> + <param name="select" value="co"/> + <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> + <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> + <conditional name="ref"> + <param name="select" value="taxonomy"/> + <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> + </conditional> + </conditional> + <conditional name="min_len"> + <param name="method" value="ratio"/> + <param name="ratio" value="0.05"/> + </conditional> + <param name="orf_finder" value="fast-naive"/> + <param name="random_seed" value="0"/> + <section name="training"> + <param name="epoches" value="20"/> + <param name="batch_size" value="2048"/> + </section> + <section name="bin"> + <param name="max_node" value="1"/> + <param name="max_edges" value="200"/> + <param name="minfasta_kbs" value="200"/> + </section> + <param name="extra_output" value="coverage"/> + <output_collection name="output_bins" count="0"/> + <output_collection name="co_cov" count="5"> + <element name="0" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="g2k_7"/> + </assert_contents> + </element> + <element name="1" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="g2k_7"/> + </assert_contents> + </element> + <element name="4" ftype="csv"> + <assert_contents> + <has_text text="g1k_0"/> + <has_text text="g2k_7"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="co_split_cov" count="5"> + <element name="0" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1"/> + <has_text text="g2k_7_2"/> + </assert_contents> + </element> + <element name="1" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1"/> + <has_text text="g2k_7_2"/> + </assert_contents> + </element> + <element name="2" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1"/> + <has_text text="g2k_7_2"/> + </assert_contents> + </element> + </output_collection> + </test> + <test expect_num_outputs="3"> <conditional name="mode"> <param name="select" value="co"/> <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> @@ -328,10 +392,7 @@ <param name="ratio" value="0.05"/> </conditional> <param name="orf_finder" value="fraggenescan"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -340,16 +401,9 @@ <param name="max_node" value="1"/> <param name="max_edges" value="200"/> <param name="minfasta_kbs" value="200"/> - <param name="no_recluster" value="false"/> </section> <param name="extra_output" value="coverage"/> - <output_collection name="output_recluster_bins" count="1"> - <element name="30" ftype="fasta"> - <assert_contents> - <has_text text=">g3k_0"/> - </assert_contents> - </element> - </output_collection> + <output_collection name="output_bins" count="0"/> <output_collection name="co_cov" count="5"> <element name="0" ftype="csv"> <assert_contents> @@ -397,7 +451,7 @@ <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> <param name="input_bam" ftype="bam" value="input_single.bam"/> <conditional name="ref"> - <param name="db_selector" value="cached"/> + <param name="select" value="cached"/> <param name="cached_db" value="test-db"/> </conditional> </conditional> @@ -406,10 +460,7 @@ <param name="ratio" value="0.05"/> </conditional> <param name="orf_finder" value="fraggenescan"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -418,13 +469,11 @@ <param name="max_node" value="1"/> <param name="max_edges" value="200"/> <param name="minfasta_kbs" value="200"/> - <param name="no_recluster" value="true"/> </section> - <param name="extra_output" value=""/> - <output_collection name="output_bins" count="3"> - <element name="0" ftype="fasta"> + <output_collection name="output_bins" count="1"> + <element name="SemiBin_30" ftype="fasta"> <assert_contents> - <has_text text=">g1k_0"/> + <has_text text=">g3k_0"/> </assert_contents> </element> </output_collection> @@ -435,7 +484,7 @@ <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> <param name="input_bam" ftype="bam" value="input_single.bam"/> <conditional name="ref"> - <param name="db_selector" value="cached"/> + <param name="select" value="cached"/> <param name="cached_db" value="test-db"/> </conditional> </conditional> @@ -444,10 +493,7 @@ <param name="ratio" value="0.05"/> </conditional> <param name="orf_finder" value="fraggenescan"/> - <param name="random-seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> + <param name="random_seed" value="0"/> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -456,13 +502,29 @@ <param name="max_node" value="1"/> <param name="max_edges" value="200"/> <param name="minfasta_kbs" value="200"/> - <param name="no_recluster" value="false"/> </section> - <param name="extra_output" value=""/> - <output_collection name="output_recluster_bins" count="1"> - <element name="30" ftype="fasta"> + <param name="extra_output" value="pre_reclustering_bins"/> + <output_collection name="output_pre_recluster_bins" count="3"> + <element name="SemiBin_0" ftype="fasta"> + <assert_contents> + <has_text text="g1k_0"/> + </assert_contents> + </element> + <element name="SemiBin_1" ftype="fasta"> + <assert_contents> + <has_text text="g2k_0"/> + </assert_contents> + </element> + <element name="SemiBin_2" ftype="fasta"> + <assert_contents> + <has_text text="g3k_0"/> + </assert_contents> + </element> + </output_collection> + <output_collection name="output_after_recluster_bins" count="1"> + <element name="SemiBin_30" ftype="fasta"> <assert_contents> - <has_text text=">g3k_0"/> + <has_text text="g3k_0"/> </assert_contents> </element> </output_collection> @@ -486,9 +548,6 @@ </conditional> <param name="orf_finder" value="fraggenescan"/> <param name="random_seed" value="0"/> - <section name="annot"> - <param name="ml_threshold" value=""/> - </section> <section name="training"> <param name="epoches" value="20"/> <param name="batch_size" value="2048"/> @@ -497,23 +556,9 @@ <param name="max_node" value="1"/> <param name="max_edges" value="200"/> <param name="minfasta_kbs" value="200"/> - <param name="no_recluster" value="true"/> </section> <param name="extra_output" value="data,coverage,contigs"/> - <output_collection name="multi_bins" count="2"> - <element name="0" ftype="fasta"> - <assert_contents> - <has_text text=">g1k_0"/> - </assert_contents> - </element> - </output_collection> - <output_collection name="multi_contigs" count="10"> - <element name="S8" ftype="fasta"> - <assert_contents> - <has_text text=">g1k_0"/> - </assert_contents> - </element> - </output_collection> + <output_collection name="multi_bins" count="0"/> <output_collection name="multi_data" count="10"> <element name="S8" ftype="csv"> <assert_contents> @@ -521,6 +566,13 @@ </assert_contents> </element> </output_collection> + <output_collection name="multi_data_split" count="10"> + <element name="S8" ftype="csv"> + <assert_contents> + <has_text text="g1k_0_1,"/> + </assert_contents> + </element> + </output_collection> <output_collection name="multi_cov" count="10"> <element name="8" ftype="csv"> <assert_contents> @@ -557,7 +609,6 @@ </element> </output_collection> </test> - </tests> <help><![CDATA[ @HELP_HEADER@ @@ -570,4 +621,4 @@ ]]></help> <expand macro="citations"/> -</tool> +</tool> \ No newline at end of file
