Mercurial > repos > iuc > semibin
comparison semibin.xml @ 0:d1f0be443b8b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4
| author | iuc |
|---|---|
| date | Fri, 14 Oct 2022 22:05:37 +0000 |
| parents | |
| children | 2eeff5d4a5de |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d1f0be443b8b |
|---|---|
| 1 <tool id="semibin" name="SemiBin" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | |
| 2 <description> | |
| 3 for Semi-supervised Metagenomic Binning | |
| 4 </description> | |
| 5 <macros> | |
| 6 <import>macros.xml</import> | |
| 7 </macros> | |
| 8 <expand macro="biotools"/> | |
| 9 <expand macro="requirements"/> | |
| 10 <expand macro="version"/> | |
| 11 <!-- Command template (Cheetah): runs SemiBin single_easy_bin when mode.select is 'single' or 'co', otherwise multi_easy_bin; the reference is either a cached database directory or taxonomy annotation table(s). @BAM_FILES@, @FASTA_FILES@ and @MIN_LEN@ are tokens defined outside this file (presumably macros.xml; they appear to stage the inputs as contigs.fasta and *.bam - confirm there). The trailing echo/ls prints the contents of the output directory to stdout. --> <command detect_errors="exit_code"><![CDATA[ | |
| 12 #import re | |
| 13 @BAM_FILES@ | |
| 14 @FASTA_FILES@ | |
| 15 | |
| 16 SemiBin | |
| 17 #if $mode.select == 'single' or $mode.select == 'co' | |
| 18 single_easy_bin | |
| 19 #if $mode.select == 'single' and str($mode.environment) != '' | |
| 20 --environment '$mode.environment' | |
| 21 #end if | |
| 22 #if $mode.ref.select == "cached" | |
| 23 --reference-db-data-dir '$mode.ref.cached_db.fields.path' | |
| 24 #else | |
| 25 --taxonomy-annotation-table '$mode.ref.taxonomy_annotation_table' | |
| 26 #end if | |
| 27 #else | |
| 28 multi_easy_bin | |
| 29 --separator '$separator' | |
| 30 #if $mode.ref.select == "cached" | |
| 31 --reference-db-data-dir '$mode.ref.cached_db.fields.path' | |
| 32 #else | |
| 33 --taxonomy-annotation-table | |
| 34 #for $e in $mode.ref.taxonomy_annotation_table | |
| 35 '$e' | |
| 36 #end for | |
| 37 #end if | |
| 38 #end if | |
| 39 --input-fasta 'contigs.fasta' | |
| 40 --input-bam *.bam | |
| 41 --output 'output' | |
| 42 --cannot-name 'cannot' | |
| 43 @MIN_LEN@ | |
| 44 --orf-finder '$orf_finder' | |
| 45 --random-seed $random_seed | |
| 46 | |
| 47 #if str($annot.ml_threshold) != '' | |
| 48 --ml-threshold $annot.ml_threshold | |
| 49 #end if | |
| 50 --epoches $training.epoches | |
| 51 --batch-size $training.batch_size | |
| 52 --max-node $bin.max_node | |
| 53 --max-edges $bin.max_edges | |
| 54 --minfasta-kbs $bin.minfasta_kbs | |
| 55 $bin.no_recluster | |
| 56 --threads \${GALAXY_SLOTS:-1} | |
| 57 --processes \${GALAXY_SLOTS:-1} | |
| 58 && | |
| 59 echo "output" && | |
| 60 ls output | |
| 61 ]]></command> | |
| 62 <inputs> | |
| 63 <conditional name="mode"> <!-- binning mode; each branch expands the FASTA, BAM and reference inputs for that mode --> | |
| 64 <expand macro="mode_select"/> | |
| 65 <when value="single"> | |
| 66 <expand macro="input-fasta-single"/> | |
| 67 <expand macro="input-bam-single"/> | |
| 68 <expand macro="ref-single"/> | |
| 69 <expand macro="environment"/> | |
| 70 </when> | |
| 71 <when value="co"> | |
| 72 <expand macro="input-fasta-single"/> | |
| 73 <expand macro="input-bam-multi"/> | |
| 74 <expand macro="ref-single"/> | |
| 75 </when> | |
| 76 <when value="multi"> | |
| 77 <expand macro="input-fasta-multi"/> | |
| 78 <expand macro="input-bam-multi"/> | |
| 79 <expand macro="ref-multi"/> | |
| 80 </when> | |
| 81 </conditional> | |
| 82 <expand macro="min_len"/> | |
| 83 <expand macro="orf-finder"/> | |
| 84 <expand macro="random-seed"/> | |
| 85 <section name="annot" title="Contig annotations" expanded="true"> | |
| 86 <expand macro="ml-threshold"/> | |
| 87 </section> | |
| 88 <section name="training" title="Training"> | |
| 89 <expand macro="epoches"/> | |
| 90 <expand macro="batch-size"/> | |
| 91 </section> | |
| 92 <section name="bin" title="Binning"> | |
| 93 <expand macro="max-node"/> | |
| 94 <expand macro="max-edges"/> | |
| 95 <expand macro="minfasta-kbs"/> | |
| 96 <expand macro="no-recluster"/> | |
| 97 </section> | |
| 98 <param name="extra_output" type="select" multiple="true" optional="true" label="Extra outputs" help="In addition to the reconstructed bins"> <!-- optional outputs; the selected values are checked by the filter expressions of the optional outputs (e.g. "data" in extra_output) --> | |
| 99 <option value="data">Training data</option> | |
| 100 <option value="coverage">Coverage files</option> | |
| 101 <option value="contigs">Contigs (if multiple samples)</option> | |
| 102 </param> | |
| 103 </inputs> | |
| 104 <outputs> <!-- the regex patterns below escape the angle brackets of the named group so the attribute values stay well-formed XML; the parsed pattern is unchanged --> | |
| 105 <collection name="output_recluster_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins after reclustering"> | |
| 106 <filter>not bin["no_recluster"]</filter> | |
| 107 <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_recluster_bins" /> | |
| 108 </collection> | |
| 109 <collection name="output_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> | |
| 110 <filter>mode["select"]!="multi"</filter> | |
| 111 <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/output_bins" /> | |
| 112 </collection> | |
| 113 <collection name="multi_bins" type="list" label="${tool.name} on ${on_string}: Reconstructed bins before reclustering"> | |
| 114 <filter>mode["select"]=="multi"</filter> | |
| 115 <discover_datasets pattern=".*?\.(?P&lt;designation&gt;.*).fa" format="fasta" directory="output/bins" /> | |
| 116 </collection> | |
| 117 <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data"> | |
| 118 <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> | |
| 119 </data> | |
| 120 <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data"> | |
| 121 <filter>(mode["select"]=="single" or mode["select"]=="co") and extra_output and "data" in extra_output</filter> | |
| 122 </data> | |
| 123 <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample"> | |
| 124 <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter> | |
| 125 <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
| 126 </collection> | |
| 127 <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample"> | |
| 128 <filter>mode["select"]=="multi" and extra_output and "data" in extra_output</filter> | |
| 129 <discover_datasets pattern="(?P&lt;designation&gt;.*)\/data_split.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
| 130 </collection> | |
| 131 <expand macro="generate_sequence_features_extra_outputs"/> | |
| 132 </outputs> | |
| 133 <tests> | |
| 134 <test expect_num_outputs="6"> <!-- single mode, taxonomy annotation table, no_recluster=false, all extra outputs requested --> | |
| 135 <conditional name="mode"> | |
| 136 <param name="select" value="single"/> | |
| 137 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> | |
| 138 <param name="input_bam" ftype="bam" value="input_single.bam"/> | |
| 139 <conditional name="ref"> | |
| 140 <param name="select" value="taxonomy"/> | |
| 141 <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> | |
| 142 </conditional> | |
| 143 <param name="environment" value="human_gut"/> | |
| 144 </conditional> | |
| 145 <conditional name="min_len"> | |
| 146 <param name="method" value="min-len"/> | |
| 147 <param name="min_len" value="0" /> | |
| 148 </conditional> | |
| 149 <param name="orf_finder" value="prodigal"/> | |
| 150 <param name="random_seed" value="0"/> <!-- parameter name matches $random_seed used by the command template --> | |
| 151 <section name="annot"> | |
| 152 <param name="ml_threshold" value=""/> | |
| 153 </section> | |
| 154 <section name="training"> | |
| 155 <param name="epoches" value="20"/> | |
| 156 <param name="batch_size" value="2048"/> | |
| 157 </section> | |
| 158 <section name="bin"> | |
| 159 <param name="max_node" value="1"/> | |
| 160 <param name="max_edges" value="200"/> | |
| 161 <param name="minfasta_kbs" value="200"/> | |
| 162 <param name="no_recluster" value="false"/> | |
| 163 </section> | |
| 164 <param name="extra_output" value="data,coverage,contigs"/> | |
| 165 <output_collection name="output_recluster_bins" count="0"/> | |
| 166 <output_collection name="output_bins" count="3"> | |
| 167 <element name="0" ftype="fasta"> | |
| 168 <assert_contents> | |
| 169 <has_text text=">g1k_0"/> | |
| 170 </assert_contents> | |
| 171 </element> | |
| 172 <element name="1" ftype="fasta"> | |
| 173 <assert_contents> | |
| 174 <has_text text=">g2k_0"/> | |
| 175 </assert_contents> | |
| 176 </element> | |
| 177 <element name="2" ftype="fasta"> | |
| 178 <assert_contents> | |
| 179 <has_text text=">g3k_0"/> | |
| 180 </assert_contents> | |
| 181 </element> | |
| 182 </output_collection> | |
| 183 <output name="single_data" ftype="csv"> | |
| 184 <assert_contents> | |
| 185 <has_text text="g1k_0"/> | |
| 186 <has_text text="g4k_7"/> | |
| 187 </assert_contents> | |
| 188 </output> | |
| 189 <output name="single_data_split" ftype="csv"> | |
| 190 <assert_contents> | |
| 191 <has_text text="g1k_0_1"/> | |
| 192 <has_text text="g1k_6_2"/> | |
| 193 </assert_contents> | |
| 194 </output> | |
| 195 <output name="single_cov" ftype="csv"> | |
| 196 <assert_contents> | |
| 197 <has_text text="g1k_0"/> | |
| 198 <has_text text="0.027"/> | |
| 199 </assert_contents> | |
| 200 </output> | |
| 201 <output name="single_split_cov" ftype="csv"> | |
| 202 <assert_contents> | |
| 203 <has_size value="1" delta="1"/> | |
| 204 </assert_contents> | |
| 205 </output> | |
| 206 </test> | |
| 207 <test expect_num_outputs="3"> <!-- co mode with five BAM files, taxonomy annotation table, no_recluster=true, coverage extra output only --> | |
| 208 <conditional name="mode"> | |
| 209 <param name="select" value="co"/> | |
| 210 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> | |
| 211 <param name="input_bam" ftype="bam" value="input_coassembly_sorted1.bam,input_coassembly_sorted2.bam,input_coassembly_sorted3.bam,input_coassembly_sorted4.bam,input_coassembly_sorted5.bam"/> | |
| 212 <conditional name="ref"> | |
| 213 <param name="select" value="taxonomy"/> | |
| 214 <param name="taxonomy_annotation_table" value="taxonomy.tsv"/> | |
| 215 </conditional> | |
| 216 </conditional> | |
| 217 <conditional name="min_len"> | |
| 218 <param name="method" value="ratio"/> | |
| 219 <param name="ratio" value="0.05"/> | |
| 220 </conditional> | |
| 221 <param name="orf_finder" value="fraggenescan"/> | |
| 222 <param name="random_seed" value="0"/> <!-- parameter name matches $random_seed used by the command template --> | |
| 223 <section name="annot"> | |
| 224 <param name="ml_threshold" value=""/> | |
| 225 </section> | |
| 226 <section name="training"> | |
| 227 <param name="epoches" value="20"/> | |
| 228 <param name="batch_size" value="2048"/> | |
| 229 </section> | |
| 230 <section name="bin"> | |
| 231 <param name="max_node" value="1"/> | |
| 232 <param name="max_edges" value="200"/> | |
| 233 <param name="minfasta_kbs" value="200"/> | |
| 234 <param name="no_recluster" value="true"/> | |
| 235 </section> | |
| 236 <param name="extra_output" value="coverage"/> | |
| 237 <output_collection name="output_bins" count="3"> | |
| 238 <element name="0" ftype="fasta"> | |
| 239 <assert_contents> | |
| 240 <has_text text=">g1k_0"/> | |
| 241 </assert_contents> | |
| 242 </element> | |
| 243 <element name="1" ftype="fasta"> | |
| 244 <assert_contents> | |
| 245 <has_text text=">g2k_0"/> | |
| 246 </assert_contents> | |
| 247 </element> | |
| 248 <element name="2" ftype="fasta"> | |
| 249 <assert_contents> | |
| 250 <has_text text=">g3k_0"/> | |
| 251 </assert_contents> | |
| 252 </element> | |
| 253 </output_collection> | |
| 254 <output_collection name="co_cov" count="5"> | |
| 255 <element name="0" ftype="csv"> | |
| 256 <assert_contents> | |
| 257 <has_text text="g1k_0"/> | |
| 258 <has_text text="g2k_7"/> | |
| 259 </assert_contents> | |
| 260 </element> | |
| 261 <element name="1" ftype="csv"> | |
| 262 <assert_contents> | |
| 263 <has_text text="g1k_0"/> | |
| 264 <has_text text="g2k_7"/> | |
| 265 </assert_contents> | |
| 266 </element> | |
| 267 <element name="4" ftype="csv"> | |
| 268 <assert_contents> | |
| 269 <has_text text="g1k_0"/> | |
| 270 <has_text text="g2k_7"/> | |
| 271 </assert_contents> | |
| 272 </element> | |
| 273 </output_collection> | |
| 274 <output_collection name="co_split_cov" count="5"> | |
| 275 <element name="0" ftype="csv"> | |
| 276 <assert_contents> | |
| 277 <has_text text="g1k_0_1"/> | |
| 278 <has_text text="g2k_7_2"/> | |
| 279 </assert_contents> | |
| 280 </element> | |
| 281 <element name="1" ftype="csv"> | |
| 282 <assert_contents> | |
| 283 <has_text text="g1k_0_1"/> | |
| 284 <has_text text="g2k_7_2"/> | |
| 285 </assert_contents> | |
| 286 </element> | |
| 287 <element name="2" ftype="csv"> | |
| 288 <assert_contents> | |
| 289 <has_text text="g1k_0_1"/> | |
| 290 <has_text text="g2k_7_2"/> | |
| 291 </assert_contents> | |
| 292 </element> | |
| 293 </output_collection> | |
| 294 </test> | |
| 295 <test expect_num_outputs="1"> <!-- single mode using the cached reference database, no_recluster=true, no extra outputs --> | |
| 296 <conditional name="mode"> | |
| 297 <param name="select" value="single"/> | |
| 298 <param name="input_fasta" ftype="fasta" value="input_single.fasta"/> | |
| 299 <param name="input_bam" ftype="bam" value="input_single.bam"/> | |
| 300 <conditional name="ref"> | |
| 301 <param name="select" value="cached"/> <!-- the command template tests $mode.ref.select, so the selector is named "select" --> | |
| 302 <param name="cached_db" value="test-db"/> | |
| 303 </conditional> | |
| 304 </conditional> | |
| 305 <conditional name="min_len"> | |
| 306 <param name="method" value="ratio"/> | |
| 307 <param name="ratio" value="0.05"/> | |
| 308 </conditional> | |
| 309 <param name="orf_finder" value="fraggenescan"/> | |
| 310 <param name="random_seed" value="0"/> <!-- parameter name matches $random_seed used by the command template --> | |
| 311 <section name="annot"> | |
| 312 <param name="ml_threshold" value=""/> | |
| 313 </section> | |
| 314 <section name="training"> | |
| 315 <param name="epoches" value="20"/> | |
| 316 <param name="batch_size" value="2048"/> | |
| 317 </section> | |
| 318 <section name="bin"> | |
| 319 <param name="max_node" value="1"/> | |
| 320 <param name="max_edges" value="200"/> | |
| 321 <param name="minfasta_kbs" value="200"/> | |
| 322 <param name="no_recluster" value="true"/> | |
| 323 </section> | |
| 324 <param name="extra_output" value=""/> | |
| 325 <output_collection name="output_bins" count="3"> | |
| 326 <element name="0" ftype="fasta"> | |
| 327 <assert_contents> | |
| 328 <has_text text=">g1k_0"/> | |
| 329 </assert_contents> | |
| 330 </element> | |
| 331 </output_collection> | |
| 332 </test> | |
| 333 <test expect_num_outputs="8"> <!-- multi mode: concatenated FASTA, ten BAM files, ten taxonomy tables, no_recluster=true, all extra outputs requested --> | |
| 334 <conditional name="mode"> | |
| 335 <param name="select" value="multi"/> | |
| 336 <conditional name="multi_fasta"> | |
| 337 <param name="select" value="concatenated"/> | |
| 338 <param name="input_fasta" ftype="fasta" value="input_multi.fasta.gz"/> | |
| 339 </conditional> | |
| 340 <param name="input_bam" ftype="bam" value="input_multi_sorted1.bam,input_multi_sorted2.bam,input_multi_sorted3.bam,input_multi_sorted4.bam,input_multi_sorted5.bam,input_multi_sorted6.bam,input_multi_sorted7.bam,input_multi_sorted8.bam,input_multi_sorted9.bam,input_multi_sorted10.bam"/> | |
| 341 <conditional name="ref"> | |
| 342 <param name="select" value="taxonomy"/> | |
| 343 <param name="taxonomy_annotation_table" value="taxonomy.tsv,taxonomy_2.tsv,taxonomy_3.tsv,taxonomy_4.tsv,taxonomy_5.tsv,taxonomy_6.tsv,taxonomy_7.tsv,taxonomy_8.tsv,taxonomy_9.tsv,taxonomy_10.tsv"/> | |
| 344 </conditional> | |
| 345 </conditional> | |
| 346 <conditional name="min_len"> | |
| 347 <param name="method" value="ratio"/> | |
| 348 <param name="ratio" value="0.05"/> | |
| 349 </conditional> | |
| 350 <param name="orf_finder" value="fraggenescan"/> | |
| 351 <param name="random_seed" value="0"/> | |
| 352 <section name="annot"> | |
| 353 <param name="ml_threshold" value=""/> | |
| 354 </section> | |
| 355 <section name="training"> | |
| 356 <param name="epoches" value="20"/> | |
| 357 <param name="batch_size" value="2048"/> | |
| 358 </section> | |
| 359 <section name="bin"> | |
| 360 <param name="max_node" value="1"/> | |
| 361 <param name="max_edges" value="200"/> | |
| 362 <param name="minfasta_kbs" value="200"/> | |
| 363 <param name="no_recluster" value="true"/> | |
| 364 </section> | |
| 365 <param name="extra_output" value="data,coverage,contigs"/> | |
| 366 <output_collection name="multi_bins" count="2"> | |
| 367 <element name="0" ftype="fasta"> | |
| 368 <assert_contents> | |
| 369 <has_text text=">g1k_0"/> | |
| 370 </assert_contents> | |
| 371 </element> | |
| 372 </output_collection> | |
| 373 <output_collection name="multi_contigs" count="10"> <!-- asserted once; a second identical multi_contigs block at the end of this test was removed --> | |
| 374 <element name="S8" ftype="fasta"> | |
| 375 <assert_contents> | |
| 376 <has_text text=">g1k_0"/> | |
| 377 </assert_contents> | |
| 378 </element> | |
| 379 </output_collection> | |
| 380 <output_collection name="multi_data" count="10"> | |
| 381 <element name="S8" ftype="csv"> | |
| 382 <assert_contents> | |
| 383 <has_text text="g1k_0,"/> | |
| 384 </assert_contents> | |
| 385 </element> | |
| 386 </output_collection> | |
| 387 <output_collection name="multi_cov" count="10"> | |
| 388 <element name="8" ftype="csv"> | |
| 389 <assert_contents> | |
| 390 <has_text text="S1:g1k_5,"/> | |
| 391 </assert_contents> | |
| 392 </element> | |
| 393 </output_collection> | |
| 394 <output_collection name="multi_cov_sample" count="10"> | |
| 395 <element name="S8" ftype="csv"> | |
| 396 <assert_contents> | |
| 397 <has_text text="g1k_3"/> | |
| 398 </assert_contents> | |
| 399 </element> | |
| 400 </output_collection> | |
| 401 <output_collection name="multi_split_cov" count="10"> | |
| 402 <element name="8" ftype="csv"> | |
| 403 <assert_contents> | |
| 404 <has_text text="S1:g1k_5_1,0."/> | |
| 405 </assert_contents> | |
| 406 </element> | |
| 407 </output_collection> | |
| 408 <output_collection name="multi_split_cov_sample" count="10"> | |
| 409 <element name="S8" ftype="csv"> | |
| 410 <assert_contents> | |
| 411 <has_text text="g1k_3_1"/> | |
| 412 </assert_contents> | |
| 413 </element> | |
| 414 </output_collection> | |
| 422 </test> | |
| 423 </tests> | |
| 424 <help><![CDATA[ | |
| 425 @HELP_HEADER@ | |
| 426 | |
| 427 Inputs | |
| 428 ====== | |
| 429 | |
| 430 @HELP_INPUT_FASTA@ | |
| 431 @HELP_INPUT_BAM@ | |
| 432 | |
| 433 ]]></help> | |
| 434 <expand macro="citations"/> | |
| 435 </tool> |
