Mercurial > repos > iuc > semibin_generate_sequence_features
comparison macros.xml @ 0:5e336e1a7e7e draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/semibin commit aa9bfb2fb62547ee8bac34f0de5b3beaa0bfd1a4
| author | iuc |
|---|---|
| date | Fri, 14 Oct 2022 22:06:46 +0000 |
| parents | |
| children | cae8e4d4f726 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:5e336e1a7e7e |
|---|---|
| 1 <?xml version="1.0"?> | |
| 2 <macros> | |
| 3 <!-- Version of the wrapped SemiBin package; also used as the package version in the "requirements" macro. --> <token name="@TOOL_VERSION@">1.1.1</token> | |
| 4 <!-- Version suffix token; presumably appended to the tool version by the tool files importing these macros (usage not shown here). --> <token name="@VERSION_SUFFIX@">0</token> | |
| 5 <!-- Profile token; presumably the Galaxy profile declared by the importing tools (usage not shown here). --> <token name="@PROFILE@">21.01</token> | |
| 6 <!-- Macro "biotools": emits an xrefs element holding a bio.tools cross-reference to "semibin". --> <xml name="biotools"> | |
| 7 <xrefs> | |
| 8 <xref type="bio.tools">semibin</xref> | |
| 9 </xrefs> | |
| 10 </xml> | |
| 11 <!-- Macro "requirements": declares the semibin package at @TOOL_VERSION@; the yield element is the insertion point for extra requirements supplied by the expanding tool. --> <xml name="requirements"> | |
| 12 <requirements> | |
| 13 <requirement type="package" version="@TOOL_VERSION@">semibin</requirement> | |
| 14 <yield/> | |
| 15 </requirements> | |
| 16 </xml> | |
| 17 <!-- Macro "version": version command that runs "SemiBin -v". --> <xml name="version"> | |
| 18 <version_command>SemiBin -v</version_command> | |
| 19 </xml> | |
| 20 <!-- Macro "mode_fasta_bam": conditional "mode" pairing contig and BAM inputs per binning mode. single: one FASTA and one BAM; co: one FASTA and several BAMs; multi: multi-sample FASTA input and several BAMs. --> <xml name="mode_fasta_bam"> | |
| 21 <conditional name="mode"> | |
| 22 <expand macro="mode_select"/> | |
| 23 <when value="single"> | |
| 24 <expand macro="input-fasta-single"/> | |
| 25 <expand macro="input-bam-single"/> | |
| 26 </when> | |
| 27 <when value="co"> | |
| 28 <expand macro="input-fasta-single"/> | |
| 29 <expand macro="input-bam-multi"/> | |
| 30 </when> | |
| 31 <when value="multi"> | |
| 32 <expand macro="input-fasta-multi"/> | |
| 33 <expand macro="input-bam-multi"/> | |
| 34 </when> | |
| 35 </conditional> | |
| 36 </xml> | |
| 37 <!-- Macro "mode_fasta": same "mode" conditional as "mode_fasta_bam" but with contig inputs only (no BAM parameters). --> <xml name="mode_fasta"> | |
| 38 <conditional name="mode"> | |
| 39 <expand macro="mode_select"/> | |
| 40 <when value="single"> | |
| 41 <expand macro="input-fasta-single"/> | |
| 42 </when> | |
| 43 <when value="co"> | |
| 44 <expand macro="input-fasta-single"/> | |
| 45 </when> | |
| 46 <when value="multi"> | |
| 47 <expand macro="input-fasta-multi"/> | |
| 48 </when> | |
| 49 </conditional> | |
| 50 </xml> | |
| 51 <!-- Macro "mode_select": the "select" parameter that drives the "mode" conditionals; values are single (default), co and multi. --> <xml name="mode_select"> | |
| 52 <param name="select" type="select" label="Binning mode"> | |
| 53 <option value="single" selected="true">Single sample binning (each sample is assembled and binned independently)</option> | |
| 54 <option value="co">Co-assembly binning (samples are co-assembled together and binned together)</option> | |
| 55 <option value="multi">Multi-sample binning (multiple samples are assembled and binned individually, but information from multiple samples is used together)</option> | |
| 56 </param> | |
| 57 </xml> | |
| 58 <!-- Macro "input-fasta-single": one contig dataset (fasta or fasta.gz) for the input-fasta argument. --> <xml name="input-fasta-single"> | |
| 59 <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="Contig sequences"/> | |
| 60 </xml> | |
| 61 <!-- Macro "input-fasta-multi": conditional "multi_fasta" for multi-sample contigs. "concatenated" (default) takes one combined file plus the separator parameter; "multi" takes one file per sample plus the min_len parameter from "concat_min_len". --> <xml name="input-fasta-multi"> | |
| 62 <conditional name="multi_fasta"> | |
| 63 <param name="select" type="select" label="Contig files of the samples"> | |
| 64 <option value="concatenated" selected="true">1 concatenated file (created using the dedicated tool) with all sample contigs </option> | |
| 65 <option value="multi">1 contig file per sample</option> | |
| 66 </param> | |
| 67 <when value="concatenated"> | |
| 68 <param argument="--input-fasta" type="data" format="fasta,fasta.gz" label="Combined contig sequences"/> | |
| 69 <expand macro="separator"/> | |
| 70 </when> | |
| 71 <when value="multi"> | |
| 72 <param argument="--input-fasta" type="data" multiple="true" format="fasta,fasta.gz" label="Contig sequences"/> | |
| 73 <expand macro="concat_min_len"/> | |
| 74 </when> | |
| 75 </conditional> | |
| 76 </xml> | |
| 77 <!-- Macro "concat_min_len": integer "min_len" (minimum 0, default 0); @FASTA_FILES@ passes it to "SemiBin concatenate_fasta" through the -m option. --> <xml name="concat_min_len"> | |
| 78 <param name="min_len" type="integer" min="0" value="0" label="Minimal length for contigs to be kept"/> | |
| 79 </xml> | |
| 80 <!-- Cheetah snippet: stages the top-level input_fasta dataset as contigs.fasta, decompressing with gunzip when its extension ends in .gz and symlinking otherwise. The emitted shell ends with a trailing "and" operator, so another command must follow it. --> <token name="@SINGLE_FASTA_FILES@"><![CDATA[ | |
| 81 #if $input_fasta.ext.endswith(".gz") | |
| 82 gunzip -c '$input_fasta' > 'contigs.fasta' && | |
| 83 #else | |
| 84 ln -s '$input_fasta' 'contigs.fasta' && | |
| 85 #end if | |
| 86 ]]></token> | |
| 87 <!-- Cheetah snippet: stages contigs as contigs.fasta according to the "mode" conditional. single or co: gunzip or symlink mode.input_fasta. multi with a concatenated file: gunzip or symlink it and take the separator from the parameter. multi with one file per sample: stage each file under its sanitized element identifier, set the separator to ":", run "SemiBin concatenate_fasta" into the "output" directory and symlink output/concatenated.fa. Uses the "re" module, presumably imported by the enclosing command template (not visible here). --> <token name="@FASTA_FILES@"><![CDATA[ | |
| 88 #if $mode.select == 'single' or $mode.select == 'co' | |
| 89 #if $mode.input_fasta.ext.endswith(".gz") | |
| 90 gunzip -c '$mode.input_fasta' > 'contigs.fasta' && | |
| 91 #else | |
| 92 ln -s '$mode.input_fasta' 'contigs.fasta' && | |
| 93 #end if | |
| 94 #else | |
| 95 #if $mode.multi_fasta.select == 'concatenated' | |
| 96 #if $mode.multi_fasta.input_fasta.ext.endswith(".gz") | |
| 97 gunzip -c '$mode.multi_fasta.input_fasta' > 'contigs.fasta' && | |
| 98 #else | |
| 99 ln -s '$mode.multi_fasta.input_fasta' 'contigs.fasta' && | |
| 100 #end if | |
| 101 #set $separator = $mode.multi_fasta.separator | |
| 102 #else | |
| 103 #for $e in $mode.multi_fasta.input_fasta | |
| 104 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier)) | |
| 105 #if $e.ext.endswith(".gz") | |
| 106 gunzip -c '$e' > '${identifier}.fasta' && | |
| 107 #else | |
| 108 ln -s '$e' '${identifier}.fasta' && | |
| 109 #end if | |
| 110 #end for | |
| 111 #set $separator = ':' | |
| 112 SemiBin concatenate_fasta | |
| 113 --input-fasta *.fasta | |
| 114 --output 'output' | |
| 115 --separator '$separator' | |
| 116 -m $mode.multi_fasta.min_len | |
| 117 && | |
| 118 ln -s 'output/concatenated.fa' 'contigs.fasta' && | |
| 119 #end if | |
| 120 #end if | |
| 121 ]]></token> | |
| 122 <!-- Macro "separator": text parameter for the separator argument (default ":"), the delimiter between sample name and contig name in a combined contig file. --> <xml name="separator"> | |
| 123 <param argument="--separator" type="text" value=":" label="Separator in the contig file between sample name and contig name"/> | |
| 124 </xml> | |
| 125 <!-- Macro "input-bam-single": one BAM dataset for the input-bam argument. --> <xml name="input-bam-single"> | |
| 126 <param argument="--input-bam" type="data" format="bam" label="Read mapping to the contigs" help="Sorted BAM files"/> | |
| 127 </xml> | |
| 128 <!-- Macro "input-bam-multi": several BAM datasets (multiple="true") for the input-bam argument, one per sample. --> <xml name="input-bam-multi"> | |
| 129 <param argument="--input-bam" type="data" format="bam" multiple="true" label="Read mapping to the contigs" help="One file per sample, sorted BAM files"/> | |
| 130 </xml> | |
| 131 <!-- Cheetah snippet: symlinks the BAM input(s) into the working directory as IDENTIFIER.bam, where IDENTIFIER is the element identifier with every character outside whitespace, word characters, hyphen, backslash and dot replaced by an underscore. The BAM parameters are defined inside the "mode" conditional (see macro "mode_fasta_bam"), so they are read through the fully qualified mode.input_bam path, matching how @FASTA_FILES@ reads mode.input_fasta. Uses the "re" module, presumably imported by the enclosing command template (not visible here). --> <token name="@BAM_FILES@"><![CDATA[ | |
| 132 #if $mode.select == 'single' | |
| 133 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($mode.input_bam.element_identifier)) | |
| 134 ln -s '$mode.input_bam' '${identifier}.bam' && | |
| 135 #else | |
| 136 #for $e in $mode.input_bam | |
| 137 #set $identifier = re.sub('[^\s\w\-\\.]', '_', str($e.element_identifier)) | |
| 138 ln -s '$e' '${identifier}.bam' && | |
| 139 #end for | |
| 140 #end if | |
| 141 ]]></token> | |
| 142 <!-- Macro "ref_select": the "select" parameter that drives the "ref" conditionals; values are cached (default) and taxonomy. --> <xml name="ref_select"> | |
| 143 <param name="select" type="select" label="Reference database"> | |
| 144 <option value="cached" selected="true">Cached database</option> | |
| 145 <option value="taxonomy">Pre-computed taxonomy</option> | |
| 146 </param> | |
| 147 </xml> | |
| 148 <!-- Macro "cached_db": select whose options come from the "gtdb" data table; the no_options validator carries the message shown when that table provides no entry. --> <xml name="cached_db"> | |
| 149 <param name="cached_db" label="Cached databases" type="select"> | |
| 150 <options from_data_table="gtdb"> | |
| 151 <validator message="No GTDB database is available" type="no_options" /> | |
| 152 </options> | |
| 153 </param> | |
| 154 </xml> | |
| 155 <!-- Macro "ref-single": conditional "ref" choosing between a cached database and a single pre-computed taxonomy table (tabular). Its content is identical to the macro named "ref_single" in this file. --> <xml name="ref-single"> | |
| 156 <conditional name="ref"> | |
| 157 <expand macro="ref_select"/> | |
| 158 <when value="cached"> | |
| 159 <expand macro="cached_db"/> | |
| 160 </when> | |
| 161 <when value="taxonomy"> | |
| 162 <param argument="--taxonomy-annotation-table" type="data" format="tabular" label="Pre-computed mmseqs2 format taxonomy TSV file"/> | |
| 163 </when> | |
| 164 </conditional> | |
| 165 </xml> | |
| 166 <!-- Macro "ref-multi": conditional "ref" choosing between a cached database and several pre-computed taxonomy tables (multiple="true"). --> <xml name="ref-multi"> | |
| 167 <conditional name="ref"> | |
| 168 <expand macro="ref_select"/> | |
| 169 <when value="cached"> | |
| 170 <expand macro="cached_db"/> | |
| 171 </when> | |
| 172 <when value="taxonomy"> | |
| 173 <param argument="--taxonomy-annotation-table" type="data" format="tabular" multiple="true" label="Pre-computed mmseqs2 format taxonomy TSV file" help="One per bin file"/> | |
| 174 </when> | |
| 175 </conditional> | |
| 176 </xml> | |
| 177 <!-- Macro "ref_single": conditional "ref" choosing between a cached database and a single pre-computed taxonomy table. NOTE(review): the content duplicates the macro named "ref-single"; confirm which name the tools expand before removing either. --> <xml name="ref_single"> | |
| 178 <conditional name="ref"> | |
| 179 <expand macro="ref_select"/> | |
| 180 <when value="cached"> | |
| 181 <expand macro="cached_db"/> | |
| 182 </when> | |
| 183 <when value="taxonomy"> | |
| 184 <param argument="--taxonomy-annotation-table" type="data" format="tabular" label="Pre-computed mmseqs2 format taxonomy TSV file"/> | |
| 185 </when> | |
| 186 </conditional> | |
| 187 </xml> | |
| 188 <!-- Macro "min_len": conditional "min_len" selecting how the minimal contig length is set. automatic: no extra parameter; min-len: integer min-len value (minimum 0, default 0); ratio: float ratio between 0 and 1 (default 0.05). No option is marked selected. --> <xml name="min_len"> | |
| 189 <conditional name="min_len"> | |
| 190 <param name="method" type="select" label="Method to set up the minimal length for contigs in binning"> | |
| 191 <option value="automatic">Automatic</option> | |
| 192 <option value="min-len">Manual</option> | |
| 193 <option value="ratio">Computation based on ratio of the number of base pairs</option> | |
| 194 </param> | |
| 195 <when value="automatic"/> | |
| 196 <when value="min-len"> | |
| 197 <param argument="--min-len" type="integer" min="0" value="0" label="Minimal length for contigs in binning"/> | |
| 198 </when> | |
| 199 <when value="ratio"> | |
| 200 <param argument="--ratio" type="float" min="0" max="1" value="0.05" label="Ratio of the number of base pairs of contigs between 1000-2500 bp below which the minimal length will be set as 1000bp, otherwise 2500bp."/> | |
| 201 </when> | |
| 202 </conditional> | |
| 203 </xml> | |
| 204 <!-- Cheetah snippet: turns the "min_len" conditional into command-line options. Emits the min-len option for method "min-len", the ratio option for method "ratio", and nothing for "automatic". --> <token name="@MIN_LEN@"><![CDATA[ | |
| 205 #if $min_len.method == 'min-len' | |
| 206 --min-len $min_len.min_len | |
| 207 #else if $min_len.method == 'ratio' | |
| 208 --ratio $min_len.ratio | |
| 209 #end if | |
| 210 ]]></token> | |
| 211 <!-- Macro "random-seed": integer parameter for the random-seed argument (minimum 0, default 0). --> <xml name="random-seed"> | |
| 212 <param argument="--random-seed" type="integer" min="0" value="0" label="Random seed to reproduce result"/> | |
| 213 </xml> | |
| 214 <!-- Macro "ml-threshold": optional integer parameter for the ml-threshold argument (minimum 0, empty by default). --> <xml name="ml-threshold"> | |
| 215 <param argument="--ml-threshold" type="integer" min="0" value="" optional="true" label="Length threshold for generating must-link constraints" help="If no value is given, the threshold is calculated from the contig, and the default minimum value is 4,000 bp."/> | |
| 216 </xml> | |
| 217 <!-- Macro "epoches": integer parameter for the epoches argument (minimum 0, default 20). The spelling follows the argument name used here. --> <xml name="epoches"> | |
| 218 <param argument="--epoches" type="integer" min="0" value="20" label="Number of epoches used in the training process"/> | |
| 219 </xml> | |
| 220 <!-- Macro "batch-size": integer parameter for the batch-size argument (minimum 0, default 2048). --> <xml name="batch-size"> | |
| 221 <param argument="--batch-size" type="integer" min="0" value="2048" label="Batch size used in the training process"/> | |
| 222 </xml> | |
| 223 <!-- Macro "orf-finder": select for the orf-finder argument; values are prodigal (default) and fraggenescan. --> <xml name="orf-finder"> | |
| 224 <param argument="--orf-finder" type="select" label="ORF finder used to estimate the number of bins"> | |
| 225 <option value="prodigal" selected="true">Prodigal</option> | |
| 226 <option value="fraggenescan">Fraggenescan</option> | |
| 227 </param> | |
| 228 </xml> | |
| 229 <!-- Macro "max-node": float parameter for the max-node argument, constrained to the range 0 to 1 (default 1). --> <xml name="max-node"> | |
| 230 <param argument="--max-node" type="float" min="0" max="1" value="1" label="Fraction of contigs that considered to be binned"/> | |
| 231 </xml> | |
| 232 <!-- Macro "max-edges": integer parameter for the max-edges argument (minimum 0, default 200). --> <xml name="max-edges"> | |
| 233 <param argument="--max-edges" type="integer" min="0" value="200" label="Maximum number of edges that can be connected to one contig"/> | |
| 234 </xml> | |
| 235 <!-- Macro "environment": optional select for the environment argument of the built-in model. The empty-valued "None" option is selected by default; the other values name the available environments. --> <xml name="environment"> | |
| 236 <param argument="--environment" type="select" optional="true" label="Environment for the built-in model"> | |
| 237 <option value="" selected="true">None</option> | |
| 238 <option value="human_gut">Human gut</option> | |
| 239 <option value="dog_gut">Dog gut</option> | |
| 240 <option value="ocean">Ocean</option> | |
| 241 <option value="soil">Soil</option> | |
| 242 <option value="cat_gut">Cat gut</option> | |
| 243 <option value="human_oral">Human oral</option> | |
| 244 <option value="mouse_gut">Mouse gut</option> | |
| 245 <option value="pig_gut">Pig gut</option> | |
| 246 <option value="built_environment">Built environment</option> | |
| 247 <option value="wastewater">Wastewater</option> | |
| 248 <option value="global">Global</option> | |
| 249 </param> | |
| 250 </xml> | |
| 251 <!-- Macro "minfasta-kbs": integer parameter for the minfasta-kbs argument (minimum 0, default 200). --> <xml name="minfasta-kbs"> | |
| 252 <param argument="--minfasta-kbs" type="integer" min="0" value="200" label="Minimum bin size in Kbps"/> | |
| 253 </xml> | |
| 254 <!-- Macro "no-recluster": boolean (unchecked by default) that yields the no-recluster flag when checked and an empty string otherwise. --> <xml name="no-recluster"> | |
| 255 <param argument="--no-recluster" type="boolean" truevalue="--no-recluster" falsevalue="" checked="false" label="Do not recluster bins?"/> | |
| 256 </xml> | |
| 257 <!-- Macro "data": CSV dataset parameter for the data argument (training data). --> <xml name="data"> | |
| 258 <param argument="--data" type="data" format="csv" label="Train data"/> | |
| 259 </xml> | |
| 260 <!-- Macro "data_output_single": two CSV outputs taken from output/data.csv and output/data_split.csv; both are kept only when the mode select is "single" or "co". --> <xml name="data_output_single"> | |
| 261 <data name="single_data" format="csv" from_work_dir="output/data.csv" label="${tool.name} on ${on_string}: Training data"> | |
| 262 <filter>mode["select"]=="single" or mode["select"]=="co"</filter> | |
| 263 </data> | |
| 264 <data name="single_data_split" format="csv" from_work_dir="output/data_split.csv" label="${tool.name} on ${on_string}: Split training data"> | |
| 265 <filter>mode["select"]=="single" or mode["select"]=="co"</filter> | |
| 266 </data> | |
| 267 </xml> | |
| 268 <!-- Macro "data_output_multi": two list collections, kept only when the mode select is "multi". Files are discovered recursively under output/samples/ by relative path; the directory part before /data.csv (or /data_split.csv) becomes the element designation. The dot before "csv" is escaped so it only matches a literal dot. --> <xml name="data_output_multi"> | |
| 269 <collection name="multi_data" type="list" label="${tool.name} on ${on_string}: Training data per sample"> | |
| 270 <filter>mode["select"]=="multi"</filter> | |
| 271 <discover_datasets pattern="(?P<designation>.*)\/data\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
| 272 </collection> | |
| 273 <collection name="multi_data_split" type="list" label="${tool.name} on ${on_string}: Split training data per sample"> | |
| 274 <filter>mode["select"]=="multi"</filter> | |
| 275 <discover_datasets pattern="(?P<designation>.*)\/data_split\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
| 276 </collection> | |
| 277 </xml> | |
| 278 <!-- Macro "generate_sequence_features_extra_outputs": optional outputs gated by the mode select and by the entries of the extra_output parameter (defined by the expanding tool, not in this file). "coverage" enables the coverage outputs: two datasets for single mode, list collections for co and multi modes. "contigs" enables the per-sample contig collection in multi mode. Every literal dot in the discovery patterns is escaped so that all patterns follow the same convention. --> <xml name="generate_sequence_features_extra_outputs"> | |
| 279 <data name="single_cov" format="csv" from_work_dir="output/*_data_cov.csv" label="${tool.name} on ${on_string}: Coverage"> | |
| 280 <filter>mode["select"]=="single" and extra_output and "coverage" in extra_output</filter> | |
| 281 </data> | |
| 282 <data name="single_split_cov" format="csv" from_work_dir="output/*_data_split_cov.csv" label="${tool.name} on ${on_string}: Coverage (split data)"> | |
| 283 <filter>mode["select"]=="single" and extra_output and "coverage" in extra_output</filter> | |
| 284 </data> | |
| 285 <collection name="co_cov" type="list" label="${tool.name} on ${on_string}: Coverage"> | |
| 286 <filter>mode["select"]=="co" and extra_output and "coverage" in extra_output</filter> | |
| 287 <discover_datasets pattern=".*\.bam_(?P<designation>.*)_data_cov\.csv" format="csv" directory="output/" /> | |
| 288 </collection> | |
| 289 <collection name="co_split_cov" type="list" label="${tool.name} on ${on_string}: Coverage (split data) per sample"> | |
| 290 <filter>mode["select"]=="co" and extra_output and "coverage" in extra_output</filter> | |
| 291 <discover_datasets pattern=".*\.bam_(?P<designation>.*)_data_split_cov\.csv" format="csv" directory="output/" /> | |
| 292 </collection> | |
| 293 <collection name="multi_cov" type="list" label="${tool.name} on ${on_string}: Coverage"> | |
| 294 <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter> | |
| 295 <discover_datasets pattern=".*\.bam_(?P<designation>.*)_data_cov\.csv" format="csv" directory="output/samples/" /> | |
| 296 </collection> | |
| 297 <collection name="multi_cov_sample" type="list" label="${tool.name} on ${on_string}: Coverage per sample"> | |
| 298 <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter> | |
| 299 <discover_datasets pattern="(?P<designation>.*)\/data_cov\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
| 300 </collection> | |
| 301 <collection name="multi_split_cov" type="list" label="${tool.name} on ${on_string}: Coverage (split data) per sample"> | |
| 302 <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter> | |
| 303 <discover_datasets pattern=".*\.bam_(?P<designation>.*)_data_split_cov\.csv" format="csv" directory="output/samples/" /> | |
| 304 </collection> | |
| 305 <collection name="multi_split_cov_sample" type="list" label="${tool.name} on ${on_string}: Coverage (split data) per sample"> | |
| 306 <filter>mode["select"]=="multi" and extra_output and "coverage" in extra_output</filter> | |
| 307 <discover_datasets pattern="(?P<designation>.*)\/data_split_cov\.csv" format="csv" directory="output/samples/" recurse="true" match_relative_path="true"/> | |
| 308 </collection> | |
| 309 <collection name="multi_contigs" type="list" label="${tool.name} on ${on_string}: Contigs"> | |
| 310 <filter>mode["select"]=="multi" and extra_output and "contigs" in extra_output</filter> | |
| 311 <discover_datasets pattern="(?P<designation>.*)\.fa" format="fasta" directory="output/samples/" /> | |
| 312 </collection> | |
| 313 </xml> | |
| 314 <!-- Macro "train_output": h5 output dataset "model" taken from output/model.h5. --> <xml name="train_output"> | |
| 315 <data name="model" format="h5" from_work_dir="output/model.h5" label="${tool.name} on ${on_string}: Semi-supervised deep learning model" /> | |
| 316 </xml> | |
| 317 <!-- Macro "cannot_link_output": txt output dataset "cannot" taken from output/cannot/cannot.txt. --> <xml name="cannot_link_output"> | |
| 318 <data name="cannot" format="txt" from_work_dir="output/cannot/cannot.txt" label="${tool.name} on ${on_string}: Cannot-link constraints" /> | |
| 319 </xml> | |
| 320 <!-- Help fragment: "What it does" heading followed by a one-line description of SemiBin. --> <token name="@HELP_HEADER@"><![CDATA[ | |
| 321 What it does | |
| 322 ============ | |
| 323 | |
| 324 SemiBin is a Semi-supervised siamese neural network for metagenomic binning | |
| 325 | |
| 326 ]]></token> | |
| 327 <!-- Help fragment: list item describing the contig FASTA input. --> <token name="@HELP_INPUT_FASTA@"><![CDATA[ | |
| 328 - Contigs in fasta for 1 or several samples from single or co-assembly | |
| 329 ]]></token> | |
| 330 <!-- Help fragment: list item describing the BAM input. --> <token name="@HELP_INPUT_BAM@"><![CDATA[ | |
| 331 - BAM with reads mapping to the contigs | |
| 332 ]]></token> | |
| 333 <!-- Help fragment: list item naming the cannot-link constraints. --> <token name="@HELP_CANNOT@"><![CDATA[ | |
| 334 - Cannot-link constraints | |
| 335 ]]></token> | |
| 336 <!-- Help fragment: list item naming the training data and split training data. --> <token name="@HELP_DATA@"><![CDATA[ | |
| 337 - Training data and split training data for the model | |
| 338 ]]></token> | |
| 339 <!-- Help fragment: list item naming the trained model. --> <token name="@HELP_MODEL@"><![CDATA[ | |
| 340 - Semi-supervised deep learning model | |
| 341 ]]></token> | |
| 342 <!-- Help fragment: list items naming the reconstructed bins after and before reclustering. --> <token name="@HELP_BINS@"><![CDATA[ | |
| 343 - Reconstructed bins after reclustering | |
| 344 - Reconstructed bins before reclustering | |
| 345 ]]></token> | |
| 346 <!-- Macro "citations": citations element holding one DOI citation. --> <xml name="citations"> | |
| 347 <citations> | |
| 348 <citation type="doi">10.1038/s41467-022-29843-y</citation> | |
| 349 </citations> | |
| 350 </xml> | |
| 351 </macros> |
