Mercurial > repos > iuc > drep_dereplicate
comparison macros.xml @ 0:aba9d1e647b6 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/drep commit 8fa5ff35b45c2b046c7f4800410cf39cb89a299a"
| author | iuc |
|---|---|
| date | Tue, 05 May 2020 09:53:33 +0000 |
| parents | |
| children | e9621d0f4e6b |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:aba9d1e647b6 |
|---|---|
| 1 <macros> | |
| 2 <!-- Wrapped tool version; also reused as the drep package version by the requirements macro. --> <token name="@VERSION@">2.5.4</token> | |
| 3 <!-- Requires the drep package at @VERSION@; callers may add further requirements through the yield. --> <xml name="requirements"> | |
| 4 <requirements> | |
| 5 <requirement type="package" version="@VERSION@">drep</requirement> | |
| 6 <yield/> | |
| 7 </requirements> | |
| 8 </xml> | |
| 9 <!-- Citation block carrying the DOI 10.1038/ismej.2017.126; callers may append more citations through the yield. --> <xml name="citations"> | |
| 10 <citations> | |
| 11 <citation type="doi">10.1038/ismej.2017.126</citation> | |
| 12 <yield /> | |
| 13 </citations> | |
| 14 </xml> | |
| 15 | |
| 16 | |
| 17 <!-- Input parameter "genomes": one or more FASTA datasets (multiple="true"). --> <xml name="genomes"> | |
| 18 <param argument="--genomes" type="data" format="fasta" label="genomes fasta files" multiple="true"/> | |
| 19 </xml> | |
| 20 <!-- Symlinks every input genome under a sanitized name (characters other than word characters, hyphen, underscore and dot become underscores) and collects those names in $genomefiles. The append runs under the Cheetah silent directive so its return value is never rendered into the command. --> <token name="@PREPARE_GENOMES@"><![CDATA[ | |
| 21 #import re | |
| 22 #set $genomefiles = [] | |
| 23 #for $genome in $genomes | |
| 24 #set $input_name = $re.sub('[^\w\-_.]', '_',str($genome.element_identifier.split('/')[-1])) | |
| 25 ln -s '${genome}' '${input_name}' && | |
| 26 #silent $genomefiles.append($input_name) | |
| 27 #end for | |
| 28 ]]></token> | |
| 29 <!-- Emits the g flag followed by each single-quoted name in $genomefiles. That list is built by @PREPARE_GENOMES@, so that token is expected to be expanded earlier in the same command. --> <token name="@GENOMES@"><![CDATA[ | |
| 30 -g | |
| 31 #for $genomefile in $genomefiles | |
| 32 '${genomefile}' | |
| 33 #end for | |
| 34 ]]></token> | |
| 35 | |
| 36 | |
| 37 <!-- Optional select for the checkM method (taxonomy_wf or lineage_wf); neither option is marked as selected. --> <xml name="checkm_method"> | |
| 38 <param argument="--checkM_method" type="select" label="checkm method" optional="true"> | |
| 39 <option value="taxonomy_wf">taxonomy_wf (faster)</option> | |
| 40 <option value="lineage_wf">lineage_wf (more accurate)</option> | |
| 41 </param> | |
| 42 </xml> | |
| 43 <!-- Command fragment: adds the checkM method flag only when the optional checkM_method select has a value. --> <token name="@CHECKM_METHOD@"><![CDATA[ | |
| 44 #if $checkM_method: | |
| 45 --checkM_method $checkM_method | |
| 46 #end if | |
| 47 ]]></token> | |
| 48 | |
| 49 <!-- Opt-in filtering parameters (length, completeness, contamination) plus a nested choice of where genome quality comes from: running checkM, a user supplied CSV, or ignoring quality altogether. --> <xml name="filtering_options"> | |
| 50 <conditional name="filter"> | |
| 51 <param name="set_options" type="select" label="set filtering options"> | |
| 52 <option value="yes">Yes</option> | |
| 53 <option value="no" selected="true">No (use --checkM_method taxonomy_wf)</option> | |
| 54 </param> | |
| 55 <when value="yes"> | |
| 56 <param argument="--length" type="integer" value="50000" label="Minimum genome length"/> | |
| 57 <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/> | |
| 58 <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/> | |
| 59 | |
| 60 <conditional name="quality"> | |
| 61 <param name="source" type="select" label="genome quality"> | |
| 62 <help> | |
| 63 --ignoreGenomeQuality is useful with | |
| 64 bacteriophages or eukaryotes or things where checkM | |
| 65 scoring does not work. Will only choose genomes based | |
| 66 on length and N50. | |
| 67 </help> | |
| 68 <option value="checkm" selected="true">Run checkM</option> | |
| 69 <option value="genomeInfo">User supplied genomeInfo csv file</option> | |
| 70 <option value="ignoreGenomeQuality">--ignoreGenomeQuality (NOT RECOMMENDED!)</option> | |
| 71 </param> | |
| 72 <when value="checkm"> | |
| 73 <param argument="--checkM_method" type="select" label="checkm method" optional="true"> | |
| 74 <help> | |
| 75 Using the checkm method of lineage_wf can require more than 40Gb of RAM. | |
| 76 </help> | |
| 77 <option value="taxonomy_wf">taxonomy_wf (faster)</option> | |
| 78 <option value="lineage_wf">lineage_wf (more accurate)</option> | |
| 79 </param> | |
| 80 </when> | |
| 81 <when value="genomeInfo"> | |
| 82 <param argument="--genomeInfo" type="data" format="csv" label="genome information csv file"> | |
| 83 <help><![CDATA[ | |
| 84 A CSV dataset that must contain: [ | |
| 85 "genome"(history dataset name of .fasta dataset of that genome), | |
| 86 "completeness"(0-100 value for completeness of the genome), | |
| 87 "contamination"(0-100 value of the contamination of the genome)] | |
| 88 ]]></help> | |
| 89 </param> | |
| 90 </when> | |
| 91 <when value="ignoreGenomeQuality"/> | |
| 92 </conditional> | |
| 93 </when> | |
| 94 <when value="no"/> | |
| 95 </conditional> | |
| 96 </xml> | |
| 97 <!-- Command fragment for filtering. When the user opts in, the three thresholds are always passed; the checkM method flag is written only if a method was actually chosen (the select is optional), mirroring the guard in @CHECKM_METHOD@; the genomeInfo dataset path is single-quoted. Without opting in, taxonomy_wf is forced. --> <token name="@FILTER_OPTIONS@"><![CDATA[ | |
| 98 #if $filter.set_options == 'yes': | |
| 99 --length $filter.length | |
| 100 --completeness $filter.completeness | |
| 101 --contamination $filter.contamination | |
| 102 #if $filter.quality.source == 'checkm' and $filter.quality.checkM_method | |
| 103 --checkM_method $filter.quality.checkM_method | |
| 104 #elif $filter.quality.source == 'genomeInfo' | |
| 105 --genomeInfo '$filter.quality.genomeInfo' | |
| 106 #elif $filter.quality.source == 'ignoreGenomeQuality' | |
| 107 --ignoreGenomeQuality | |
| 108 #end if | |
| 109 #else | |
| 110 --checkM_method taxonomy_wf | |
| 111 #end if | |
| 112 ]]></token> | |
| 113 | |
| 114 <!-- Opt-in genome comparison parameters: MASH sketch size, secondary clustering algorithm and nucmer preset. --> <xml name="genome_comparison_options"> | |
| 115 <conditional name="genome_comparison"> | |
| 116 <param name="set_options" type="select" label="set genome comparison options"> | |
| 117 <option value="yes">Yes</option> | |
| 118 <option value="no" selected="true">No</option> | |
| 119 </param> | |
| 120 <when value="yes"> | |
| 121 <param argument="--MASH_sketch" type="integer" value="1000" label="MASH sketch size"/> | |
| 122 <param argument="--S_algorithm" type="select" label="Algorithm for secondary clustering comparisons"> | |
| 123 <option value="ANImf" selected="true">ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions</option> | |
| 124 <option value="ANIn">ANIn = Align whole genomes with nucmer; compare aligned regions</option> | |
| 125 <option value="gANI">gANI = Identify and align ORFs; compare aligned ORFS</option> | |
| 126 </param> | |
| 127 <param argument="-n_PRESET" type="select" label="Presets to pass to nucmer"> | |
| 128 <option value="normal" selected="true">normal = default ANIn parameters (default: normal)</option> | |
| 129 <option value="tight">tight = only align highly conserved regions</option> | |
| 130 </param> | |
| 131 </when> | |
| 132 <when value="no"/> | |
| 133 </conditional> | |
| 134 </xml> | |
| 135 <!-- Command fragment: passes the three genome comparison parameters only when set_options == 'yes'. --> <token name="@GENOME_COMPARISON_OPTIONS@"><![CDATA[ | |
| 136 #if $genome_comparison.set_options == 'yes': | |
| 137 --MASH_sketch $genome_comparison.MASH_sketch | |
| 138 --S_algorithm $genome_comparison.S_algorithm | |
| 139 -n_PRESET $genome_comparison.n_PRESET | |
| 140 #end if | |
| 141 ]]></token> | |
| 142 | |
| 143 <!-- Opt-in clustering parameters: primary and secondary ANI thresholds, skip switches, coverage threshold and method, and the clustering algorithm. --> <xml name="clustering_options"> | |
| 144 <conditional name="clustering"> | |
| 145 <param name="set_options" type="select" label="set clustering options"> | |
| 146 <option value="yes">Yes</option> | |
| 147 <option value="no" selected="true">No</option> | |
| 148 </param> | |
| 149 <when value="yes"> | |
| 150 <param argument="--P_ani" type="float" value="0.9" min="0." max="1." label="ANI threshold to form primary (MASH) clusters"/> | |
| 151 <param argument="--S_ani" type="float" value="0.99" min="0." max="1." label="ANI threshold to form secondary clusters"/> | |
| 152 | |
| 153 <param argument="--SkipMash" type="boolean" truevalue="--SkipMash" falsevalue="" checked="false" label="Skip MASH clustering, just do secondary clustering on all genomes"/> | |
| 154 <param argument="--SkipSecondary" type="boolean" truevalue="--SkipSecondary" falsevalue="" checked="false" label="Skip secondary clustering, just perform MASH clustering"/> | |
| 155 <param argument="--cov_thresh" type="float" value="0.1" min="0." max="1." label="Minimum level of overlap between genomes when doing secondary comparisons"/> | |
| 156 <param argument="--coverage_method" type="select" label="Method to calculate coverage of an alignment"> | |
| 157 <help>(for ANIn/ANImf only; gANI can only do larger method)</help> | |
| 158 <option value="larger" selected="true">larger = max((aligned length / genome 1), (aligned_length / genome2))</option> | |
| 159 <option value="total">total = 2*(aligned length) / (sum of total genome lengths)</option> | |
| 160 </param> | |
| 161 <param argument="--clusterAlg" type="select" label="Algorithm used to cluster genomes"> | |
| 162 <help>(passed to scipy.cluster.hierarchy.linkage)</help> | |
| 163 <option value="average" selected="true">average</option> | |
| 164 </param> | |
| 165 </when> | |
| 166 <when value="no"/> | |
| 167 </conditional> | |
| 168 </xml> | |
| 169 <!-- Command fragment for clustering, emitted only when set_options == 'yes'. The two boolean parameters expand to their flag text when checked and to an empty string otherwise (see their truevalue and falsevalue). --> <token name="@CLUSTERING_OPTIONS@"><![CDATA[ | |
| 170 #if $clustering.set_options == 'yes': | |
| 171 --P_ani $clustering.P_ani | |
| 172 --S_ani $clustering.S_ani | |
| 173 $clustering.SkipMash | |
| 174 $clustering.SkipSecondary | |
| 175 --cov_thresh $clustering.cov_thresh | |
| 176 --coverage_method $clustering.coverage_method | |
| 177 --clusterAlg $clustering.clusterAlg | |
| 178 #end if | |
| 179 ]]></token> | |
| 180 | |
| 181 <!-- Opt-in scoring weights; the formula they feed into is quoted in the help of completeness_weight. --> <xml name="scoring_options"> | |
| 182 <conditional name="scoring"> | |
| 183 <param name="set_options" type="select" label="set scoring options"> | |
| 184 <option value="yes">Yes</option> | |
| 185 <option value="no" selected="true">No</option> | |
| 186 </param> | |
| 187 <when value="yes"> | |
| 188 <param argument="--completeness_weight" type="float" value="1" label="completeness weight"> | |
| 189 <help> | |
| 190 Based off of the formula: | |
| 191 A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) | |
| 192 A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight; | |
| 193 </help> | |
| 194 </param> | |
| 195 <param argument="--contamination_weight" type="float" value="5" label="contamination weight"/> | |
| 196 <param argument="--strain_heterogeneity_weight" type="float" value="1" min="0." max="1." label="strain heterogeneity weight"/> | |
| 197 <param argument="--N50_weight" type="float" value=".5" label="weight of log(genome N50)"/> | |
| 198 <param argument="--size_weight" type="float" value="0" label="weight of log(genome size)"/> | |
| 199 </when> | |
| 200 <when value="no"/> | |
| 201 </conditional> | |
| 202 </xml> | |
| 203 <!-- Command fragment: passes the five scoring weights only when set_options == 'yes'. --> <token name="@SCORING_OPTIONS@"><![CDATA[ | |
| 204 #if $scoring.set_options == 'yes': | |
| 205 --completeness_weight $scoring.completeness_weight | |
| 206 --contamination_weight $scoring.contamination_weight | |
| 207 --strain_heterogeneity_weight $scoring.strain_heterogeneity_weight | |
| 208 --N50_weight $scoring.N50_weight | |
| 209 --size_weight $scoring.size_weight | |
| 210 #end if | |
| 211 ]]></token> | |
| 212 | |
| 213 <!-- Opt-in taxonomy parameters: method, percent threshold and centrifuge index. NOTE(review): cent_index declares an empty format attribute; confirm which datatype a centrifuge index should use. --> <xml name="taxonomy_options"> | |
| 214 <conditional name="taxonomy"> | |
| 215 <param name="set_options" type="select" label="generate taxonomy information"> | |
| 216 <option value="yes">Yes</option> | |
| 217 <option value="no" selected="true">No</option> | |
| 218 </param> | |
| 219 <when value="yes"> | |
| 220 <param argument="--tax_method" type="select" label="Method of determining taxonomy"> | |
| 221 <help>percent applies the minimum percent set below; max takes the level with the most overall hits</help> | |
| 222 <option value="percent" selected="true">percent = The most descriptive taxonomic level with at least (per) hits</option> | |
| 223 <option value="max">max = The centrifuge taxonomic level with the most overall hits</option> | |
| 224 </param> | |
| 225 <param argument="--percent" type="float" value="50" min="0" max="100" label="minimum percent for percent method"/> | |
| 226 <param argument="--cent_index" type="data" format="" label="centrifuge index"/> | |
| 227 </when> | |
| 228 <when value="no"/> | |
| 229 </conditional> | |
| 230 </xml> | |
| 231 <!-- Command fragment: when set_options == 'yes', switches taxonomy on and passes method, percent and the centrifuge index; the index dataset path is single-quoted so the shell treats it as one argument. --> <token name="@TAXONOMY_OPTIONS@"><![CDATA[ | |
| 232 #if $taxonomy.set_options == 'yes': | |
| 233 --run_tax | |
| 234 --tax_method $taxonomy.tax_method | |
| 235 --percent $taxonomy.percent | |
| 236 --cent_index '$taxonomy.cent_index' | |
| 237 #end if | |
| 238 ]]></token> | |
| 239 | |
| 240 <!-- Opt-in warning thresholds; all three are floats restricted to the range 0 to 1. --> <xml name="warning_options"> | |
| 241 <conditional name="warning"> | |
| 242 <param name="set_options" type="select" label="set warning options"> | |
| 243 <option value="yes">Yes</option> | |
| 244 <option value="no" selected="true">No</option> | |
| 245 </param> | |
| 246 <when value="yes"> | |
| 247 <param argument="--warn_dist" type="float" value="0.25" min="0" max="1" label="How far from the threshold to throw cluster warnings"/> | |
| 248 <param argument="--warn_sim" type="float" value="0.98" min="0" max="1" label="Similarity threshold for warnings between dereplicated genomes"/> | |
| 249 <param argument="--warn_aln" type="float" value="0.25" min="0" max="1" label="Minimum aligned fraction for warnings between dereplicated genomes (ANIn)"/> | |
| 250 </when> | |
| 251 <when value="no"/> | |
| 252 </conditional> | |
| 253 </xml> | |
| 254 <!-- Command fragment: passes the three warning thresholds only when set_options == 'yes'. --> <token name="@WARNING_OPTIONS@"><![CDATA[ | |
| 255 #if $warning.set_options == 'yes': | |
| 256 --warn_dist $warning.warn_dist | |
| 257 --warn_sim $warning.warn_sim | |
| 258 --warn_aln $warning.warn_aln | |
| 259 #end if | |
| 260 ]]></token> | |
| 261 | |
| 262 <!-- Multi-select (optional="false") naming the outputs to keep; the option values are the strings tested by the filter expressions of the output macros. Wrappers can add options through the yield. --> <xml name="select_outputs"> | |
| 263 <param name="select_outputs" type="select" multiple="true" optional="false" label="Select outputs"> | |
| 264 <option value="log" selected="true">log</option> | |
| 265 <option value="warnings" selected="true">Warnings</option> | |
| 266 <option value="Primary_clustering_dendrogram" selected="true">Primary_clustering_dendrogram.pdf</option> | |
| 267 <option value="Secondary_clustering_dendrograms">Secondary_clustering_dendrograms.pdf</option> | |
| 268 <option value="Secondary_clustering_MDS">Secondary_clustering_MDS.pdf</option> | |
| 269 <option value="Clustering_scatterplots" selected="true">Clustering_scatterplots.pdf</option> | |
| 270 <yield/> | |
| 271 </param> | |
| 272 </xml> | |
| 273 <!-- Expands select_outputs and adds four more choices (Cluster_scoring, Winning_genomes, Widb, Chdb) through its yield. --> <xml name="select_drep_outputs"> | |
| 274 <expand macro="select_outputs"> | |
| 275 <option value="Cluster_scoring">Cluster_scoring.pdf</option> | |
| 276 <option value="Winning_genomes">Winning_genomes.pdf</option> | |
| 277 <option value="Widb">Widb.csv</option> | |
| 278 <option value="Chdb">Chdb.tsv</option> | |
| 279 </expand> | |
| 280 </xml> | |
| 281 | |
| 282 <!-- Outputs collected from outdir/log and outdir/figures; each one is kept only if its name is in select_outputs. NOTE(review): the log filter additionally accepts an empty selection, which the select_outputs parameter (optional="false") may never produce; confirm. --> <xml name="common_outputs"> | |
| 283 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outdir/log/logger.log"> | |
| 284 <filter>'log' in select_outputs or not select_outputs</filter> | |
| 285 </data> | |
| 286 <data name="warnings" format="txt" label="${tool.name} on ${on_string}: Warnings" from_work_dir="outdir/log/warnings.txt"> | |
| 287 <filter>'warnings' in select_outputs</filter> | |
| 288 </data> | |
| 289 <data name="Primary_clustering_dendrogram" format="pdf" label="${tool.name} on ${on_string}: Primary_clustering_dendrogram.pdf" from_work_dir="outdir/figures/Primary_clustering_dendrogram.pdf"> | |
| 290 <filter>'Primary_clustering_dendrogram' in select_outputs</filter> | |
| 291 </data> | |
| 292 <data name="Secondary_clustering_dendrograms" format="pdf" label="${tool.name} on ${on_string}: Secondary_clustering_dendrograms.pdf" from_work_dir="outdir/figures/Secondary_clustering_dendrograms.pdf"> | |
| 293 <filter>'Secondary_clustering_dendrograms' in select_outputs</filter> | |
| 294 </data> | |
| 295 <data name="Secondary_clustering_MDS" format="pdf" label="${tool.name} on ${on_string}: Secondary_clustering_MDS.pdf" from_work_dir="outdir/figures/Secondary_clustering_MDS.pdf"> | |
| 296 <filter>'Secondary_clustering_MDS' in select_outputs</filter> | |
| 297 </data> | |
| 298 <data name="Clustering_scatterplots" format="pdf" label="${tool.name} on ${on_string}: Clustering_scatterplots.pdf" from_work_dir="outdir/figures/Clustering_scatterplots.pdf"> | |
| 299 <filter>'Clustering_scatterplots' in select_outputs</filter> | |
| 300 </data> | |
| 301 </xml> | |
| 302 | |
| 303 | |
| 304 <!-- Everything from common_outputs plus four further outputs (two figures, Widb.csv and Chdb.tsv), each gated on select_outputs in the same way. --> <xml name="drep_outputs"> | |
| 305 <expand macro="common_outputs"/> | |
| 306 <data name="Cluster_scoring" format="pdf" label="${tool.name} on ${on_string}: Cluster_scoring.pdf" from_work_dir="outdir/figures/Cluster_scoring.pdf"> | |
| 307 <filter>'Cluster_scoring' in select_outputs</filter> | |
| 308 </data> | |
| 309 <data name="Winning_genomes" format="pdf" label="${tool.name} on ${on_string}: Winning_genomes.pdf" from_work_dir="outdir/figures/Winning_genomes.pdf"> | |
| 310 <filter>'Winning_genomes' in select_outputs</filter> | |
| 311 </data> | |
| 312 <data name="Widb" format="csv" label="${tool.name} on ${on_string}: Widb.csv" from_work_dir="outdir/data_tables/Widb.csv"> | |
| 313 <filter>'Widb' in select_outputs</filter> | |
| 314 </data> | |
| 315 <data name="Chdb" format="tabular" label="${tool.name} on ${on_string}: Chdb.tsv" from_work_dir="outdir/data/checkM/checkM_outdir/Chdb.tsv"> | |
| 316 <filter>'Chdb' in select_outputs</filter> | |
| 317 </data> | |
| 318 </xml> | |
| 319 | |
| 320 | |
| 321 <!-- Test skeleton: supplies three FASTA test genomes and checks the log output with whatever assertions the caller passes in through the yield. --> <xml name="test_defaults_log"> | |
| 322 <test> | |
| 323 <param name="genomes" ftype="fasta" value="Enterococcus_casseliflavus_EC20.fasta,Enterococcus_faecalis_T2.fna,Enterococcus_faecalis_TX0104.fa"/> | |
| 324 <output name="log"> | |
| 325 <assert_contents> | |
| 326 <yield/> | |
| 327 </assert_contents> | |
| 328 </output> | |
| 329 </test> | |
| 330 </xml> | |
| 331 | |
| 332 <token name="@GENOMES_HELP@"><![CDATA[ | |
| 333 I/O PARAMETERS: | |
| 334 -g [GENOMES [GENOMES ...]], --genomes [GENOMES [GENOMES ...]] | |
| 335 genomes to cluster in .fasta format | |
| 336 (default: None) | |
| 337 | |
| 338 | |
| 339 ]]></token> | |
| 340 | |
| 341 <token name="@FILTERING_HELP@"><![CDATA[ | |
| 342 FILTERING OPTIONS: | |
| 343 -l LENGTH, --length LENGTH | |
| 344 Minimum genome length | |
| 345 (default: 50000) | |
| 346 | |
| 347 | |
| 348 -comp COMPLETENESS, --completeness COMPLETENESS | |
| 349 Minimum genome completeness | |
| 350 (default: 75) | |
| 351 | |
| 352 | |
| 353 -con CONTAMINATION, --contamination CONTAMINATION | |
| 354 Maximum genome contamination | |
| 355 (default: 25) | |
| 356 | |
| 357 | |
| 358 --ignoreGenomeQuality | |
| 359 Don't run checkM or do any quality filtering. NOT | |
| 360 RECOMMENDED! This is useful for use with | |
| 361 bacteriophages or eukaryotes or things where checkM | |
| 362 scoring does not work. Will only choose genomes based | |
| 363 on length and N50 (default: False) | |
| 364 | |
| 365 | |
| 366 ]]></token> | |
| 367 | |
| 368 <token name="@GENOME_COMPARISON_HELP@"><![CDATA[ | |
| 369 GENOME COMPARISON PARAMETERS: | |
| 370 -ms MASH_SKETCH, --MASH_sketch MASH_SKETCH | |
| 371 MASH sketch size (default: 1000) | |
| 372 | |
| 373 --S_algorithm {goANI,ANIn,ANImf,gANI} | |
| 374 Algorithm for secondary clustering comparisons: | |
| 375 ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions | |
| 376 ANIn = Align whole genomes with nucmer; compare aligned regions | |
| 377 gANI = Identify and align ORFs; compare aligned ORFS | |
| 378 (default: ANImf) | |
| 379 | |
| 380 -n_PRESET {normal,tight} | |
| 381 Presets to pass to nucmer | |
| 382 tight = only align highly conserved regions | |
| 383 normal = default ANIn parameters (default: normal) | |
| 384 | |
| 385 ]]></token> | |
| 386 | |
| 387 <token name="@CLUSTERING_HELP@"><![CDATA[ | |
| 388 CLUSTERING PARAMETERS: | |
| 389 -pa P_ANI, --P_ani P_ANI | |
| 390 ANI threshold to form primary (MASH) clusters | |
| 391 (default: 0.9) | |
| 392 -sa S_ANI, --S_ani S_ANI | |
| 393 ANI threshold to form secondary clusters | |
| 394 (default: 0.99) | |
| 395 | |
| 396 --SkipMash Skip MASH clustering, just do secondary clustering on | |
| 397 all genomes (default: False) | |
| 398 --SkipSecondary Skip secondary clustering, just perform MASH clustering | |
| 399 (default: False) | |
| 400 | |
| 401 -nc COV_THRESH, --cov_thresh COV_THRESH | |
| 402 Minimum level of overlap between genomes when doing | |
| 403 secondary comparisons (default: 0.1) | |
| 404 -cm {total,larger}, --coverage_method {total,larger} | |
| 405 Method to calculate coverage of an alignment | |
| 406 (for ANIn/ANImf only; gANI can only do larger method) | |
| 407 total = 2*(aligned length) / (sum of total genome lengths) | |
| 408 larger = max((aligned length / genome 1), (aligned_length / genome2)) | |
| 409 (default: larger) | |
| 410 | |
| 411 --clusterAlg CLUSTERALG | |
| 412 Algorithm used to cluster genomes (passed to | |
| 413 scipy.cluster.hierarchy.linkage) (default: average) | |
| 414 | |
| 415 ]]></token> | |
| 416 | |
| 417 <token name="@SCORING_HELP@"><![CDATA[ | |
| 418 SCORING CRITERIA | |
| 419 Based off of the formula: | |
| 420 A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) | |
| 421 | |
| 422 A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight: | |
| 423 -comW COMPLETENESS_WEIGHT, --completeness_weight COMPLETENESS_WEIGHT | |
| 424 completeness weight (default: 1) | |
| 425 -conW CONTAMINATION_WEIGHT, --contamination_weight CONTAMINATION_WEIGHT | |
| 426 contamination weight (default: 5) | |
| 427 -strW STRAIN_HETEROGENEITY_WEIGHT, --strain_heterogeneity_weight STRAIN_HETEROGENEITY_WEIGHT | |
| 428 strain heterogeneity weight (default: 1) | |
| 429 -N50W N50_WEIGHT, --N50_weight N50_WEIGHT | |
| 430 weight of log(genome N50) (default: 0.5) | |
| 431 -sizeW SIZE_WEIGHT, --size_weight SIZE_WEIGHT | |
| 432 weight of log(genome size) (default: 0) | |
| 433 | |
| 434 | |
| 435 ]]></token> | |
| 436 | |
| 437 <token name="@TAXONOMY_HELP@"><![CDATA[ | |
| 438 TAXONOMY: | |
| 439 --run_tax generate taxonomy information (Tdb) | |
| 440 (default: False) | |
| 441 | |
| 442 --tax_method {percent,max} | |
| 443 Method of determining taxonomy | |
| 444 percent = The most descriptive taxonomic level with at least (per) hits | |
| 445 max = The centrifuge taxonomic level with the most overall hits | |
| 446 (default: percent) | |
| 447 | |
| 448 | |
| 449 -per PERCENT, --percent PERCENT | |
| 450 minimum percent for percent method | |
| 451 (default: 50) | |
| 452 | |
| 453 | |
| 454 --cent_index CENT_INDEX | |
| 455 path to centrifuge index (for example, | |
| 456 /home/mattolm/download/centrifuge/indices/b+h+v) | |
| 457 (default: None) | |
| 458 | |
| 459 ]]></token> | |
| 460 | |
| 461 <token name="@WARNINGS_HELP@"><![CDATA[ | |
| 462 WARNINGS: | |
| 463 --warn_dist WARN_DIST | |
| 464 How far from the threshold to throw cluster warnings | |
| 465 (default: 0.25) | |
| 466 --warn_sim WARN_SIM Similarity threshold for warnings between dereplicated | |
| 467 genomes (default: 0.98) | |
| 468 --warn_aln WARN_ALN Minimum aligned fraction for warnings between | |
| 469 dereplicated genomes (ANIn) (default: 0.25) | |
| 470 | |
| 471 ]]></token> | |
| 472 | |
| 473 | |
| 474 </macros> |
