Mercurial > repos > jjohnson > drep
comparison macros.xml @ 0:cb142f79f424 draft default tip
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/drep commit b155a1d533b7317ceb0ec642ffe3e986117df539"
| author | jjohnson |
|---|---|
| date | Mon, 06 Jan 2020 15:37:18 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:cb142f79f424 |
|---|---|
| 1 <macros> | |
| 2 <token name="@VERSION@">2.3.2</token> | |
| 3 <xml name="requirements"> | |
| 4 <requirements> | |
| 5 <requirement type="package" version="@VERSION@">drep</requirement> | |
| 6 <yield/> | |
| 7 </requirements> | |
| 8 </xml> | |
| 9 <xml name="citations"> | |
| 10 <citations> | |
| 11 <citation type="doi">10.1038/ismej.2017.126</citation> | |
| 12 <yield /> | |
| 13 </citations> | |
| 14 </xml> | |
| 15 | |
| 16 | |
| 17 <xml name="genomes"> | |
| 18 <param argument="--genomes" type="data" format="fasta" label="genomes fasta files" multiple="true"/> | |
| 19 </xml> | |
| 20 <token name="@PREPARE_GENOMES@"><![CDATA[ | |
| 21 #import re | |
| 22 #set $genomefiles = [] | |
| 23 #for $genome in $genomes | |
| 24 #set $input_name = $re.sub('[^\w\-_.]', '_',str($genome.element_identifier.split('/')[-1])) | |
| 25 ln -s '${genome}' '${input_name}' && | |
| 26 #silent $genomefiles.append($input_name) | |
| 27 #end for | |
| 28 ]]></token> | |
| 29 <token name="@GENOMES@"><![CDATA[ | |
| 30 -g | |
| 31 #for $genomefile in $genomefiles | |
| 32 '${genomefile}' | |
| 33 #end for | |
| 34 ]]></token> | |
| 35 | |
| 36 | |
| 37 <xml name="checkm_method"> | |
| 38 <param argument="--checkM_method" type="select" label="checkm method" optional="true"> | |
| 39 <option value="lineage_wf">lineage_wf (more accurate)</option> | |
| 40 <option value="taxonomy_wf">taxonomy_wf (faster)</option> | |
| 41 </param> | |
| 42 </xml> | |
| 43 <token name="@CHECKM_METHOD@"><![CDATA[ | |
| 44 #if $checkM_method: | |
| 45 --checkM_method $checkM_method | |
| 46 #end if | |
| 47 ]]></token> | |
| 48 | |
| 49 <xml name="filtering_options"> | |
| 50 <conditional name="filter"> | |
| 51 <param name="set_options" type="select" label="set filtering options"> | |
| 52 <option value="yes">Yes</option> | |
| 53 <option value="no" selected="true">No</option> | |
| 54 </param> | |
| 55 <when value="yes"> | |
| 56 <param argument="--length" type="integer" value="50000" label="Minimum genome length"/> | |
| 57 <param argument="--completeness" type="integer" value="75" min="0" max="100" label="Minimum genome completeness percent"/> | |
| 58 <param argument="--contamination" type="integer" value="25" min="0" max="100" label="Maximum genome contamination percent"/> | |
| 59 | |
| 60 <conditional name="quality"> | |
| 61 <param argument="source" type="select" label="genome quality"> | |
| 62 <help> | |
| 63 --ignoreGenomeQuality is useful with | |
| 64 bacteriophages or eukaryotes or things where checkM | |
| 65 scoring does not work. Will only choose genomes based | |
| 66 on length and N50. | |
| 67 </help> | |
| 68 <option value="checkm" selected="true">Run checkM</option> | |
| 69 <option value="genomeInfo">User supplied genomeInfo csv file</option> | |
| 70 <option value="ignoreGenomeQuality">--ignoreGenomeQuality (NOT RECOMMENDED!)</option> | |
| 71 </param> | |
| 72 <when value="checkm"> | |
| 73 <param argument="--checkM_method" type="select" label="checkm method" optional="true"> | |
| 74 <option value="lineage_wf">lineage_wf (more accurate)</option> | |
| 75 <option value="taxonomy_wf">taxonomy_wf (faster)</option> | |
| 76 </param> | |
| 77 </when> | |
| 78 <when value="genomeInfo"> | |
| 79 <param argument="--genomeInfo" type="data" format="csv" label="genome information csv file"> | |
| 80 <help><![CDATA[ | |
| 81 A CSV dataset that must contain: [ | |
| 82 "genome"(history dataset name of .fasta dataset of that genome), | |
| 83 "completeness"(0-100 value for completeness of the genome), | |
| 84 "contamination"(0-100 value of the contamination of the genome)] | |
| 85 ]]></help> | |
| 86 </param> | |
| 87 </when> | |
| 88 <when value="ignoreGenomeQuality"/> | |
| 89 </conditional> | |
| 90 | |
| 91 </when> | |
| 92 <when value="no"/> | |
| 93 </conditional> | |
| 94 </xml> | |
| 95 <token name="@FILTER_OPTIONS@"><![CDATA[ | |
| 96 #if $filter.set_options == 'yes': | |
| 97 --length $filter.length | |
| 98 --completeness $filter.completeness | |
| 99 --contamination $filter.contamination | |
| 100 #if $filter.quality.source == 'checkm' and $filter.quality.checkM_method | |
| 101 --checkM_method $filter.quality.checkM_method | |
| 102 #elif $filter.quality.source == 'genomeInfo' | |
| 103 --genomeInfo '$filter.quality.genomeInfo' | |
| 104 #elif $filter.quality.source == 'ignoreGenomeQuality' | |
| 105 --ignoreGenomeQuality | |
| 106 #end if | |
| 107 #end if | |
| 108 ]]></token> | |
| 109 | |
| 110 <xml name="genome_comparison_options"> | |
| 111 <conditional name="genome_comparison"> | |
| 112 <param name="set_options" type="select" label="set genome comparison options"> | |
| 113 <option value="yes">Yes</option> | |
| 114 <option value="no" selected="true">No</option> | |
| 115 </param> | |
| 116 <when value="yes"> | |
| 117 <param argument="--MASH_sketch" type="integer" value="1000" label="MASH sketch size"/> | |
| 118 <param argument="--S_algorithm" type="select" label="Algorithm for secondary clustering comparisons"> | |
| 119 <option value="ANImf" selected="true">ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions</option> | |
| 120 <option value="ANIn">ANIn = Align whole genomes with nucmer; compare aligned regions</option> | |
| 121 <option value="gANI">gANI = Identify and align ORFs; compare aligned ORFS</option> | |
| 122 </param> | |
| 123 <param argument="-n_PRESET" type="select" label="Presets to pass to nucmer"> | |
| 124 <option value="normal" selected="true">normal = default ANIn parameters (default: normal)</option> | |
| 125 <option value="tight">tight = only align highly conserved regions</option> | |
| 126 </param> | |
| 127 </when> | |
| 128 <when value="no"/> | |
| 129 </conditional> | |
| 130 </xml> | |
| 131 <token name="@GENOME_COMPARISON_OPTIONS@"><![CDATA[ | |
| 132 #if $genome_comparison.set_options == 'yes': | |
| 133 --MASH_sketch $genome_comparison.MASH_sketch | |
| 134 --S_algorithm $genome_comparison.S_algorithm | |
| 135 -n_PRESET $genome_comparison.n_PRESET | |
| 136 #end if | |
| 137 ]]></token> | |
| 138 | |
| 139 <xml name="clustering_options"> | |
| 140 <conditional name="clustering"> | |
| 141 <param name="set_options" type="select" label="set clustering options"> | |
| 142 <option value="yes">Yes</option> | |
| 143 <option value="no" selected="true">No</option> | |
| 144 </param> | |
| 145 <when value="yes"> | |
| 146 <param argument="--P_ani" type="float" value="0.9" min="0." max="1." label="ANI threshold to form primary (MASH) clusters"/> | |
| 147 <param argument="--S_ani" type="float" value="0.99" min="0." max="1." label="ANI threshold to form secondary clusters"/> | |
| 148 | |
| 149 <param argument="--SkipMash" type="boolean" truevalue="--SkipMash" falsevalue="" checked="false" label="Skip MASH clustering, just do secondary clustering on all genomes"/> | |
| 150 <param argument="--SkipSecondary" type="boolean" truevalue="--SkipSecondary" falsevalue="" checked="false" label="Skip secondary clustering, just perform MASH clustering"/> | |
| 151 <param argument="--cov_thresh" type="float" value="0.1" min="0." max="1." label="Minimum level of overlap between genomes when doing secondary comparisons"/> | |
| 152 <param argument="--coverage_method" type="select" label="Method to calculate coverage of an alignment"> | |
| 153 <help>(for ANIn/ANImf only; gANI can only do larger method)</help> | |
| 154 <option value="larger" selected="true">larger = max((aligned length / genome 1), (aligned_length / genome2))</option> | |
| 155 <option value="total">total = 2*(aligned length) / (sum of total genome lengths)</option> | |
| 156 </param> | |
| 157 <param argument="--clusterAlg" type="select" label="Algorithm used to cluster genomes"> | |
| 158 <help>(passed to scipy.cluster.hierarchy.linkage)</help> | |
| 159 <option value="average" selected="true">average</option> | |
| 160 </param> | |
| 161 </when> | |
| 162 <when value="no"/> | |
| 163 </conditional> | |
| 164 </xml> | |
| 165 <token name="@CLUSTERING_OPTIONS@"><![CDATA[ | |
| 166 #if $clustering.set_options == 'yes': | |
| 167 --P_ani $clustering.P_ani | |
| 168 --S_ani $clustering.S_ani | |
| 169 $clustering.SkipMash | |
| 170 $clustering.SkipSecondary | |
| 171 --cov_thresh $clustering.cov_thresh | |
| 172 --coverage_method $clustering.coverage_method | |
| 173 --clusterAlg $clustering.clusterAlg | |
| 174 #end if | |
| 175 ]]></token> | |
| 176 | |
| 177 <xml name="scoring_options"> | |
| 178 <conditional name="scoring"> | |
| 179 <param name="set_options" type="select" label="set scoring options"> | |
| 180 <option value="yes">Yes</option> | |
| 181 <option value="no" selected="true">No</option> | |
| 182 </param> | |
| 183 <when value="yes"> | |
| 184 <param argument="--completeness_weight" type="float" value="1" label="completeness weight"> | |
| 185 <help> | |
| 186 Based off of the formula: | |
| 187 A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) | |
| 188 A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight; | |
| 189 </help> | |
| 190 </param> | |
| 191 <param argument="--contamination_weight" type="float" value="5" label="contamination weight"/> | |
| 192 <param argument="--strain_heterogeneity_weight" type="float" value="1" min="0." max="1." label="strain heterogeneity weight"/> | |
| 193 <param argument="--N50_weight" type="float" value=".5" label="weight of log(genome N50)"/> | |
| 194 <param argument="--size_weight" type="float" value="0" label="weight of log(genome size)"/> | |
| 195 </when> | |
| 196 <when value="no"/> | |
| 197 </conditional> | |
| 198 </xml> | |
| 199 <token name="@SCORING_OPTIONS@"><![CDATA[ | |
| 200 #if $scoring.set_options == 'yes': | |
| 201 --completeness_weight $scoring.completeness_weight | |
| 202 --contamination_weight $scoring.contamination_weight | |
| 203 --strain_heterogeneity_weight $scoring.strain_heterogeneity_weight | |
| 204 --N50_weight $scoring.N50_weight | |
| 205 --size_weight $scoring.size_weight | |
| 206 #end if | |
| 207 ]]></token> | |
| 208 | |
| 209 <xml name="taxonomy_options"> | |
| 210 <conditional name="taxonomy"> | |
| 211 <param name="set_options" type="select" label="generate taxonomy information"> | |
| 212 <option value="yes">Yes</option> | |
| 213 <option value="no" selected="true">No</option> | |
| 214 </param> | |
| 215 <when value="yes"> | |
| 216 <param argument="--tax_method" type="select" label="Method of determining taxonomy"> | |
| 217 <help>(the "percent" method uses the minimum percent threshold set below)</help> | |
| 218 <option value="percent" selected="true">percent = The most descriptive taxonomic level with at least (per) hits</option> | |
| 219 <option value="max">max = The centrifuge taxonomic level with the most overall hits</option> | |
| 220 </param> | |
| 221 <param argument="--percent" type="float" value="50" min="0" max="100" label="minimum percent for percent method"/> | |
| 222 <param argument="--cent_index" type="data" format="" label="centrifuge index"/> | |
| 223 </when> | |
| 224 <when value="no"/> | |
| 225 </conditional> | |
| 226 </xml> | |
| 227 <token name="@TAXONOMY_OPTIONS@"><![CDATA[ | |
| 228 #if $taxonomy.set_options == 'yes': | |
| 229 --run_tax | |
| 230 --tax_method $taxonomy.tax_method | |
| 231 --percent $taxonomy.percent | |
| 232 --cent_index '$taxonomy.cent_index' | |
| 233 #end if | |
| 234 ]]></token> | |
| 235 | |
| 236 <xml name="warning_options"> | |
| 237 <conditional name="warning"> | |
| 238 <param name="set_options" type="select" label="set warning options"> | |
| 239 <option value="yes">Yes</option> | |
| 240 <option value="no" selected="true">No</option> | |
| 241 </param> | |
| 242 <when value="yes"> | |
| 243 <param argument="--warn_dist" type="float" value="0.25" min="0" max="1" label="How far from the threshold to throw cluster warnings"/> | |
| 244 <param argument="--warn_sim" type="float" value="0.98" min="0" max="1" label="Similarity threshold for warnings between dereplicated genomes"/> | |
| 245 <param argument="--warn_aln" type="float" value="0.25" min="0" max="1" label="Minimum aligned fraction for warnings between dereplicated genomes (ANIn)"/> | |
| 246 </when> | |
| 247 <when value="no"/> | |
| 248 </conditional> | |
| 249 </xml> | |
| 250 <token name="@WARNING_OPTIONS@"><![CDATA[ | |
| 251 #if $warning.set_options == 'yes': | |
| 252 --warn_dist $warning.warn_dist | |
| 253 --warn_sim $warning.warn_sim | |
| 254 --warn_aln $warning.warn_aln | |
| 255 #end if | |
| 256 ]]></token> | |
| 257 | |
| 258 <xml name="select_outputs"> | |
| 259 </xml> | |
| 260 | |
| 261 <xml name="common_outputs"> | |
| 262 <data name="log" format="txt" label="${tool.name} on ${on_string}: Log" from_work_dir="outdir/log/logger.log"/> | |
| 263 <data name="warnings" format="txt" label="${tool.name} on ${on_string}: Warnings" from_work_dir="outdir/log/warnings.txt"/> | |
| 264 <data name="Primary_clustering_dendrogram" format="pdf" label="${tool.name} on ${on_string}: Primary_clustering_dendrogram.pdf" from_work_dir="outdir/figures/Primary_clustering_dendrogram.pdf"/> | |
| 265 <data name="Secondary_clustering_dendrograms" format="pdf" label="${tool.name} on ${on_string}: Secondary_clustering_dendrograms.pdf" from_work_dir="outdir/figures/Secondary_clustering_dendrograms.pdf"/> | |
| 266 <data name="Secondary_clustering_MDS" format="pdf" label="${tool.name} on ${on_string}: Secondary_clustering_MDS.pdf" from_work_dir="outdir/figures/Secondary_clustering_MDS.pdf"/> | |
| 267 <data name="Clustering_scatterplots" format="pdf" label="${tool.name} on ${on_string}: Clustering_scatterplots.pdf" from_work_dir="outdir/figures/Clustering_scatterplots.pdf"/> | |
| 268 </xml> | |
| 269 <xml name="common_outputs2"> | |
| 270 </xml> | |
| 271 | |
| 272 <token name="@GENOMES_HELP@"><![CDATA[ | |
| 273 I/O PARAMETERS: | |
| 274 -g [GENOMES [GENOMES ...]], --genomes [GENOMES [GENOMES ...]] | |
| 275 genomes to cluster in .fasta format (default: None) | |
| 276 ]]></token> | |
| 277 | |
| 278 <token name="@FILTERING_HELP@"><![CDATA[ | |
| 279 FILTERING OPTIONS: | |
| 280 -l LENGTH, --length LENGTH | |
| 281 Minimum genome length (default: 50000) | |
| 282 -comp COMPLETENESS, --completeness COMPLETENESS | |
| 283 Minimum genome completeness (default: 75) | |
| 284 -con CONTAMINATION, --contamination CONTAMINATION | |
| 285 Maximum genome contamination (default: 25) | |
| 286 --ignoreGenomeQuality | |
| 287 Don't run checkM or do any quality filtering. NOT | |
| 288 RECOMMENDED! This is useful for use with | |
| 289 bacteriophages or eukaryotes or things where checkM | |
| 290 scoring does not work. Will only choose genomes based | |
| 291 on length and N50 (default: False) | |
| 292 | |
| 293 | |
| 294 ]]></token> | |
| 295 | |
| 296 <token name="@GENOME_COMPARISON_HELP@"><![CDATA[ | |
| 297 GENOME COMPARISON PARAMETERS: | |
| 298 -ms MASH_SKETCH, --MASH_sketch MASH_SKETCH | |
| 299 MASH sketch size (default: 1000) | |
| 300 --S_algorithm {goANI,ANIn,ANImf,gANI} | |
| 301 Algorithm for secondary clustering comparisons: | |
| 302 ANImf = (RECOMMENDED) Align whole genomes with nucmer; filter alignment; compare aligned regions | |
| 303 ANIn = Align whole genomes with nucmer; compare aligned regions | |
| 304 gANI = Identify and align ORFs; compare aligned ORFS | |
| 305 (default: ANImf) | |
| 306 -n_PRESET {normal,tight} | |
| 307 Presets to pass to nucmer | |
| 308 tight = only align highly conserved regions | |
| 309 normal = default ANIn parameters (default: normal) | |
| 310 | |
| 311 ]]></token> | |
| 312 | |
| 313 <token name="@CLUSTERING_HELP@"><![CDATA[ | |
| 314 CLUSTERING PARAMETERS: | |
| 315 -pa P_ANI, --P_ani P_ANI | |
| 316 ANI threshold to form primary (MASH) clusters | |
| 317 (default: 0.9) | |
| 318 -sa S_ANI, --S_ani S_ANI | |
| 319 ANI threshold to form secondary clusters (default: | |
| 320 0.99) | |
| 321 --SkipMash Skip MASH clustering, just do secondary clustering on | |
| 322 all genomes (default: False) | |
| 323 --SkipSecondary Skip secondary clustering, just perform MASH | |
| 324 clustering (default: False) | |
| 325 -nc COV_THRESH, --cov_thresh COV_THRESH | |
| 326 Minimum level of overlap between genomes when doing | |
| 327 secondary comparisons (default: 0.1) | |
| 328 -cm {total,larger}, --coverage_method {total,larger} | |
| 329 Method to calculate coverage of an alignment | |
| 330 (for ANIn/ANImf only; gANI can only do larger method) | |
| 331 total = 2*(aligned length) / (sum of total genome lengths) | |
| 332 larger = max((aligned length / genome 1), (aligned_length / genome2)) | |
| 333 (default: larger) | |
| 334 --clusterAlg CLUSTERALG | |
| 335 Algorithm used to cluster genomes (passed to | |
| 336 scipy.cluster.hierarchy.linkage) (default: average) | |
| 337 | |
| 338 ]]></token> | |
| 339 | |
| 340 <token name="@SCORING_HELP@"><![CDATA[ | |
| 341 SCORING CRITERIA | |
| 342 Based off of the formula: | |
| 343 A*Completeness - B*Contamination + C*(Contamination * (strain_heterogeneity/100)) + D*log(N50) + E*log(size) | |
| 344 | |
| 345 A = completeness_weight; B = contamination_weight; C = strain_heterogeneity_weight; D = N50_weight; E = size_weight: | |
| 346 -comW COMPLETENESS_WEIGHT, --completeness_weight COMPLETENESS_WEIGHT | |
| 347 completeness weight (default: 1) | |
| 348 -conW CONTAMINATION_WEIGHT, --contamination_weight CONTAMINATION_WEIGHT | |
| 349 contamination weight (default: 5) | |
| 350 -strW STRAIN_HETEROGENEITY_WEIGHT, --strain_heterogeneity_weight STRAIN_HETEROGENEITY_WEIGHT | |
| 351 strain heterogeneity weight (default: 1) | |
| 352 -N50W N50_WEIGHT, --N50_weight N50_WEIGHT | |
| 353 weight of log(genome N50) (default: 0.5) | |
| 354 -sizeW SIZE_WEIGHT, --size_weight SIZE_WEIGHT | |
| 355 weight of log(genome size) (default: 0) | |
| 356 | |
| 357 ]]></token> | |
| 358 | |
| 359 <token name="@TAXONOMY_HELP@"><![CDATA[ | |
| 360 TAXONOMY: | |
| 361 --run_tax generate taxonomy information (Tdb) (default: False) | |
| 362 --tax_method {percent,max} | |
| 363 Method of determining taxonomy | |
| 364 percent = The most descriptive taxonomic level with at least (per) hits | |
| 365 max = The centrifuge taxonomic level with the most overall hits (default: percent) | |
| 366 -per PERCENT, --percent PERCENT | |
| 367 minimum percent for percent method (default: 50) | |
| 368 --cent_index CENT_INDEX | |
| 369 path to centrifuge index (for example, | |
| 370 /home/mattolm/download/centrifuge/indices/b+h+v) | |
| 371 (default: None) | |
| 372 | |
| 373 ]]></token> | |
| 374 | |
| 375 <token name="@WARNINGS_HELP@"><![CDATA[ | |
| 376 WARNINGS: | |
| 377 --warn_dist WARN_DIST | |
| 378 How far from the threshold to throw cluster warnings | |
| 379 (default: 0.25) | |
| 380 --warn_sim WARN_SIM Similarity threshold for warnings between dereplicated | |
| 381 genomes (default: 0.98) | |
| 382 --warn_aln WARN_ALN Minimum aligned fraction for warnings between | |
| 383 dereplicated genomes (ANIn) (default: 0.25) | |
| 384 | |
| 385 ]]></token> | |
| 386 | |
| 387 | |
| 388 </macros> |
