Mercurial > repos > iuc > cat_prepare
comparison macros.xml @ 0:95f0873faec1 draft
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cat commit 863ad85836c80811d1d6b82eaf3ce903b273368a"
| author | iuc |
|---|---|
| date | Tue, 10 Dec 2019 21:03:35 +0000 |
| parents | |
| children | a94ddb3954ff |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:95f0873faec1 |
|---|---|
| 1 <macros> | |
| 2 <!-- Package version; substituted into the version attribute of the requirement in the "requirements" macro. --> <token name="@VERSION@">5.0.3</token> | |
| 3 <!-- Macro "requirements": declares the cat package requirement at @VERSION@; the yield slot lets an expanding tool append further requirement elements. --> <xml name="requirements"> | |
| 4 <requirements> | |
| 5 <requirement type="package" version="@VERSION@">cat</requirement> | |
| 6 <yield/> | |
| 7 </requirements> | |
| 8 </xml> | |
| 9 <!-- Macro "version_command": obtains the tool version by running CAT with its version flag. --> <xml name="version_command"> | |
| 10 <version_command><![CDATA[CAT --version]]></version_command> | |
| 11 </xml> | |
| 12 <!-- Sub-directory names that @CAT_DB@ and @CAT_TAXONOMY@ join onto the extra_files_path of a history dataset. --> <token name="@DATABASE_FOLDER@">CAT_database</token> | |
| 13 <token name="@TAXONOMY_FOLDER@">taxonomy</token> | |
| 14 <!-- Macro "cat_db": conditional "db" that selects the CAT database source. "cached" offers entries of the cat_database data table (param cat_builtin, guarded by a no_options validator); "history" takes a history dataset (param cat_db). --> <xml name="cat_db"> | |
| 15 <conditional name="db"> | |
| 16 <param name="db_src" type="select" label="CAT database (--database_folder,--taxonomy_folder) from"> | |
| 17 <option value="cached">local cached database</option> | |
| 18 <option value="history">history</option> | |
| 19 </param> | |
| 20 <when value="cached"> | |
| 21 <param name="cat_builtin" type="select" label="Use a built-in CAT database" help="If the CAT database of interest is not listed, contact your Galaxy administrator"> | |
| 22 <options from_data_table="cat_database"> | |
| 23 <filter type="sort_by" column="2" /> | |
| 24 <validator type="no_options" message="No CAT database is available." /> | |
| 25 </options> | |
| 26 </param> | |
| 27 </when> | |
| 28 <when value="history"> | |
| 29 <param name="cat_db" type="data" format="txt" label="A history dataset from CAT prepare tool"/> | |
| 30 </when> | |
| 31 </conditional> | |
| 32 </xml> | |
| 33 <!-- Command fragment emitting both the database_folder and taxonomy_folder arguments: taken from the fields of the selected data table entry when db_src is cached, otherwise built from the @DATABASE_FOLDER@ and @TAXONOMY_FOLDER@ sub-directories of the history dataset's extra_files_path. --> <token name="@CAT_DB@"><![CDATA[ | |
| 34 #if $db.db_src == 'cached': | |
| 35 --database_folder '$db.cat_builtin.fields.database_folder' | |
| 36 --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder' | |
| 37 #else | |
| 38 #import os.path | |
| 39 #set $catdb = $db.cat_db.extra_files_path | |
| 40 --database_folder '$os.path.join($catdb,"@DATABASE_FOLDER@")' | |
| 41 --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")' | |
| 42 #end if | |
| 43 ]]></token> | |
| 44 <!-- Command fragment emitting only the taxonomy_folder argument: from the selected data table entry when db_src is cached, otherwise from the @TAXONOMY_FOLDER@ sub-directory of the history dataset's extra_files_path. --> <token name="@CAT_TAXONOMY@"><![CDATA[ | |
| 45 #if $db.db_src == 'cached': | |
| 46 --taxonomy_folder '$db.cat_builtin.fields.taxonomy_folder' | |
| 47 #else | |
| 48 #import os.path | |
| 49 #set $catdb = $db.cat_db.extra_files_path | |
| 50 --taxonomy_folder '$os.path.join($catdb,"@TAXONOMY_FOLDER@")' | |
| 51 #end if | |
| 52 ]]></token> | |
| 53 <!-- Macro "test_catdb": test parameters that pick the cached database source and set cat_builtin to CAT_prepare_test. --> <xml name="test_catdb"> | |
| 54 <conditional name="db"> | |
| 55 <param name="db_src" value="cached"/> | |
| 56 <param name="cat_builtin" value="CAT_prepare_test"/> | |
| 57 </conditional> | |
| 58 </xml> | |
| 59 <!-- Macro "use_intermediates": conditional "previous"; choosing yes exposes data inputs for a predicted proteins fasta and a diamond alignment from an earlier run, choosing no (the default) exposes nothing. --> <xml name="use_intermediates"> | |
| 60 <conditional name="previous"> | |
| 61 <param name="use_previous" type="select" label="Use previous prodigal gene prediction and diamond alignment"> | |
| 62 <help>predicted_proteins.faa and alignment.diamond from previous CAT run.</help> | |
| 63 <option value="yes">Yes</option> | |
| 64 <option value="no" selected="true">No</option> | |
| 65 </param> | |
| 66 <when value="yes"> | |
| 67 <param argument="--proteins_fasta" type="data" format="fasta" label="prodigal predicted proteins fasta"/> | |
| 68 <param argument="--diamond_alignment" type="data" format="tabular" label="alignment.diamond file"/> | |
| 69 </when> | |
| 70 <when value="no"/> | |
| 71 </conditional> | |
| 72 </xml> | |
| 73 <!-- Command fragment: passes the proteins_fasta and diamond_alignment inputs when use_previous is yes. NOTE: the out_prefix argument sits outside the if block, so it is emitted in every case. --> <token name="@USE_INTERMEDIATES@"><![CDATA[ | |
| 74 #if $previous.use_previous == 'yes' | |
| 75 --proteins_fasta '$previous.proteins_fasta' | |
| 76 --diamond_alignment '$previous.diamond_alignment' | |
| 77 #end if | |
| 78 --out_prefix 'cat_output' | |
| 79 ]]></token> | |
| 80 <!-- Macro "custom_settings": the range parameter (integer 0 to 49, default 10) and the fraction parameter (float 0 to 0.99, default 0.5). --> <xml name="custom_settings"> | |
| 81 <param argument="--range" type="integer" value="10" min="0" max="49" label="range"/> | |
| 82 <param argument="--fraction" type="float" value="0.5" min="0" max="0.99" label="fraction"/> | |
| 83 </xml> | |
| 84 <!-- Command fragment: always passes the range and fraction values; both are referenced as top-level parameters. --> <token name="@CUSTOM_SETTINGS@"><![CDATA[ | |
| 85 --range '$range' | |
| 86 --fraction '$fraction' | |
| 87 ]]></token> | |
| 88 <!-- Macro "diamond_options": conditional "diamond"; choosing yes exposes the sensitive, block_size, index_chunks and top parameters, choosing no (the default) exposes nothing. --> <xml name="diamond_options"> | |
| 89 <conditional name="diamond"> | |
| 90 <param name="set_diamond_opts" type="select" label="Set advanced diamond options"> | |
| 91 <option value="yes">Yes</option> | |
| 92 <option value="no" selected="true">No</option> | |
| 93 </param> | |
| 94 <when value="yes"> | |
| 95 <param argument="--sensitive" type="boolean" truevalue="--sensitive" falsevalue="" checked="false" | |
| 96 label="Run DIAMOND in sensitive mode (considerably slower)"/> | |
| 97 <param argument="--block_size" type="float" value="2.0" min="1" max="10" label="DIAMOND block-size parameter." | |
| 98 help="lower will decrease memory and temporary disk space usage, higher will increase performance."/> | |
| 99 <param argument="--index_chunks" type="integer" value="4" min="1" max="10" label="DIAMOND index-chunks parameter" | |
| 100 help="Set to 1 on high memory machines. The parameter has no effect on temporary disk space usage."/> | |
| 101 <param argument="--top" type="integer" value="50" min="1" max="50" label="DIAMOND top parameter" | |
| 102 help="Governs hits within range of best hit that are written to the alignment file. This implies you know what you are doing."/> | |
| 103 </when> | |
| 104 <when value="no"/> | |
| 105 </conditional> | |
| 106 </xml> | |
| 107 <!-- Command fragment for the advanced DIAMOND options; emitted only when set_diamond_opts is yes. The top value is cast to int before the comparison so the test does not depend on how the parameter wrapper compares with a number; the top argument and its I_know_what_Im_doing guard flag are added only for values below 50. --> <token name="@DIAMOND_OPTIONS@"><![CDATA[ | |
| 108 #if $diamond.set_diamond_opts == 'yes': | |
| 109 $diamond.sensitive | |
| 110 --block_size '$diamond.block_size' | |
| 111 --index_chunks '$diamond.index_chunks' | |
| 112 #if int(str($diamond.top)) < 50: | |
| 113 --I_know_what_Im_doing | |
| 114 --top '$diamond.top' | |
| 115 #end if | |
| 116 #end if | |
| 117 ]]></token> | |
| 118 | |
| 119 <!-- Macro "add_names_options": two boolean flags for CAT add_names; only_official is checked by default, exclude_scores is not. Each renders as its flag when true and as an empty string when false. --> <xml name="add_names_options"> | |
| 120 <param argument="--only_official" type="boolean" truevalue="--only_official" falsevalue="" checked="true" | |
| 121 label="Only output official level names."/> | |
| 122 <param argument="--exclude_scores" type="boolean" truevalue="--exclude_scores" falsevalue="" checked="false" | |
| 123 label="Exclude bit-score support scores in the lineage."/> | |
| 124 </xml> | |
| 125 <!-- Command fragment rendering the two add_names flags. NOTE(review): the names are unqualified here, while @ADD_NAMES@ reads them as names.only_official and names.exclude_scores; confirm which form the expanding tool needs. --> <token name="@ADD_NAMES_OPTIONS@"><![CDATA[ | |
| 126 $only_official $exclude_scores | |
| 127 ]]></token> | |
| 128 <!-- Macro "add_names": conditional "names" choosing which outputs get taxonomic names (none, ORF2LCA, classification, or both); every choice other than no expands the add_names_options macro. --> <xml name="add_names"> | |
| 129 <conditional name="names"> | |
| 130 <param name="add_names" type="select" label="CAT add_names for" | |
| 131 help="annotate with taxonomic names."> | |
| 132 <option value="no">No</option> | |
| 133 <option value="orf2lca">ORF2LCA.names.txt</option> | |
| 134 <option value="classification">classification.names.txt</option> | |
| 135 <option value="both">ORF2LCA.names.txt and classification.names.txt</option> | |
| 136 </param> | |
| 137 <when value="no"/> | |
| 138 <when value="orf2lca"> | |
| 139 <expand macro="add_names_options"/> | |
| 140 </when> | |
| 141 <when value="classification"> | |
| 142 <expand macro="add_names_options"/> | |
| 143 </when> | |
| 144 <when value="both"> | |
| 145 <expand macro="add_names_options"/> | |
| 146 </when> | |
| 147 </conditional> | |
| 148 </xml> | |
| 149 <!-- Path of the tabpad.py helper inside the tool directory. NOTE(review): @ADD_NAMES@ and @SUMMARISE@ spell this path out rather than using this token. --> <token name="@TXT2TSV@">${__tool_directory__}/tabpad.py</token> | |
| 150 <!-- Command fragment chaining CAT add_names after the main run. For classification or both it annotates the contig (bcat is CAT) or bin classification table; for orf2lca or both it annotates the ORF2LCA table. Each result is then passed through tabpad.py into the matching output dataset. tabpad.py is run through the python interpreter with a quoted path, so it works without the executable bit and with spaces in the tool directory. --> <token name="@ADD_NAMES@"><![CDATA[ | |
| 151 #if $names.add_names in ['classification','both']: | |
| 152 && CAT add_names $names.only_official $names.exclude_scores | |
| 153 @CAT_TAXONOMY@ | |
| 154 #if $bcat == 'CAT' | |
| 155 -i 'cat_output.contig2classification.tsv' | |
| 156 #else | |
| 157 -i 'cat_output.bin2classification.tsv' | |
| 158 #end if | |
| 159 -o 'classification_names.txt' | |
| 160 && python '${__tool_directory__}/tabpad.py' -i 'classification_names.txt' -o '$classification_names' | |
| 161 #end if | |
| 162 #if $names.add_names in ['orf2lca','both']: | |
| 163 && CAT add_names $names.only_official $names.exclude_scores | |
| 164 @CAT_TAXONOMY@ | |
| 165 -i 'cat_output.ORF2LCA.tsv' | |
| 166 -o 'orf2lca_names.txt' | |
| 167 && python '${__tool_directory__}/tabpad.py' -i 'orf2lca_names.txt' -o '$orf2lca_names' | |
| 168 #end if | |
| 169 ]]></token> | |
| 170 <!-- Macro "summarise": select parameter that turns the CAT summarise report on (classification) or off (no). --> <xml name="summarise"> | |
| 171 <param name="summarise" type="select" label="CAT summarise report" | |
| 172 help="Report the number of assignments to each taxonomic name"> | |
| 173 <option value="no">No</option> | |
| 174 <option value="classification">classification.summary.txt</option> | |
| 175 </param> | |
| 176 </xml> | |
| 177 <!-- Command fragment run when summarise is classification. If add_names already produced classification names with only_official set, that output is reused as the summary input; otherwise CAT add_names is first run with only_official into a scratch file. CAT summarise then reads that input (the contigs fasta is passed only when bcat is CAT) and its report goes through tabpad.py into the classification_summary output. tabpad.py is run through the python interpreter with a quoted path, so it works without the executable bit and with spaces in the tool directory. --> <token name="@SUMMARISE@"><![CDATA[ | |
| 178 #if $summarise in ['classification']: | |
| 179 #if $names.add_names in ['classification','both'] and $names.only_official: | |
| 180 #set $summary_input = $classification_names | |
| 181 #else | |
| 182 #set $summary_input = 'classification_official_names' | |
| 183 && CAT add_names --only_official | |
| 184 @CAT_TAXONOMY@ | |
| 185 #if $bcat == 'CAT' | |
| 186 -i 'cat_output.contig2classification.tsv' | |
| 187 #else | |
| 188 -i 'cat_output.bin2classification.tsv' | |
| 189 #end if | |
| 190 -o '$summary_input' | |
| 191 #end if | |
| 192 && CAT summarise | |
| 193 #if $bcat == 'CAT' | |
| 194 -c '$contigs_fasta' | |
| 195 #end if | |
| 196 -i '$summary_input' | |
| 197 -o 'classification_summary.txt' | |
| 198 && python '${__tool_directory__}/tabpad.py' -i 'classification_summary.txt' -o '$classification_summary' | |
| 199 #end if | |
| 200 ]]></token> | |
| 201 | |
| 202 <!-- Macro "select_outputs": required multi-select of output files; log, predicted_proteins_faa and orf2lca are preselected. The yield slot lets the expanding macro append further options. --> <xml name="select_outputs"> | |
| 203 <param name="select_outputs" type="select" multiple="true" optional="false" label="Select outputs"> | |
| 204 <option value="log" selected="true">log</option> | |
| 205 <option value="predicted_proteins_faa" selected="true">Prodigal predicted_proteins.faa</option> | |
| 206 <option value="predicted_proteins_gff">Prodigal predicted_proteins.gff</option> | |
| 207 <option value="alignment_diamond">Diamond blastp alignment.diamond</option> | |
| 208 <option value="orf2lca" selected="true">ORF2LCA.txt (taxonomic assignment per predicted ORF)</option> | |
| 209 <yield/> | |
| 210 </param> | |
| 211 </xml> | |
| 212 <!-- Macro "select_cat_outputs": CAT (contig) variant. Sets the hidden params bcat, seqtype, sum_titles and bin_col that the command tokens and the "outputs" macro read, then expands select_outputs with the extra contig2classification option. --> <xml name="select_cat_outputs"> | |
| 213 <param name="bcat" type="hidden" value="CAT"/> | |
| 214 <param name="seqtype" type="hidden" value="contig"/> | |
| 215 <param name="sum_titles" type="hidden" value="contigs,number of ORFs,number of positions"/> | |
| 216 <param name="bin_col" type="hidden" value=""/> | |
| 217 <expand macro="select_outputs"> | |
| 218 <option value="contig2classification" selected="true">contig2classification.txt (taxonomic assignment per contig)</option> | |
| 219 </expand> | |
| 220 </xml> | |
| 221 <!-- Macro "select_bat_outputs": BAT (bin) variant. Sets the hidden params bcat, seqtype, sum_titles and bin_col that the command tokens and the "outputs" macro read, then expands select_outputs with the extra bin2classification option. --> <xml name="select_bat_outputs"> | |
| 222 <param name="bcat" type="hidden" value="BAT"/> | |
| 223 <param name="seqtype" type="hidden" value="bin"/> | |
| 224 <param name="sum_titles" type="hidden" value="bins"/> | |
| 225 <param name="bin_col" type="hidden" value="bin,"/> | |
| 226 <expand macro="select_outputs"> | |
| 227 <option value="bin2classification" selected="true">bin2classification.txt (taxonomic assignment per metagenome assembly)</option> | |
| 228 </expand> | |
| 229 </xml> | |
| 230 <!-- Macro "outputs": the output datasets. Each is gated by a filter on select_outputs, previous.use_previous, names.add_names or summarise. Labels and column_names interpolate the hidden params bcat, seqtype, bin_col and sum_titles. The prodigal and diamond outputs are produced only when use_previous is no. orf2lca_names, classification_names and classification_summary have no from_work_dir because @ADD_NAMES@ and @SUMMARISE@ write them directly through the -o argument of tabpad.py. --> <xml name="outputs"> | |
| 231 <data name="log" format="txt" label="${bcat}.log" from_work_dir="cat_output.log"> | |
| 232 <filter>'log' in select_outputs or not select_outputs</filter> | |
| 233 </data> | |
| 234 <data name="predicted_proteins_faa" format="fasta" label="${bcat}.predicted_proteins.faa" from_work_dir="cat_output.predicted_proteins.faa"> | |
| 235 <filter>'predicted_proteins_faa' in select_outputs and previous['use_previous'] == 'no'</filter> | |
| 236 </data> | |
| 237 <data name="predicted_proteins_gff" format="gff" label="${bcat}.predicted_proteins.gff" from_work_dir="cat_output.predicted_proteins.gff"> | |
| 238 <filter>'predicted_proteins_gff' in select_outputs and previous['use_previous'] == 'no'</filter> | |
| 239 </data> | |
| 240 <data name="alignment_diamond" format="tabular" label="${bcat}.alignment.diamond" from_work_dir="cat_output.alignment.diamond"> | |
| 241 <filter>'alignment_diamond' in select_outputs and previous['use_previous'] == 'no'</filter> | |
| 242 <actions> | |
| 243 <action name="comment_lines" type="metadata" default="1" /> | |
| 244 <action name="column_names" type="metadata" default="qseqid,sseqid,pident,length,mismatch,gapopen,qstart,qend,sstart,send,evalue,bitscore" /> | |
| 245 </actions> | |
| 246 </data> | |
| 247 <data name="orf2lca" format="tabular" label="${bcat}.ORF2LCA.txt" from_work_dir="cat_output.ORF2LCA.tsv"> | |
| 248 <filter>'orf2lca' in select_outputs</filter> | |
| 249 <actions> | |
| 250 <action name="comment_lines" type="metadata" default="1" /> | |
| 251 <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score" /> | |
| 252 </actions> | |
| 253 </data> | |
| 254 <data name="contig2classification" format="tabular" label="${bcat}.contig2classification.txt" from_work_dir="cat_output.contig2classification.tsv"> | |
| 255 <filter>'contig2classification' in select_outputs</filter> | |
| 256 <actions> | |
| 257 <action name="comment_lines" type="metadata" default="1" /> | |
| 258 <action name="column_names" type="metadata" default="contig,classification,reason,lineage,lineage scores" /> | |
| 259 </actions> | |
| 260 </data> | |
| 261 <data name="bin2classification" format="tabular" label="${bcat}.bin2classification.txt" from_work_dir="cat_output.bin2classification.tsv"> | |
| 262 <filter>'bin2classification' in select_outputs</filter> | |
| 263 <actions> | |
| 264 <action name="comment_lines" type="metadata" default="1" /> | |
| 265 <action name="column_names" type="metadata" default="bin,classification,reason,lineage,lineage scores" /> | |
| 266 </actions> | |
| 267 </data> | |
| 268 <data name="orf2lca_names" format="tabular" label="${bcat}.ORF2LCA.names.txt"> | |
| 269 <filter>names['add_names'] in ['both','orf2lca']</filter> | |
| 270 <actions> | |
| 271 <action name="comment_lines" type="metadata" default="1" /> | |
| 272 <action name="column_names" type="metadata" default="ORF,${bin_col}lineage,bit-score,superkingdom,phylum,class,order,family,genus,species" /> | |
| 273 </actions> | |
| 274 </data> | |
| 275 <data name="classification_names" format="tabular" label="${bcat}.${seqtype}2classification.names.txt"> | |
| 276 <filter>names['add_names'] in ['both','classification']</filter> | |
| 277 <actions> | |
| 278 <action name="comment_lines" type="metadata" default="1" /> | |
| 279 <action name="column_names" type="metadata" default="${seqtype},classification,reason,lineage,lineage scores,superkingdom,phylum,class,order,family,genus,species" /> | |
| 280 </actions> | |
| 281 </data> | |
| 282 <data name="classification_summary" format="tabular" label="${bcat}.${seqtype}2classification.summary.txt"> | |
| 283 <filter>'classification' in summarise</filter> | |
| 284 <actions> | |
| 285 <action name="comment_lines" type="metadata" default="4" /> | |
| 286 <action name="column_names" type="metadata" default="rank,clade,number of ${sum_titles}" /> | |
| 287 </actions> | |
| 288 </data> | |
| 289 </xml> | |
| 290 <token name="@COMMON_HELP@"><![CDATA[ | |
| 291 The Contig Annotation Tool (CAT) and Bin Annotation Tool (BAT) workflows are described at: https://github.com/dutilh/CAT | |
| 292 | |
| 293 - CAT contigs/CAT bins - runs Prodigal_ prokaryotic protein prediction on the fasta input. | |
| 294 - CAT contigs/CAT bins - runs Diamond_ to align predicted proteins to the reference proteins in the CAT database. | |
| 295 - CAT contigs/CAT bins - assigns taxonomic classification to fasta entries and ORFs based on alignments. | |
| 296 - CAT add_names - annotates outputs with taxonomic names. | |
| 297 - CAT summarise - reports number of assignments to each taxonomic name. | |
| 298 | |
| 299 A CAT database can either be installed by data_manager_cat or in the local history by CAT prepare tool. | |
| 300 | |
| 301 .. _Prodigal: https://github.com/hyattpd/Prodigal | |
| 302 .. _Diamond: https://github.com/bbuchfink/diamond | |
| 303 | |
| 304 ]]></token> | |
| 305 <token name="@OUTPUTS_HELP@"><![CDATA[ | |
| 306 | |
| 307 **OUTPUTS** | |
| 308 | |
| 309 Any of the files produced by the CAT workflow are available as outputs: | |
| 310 - Prodigal | |
| 311 | |
| 312 - predicted_proteins.faa | |
| 313 - predicted_proteins.gff | |
| 314 | |
| 315 - Diamond | |
| 316 | |
| 317 - alignment.diamond | |
| 318 | |
| 319 - CAT contigs/bins | |
| 320 | |
| 321 - contigs/bin2classification.txt | |
| 322 - ORF2LCA.txt | |
| 323 | |
| 324 - CAT add_names (optional) | |
| 325 | |
| 326 - contigs/bin2classification.names.txt | |
| 327 - ORF2LCA.names.txt | |
| 328 | |
| 329 - CAT summarise (optional) | |
| 330 | |
| 331 - contigs/bin2classification.summary.txt | |
| 332 | |
| 333 | |
| 334 ]]></token> | |
| 335 | |
| 336 <token name="@OPTIONS_HELP@"><![CDATA[ | |
| 337 | |
| 338 Optional arguments: | |
| 339 -r, --range cut-off range after alignment [0-49] (default: 10). | |
| 340 -f, --fraction fraction of bit-score support for each classification | |
| 341 [0-0.99] (default: 0.5). | |
| 342 -p, --proteins_fasta | |
| 343 Path to predicted proteins fasta file. If supplied, | |
| 344 CAT will skip the protein prediction step. | |
| 345 -a, --diamond_alignment | |
| 346 Path to DIAMOND alignment table. If supplied, CAT will | |
| 347 skip the DIAMOND alignment step and directly classify | |
| 348 the sequences. A predicted proteins fasta file should | |
| 349 also be supplied with argument [-p / --proteins_fasta]. | |
| 350 | |
| 351 | |
| 352 DIAMOND specific optional arguments: | |
| 353 --sensitive Run DIAMOND in sensitive mode (default: not enabled). | |
| 354 | |
| 355 --block_size DIAMOND block-size parameter (default: 2.0). Lower | |
| 356 numbers will decrease memory and temporary disk space | |
| 357 usage. | |
| 358 | |
| 359 --index_chunks | |
| 360 DIAMOND index-chunks parameter (default: 4). Set to 1 | |
| 361 on high memory machines. The parameter has no effect | |
| 362 on temporary disk space usage. | |
| 363 | |
| 364 --top | |
| 365 DIAMOND top parameter [0-50] (default: 50). Governs | |
| 366 hits within range of best hit that are written to the | |
| 367 alignment file. This is not the [-r / --range] | |
| 368 parameter! | |
| 369 | |
| 370 | |
| 371 Setting the DIAMOND --top parameter | |
| 372 | |
| 373 You can speed up DIAMOND considerably, and at the same time greatly reduce disk usage, by setting the DIAMOND --top parameter to lower values. This will govern hits within range of the best hit that are written to the alignment file. | |
| 374 | |
| 375 You have to be very careful to 1) not confuse this parameter with the -r / --range parameter, which does a similar cut-off but after alignment and 2) be aware that if you want to run CAT or BAT again afterwards with different values of the -r / --range parameter, your options will be limited to the range you have chosen with --top earlier, because all hits that fall outside this range will not be included in the alignment file. Importantly, CAT and BAT currently do not warn you if you choose -r / --range in a second run higher than --top in a previous one, so it's up to you to remember this! | |
| 376 | |
| 377 If you have understood all this, or you do not plan to tune -r / --range at all afterwards, you can enjoy a huge speedup with much smaller alignment files! For CAT you can for example set --top 11 and for BAT --top 6. | |
| 378 | |
| 379 ]]></token> | |
| 380 <!-- Macro "citations": DOI citations for the tool; the yield slot lets an expanding tool append further citation elements. The values are bare DOIs (no resolver URL prefix), which is the form a citation of type doi expects. --> <xml name="citations"> | |
| 381 <citations> | |
| 382 <citation type="doi">10.1101/072868</citation> | |
| 383 <citation type="doi">10.1186/s13059-019-1817-x</citation> | |
| 384 <citation type="doi">10.1038/nmeth.3176</citation> | |
| 385 <citation type="doi">10.1186/1471-2105-11-119</citation> | |
| 386 <yield /> | |
| 387 </citations> | |
| 388 </xml> | |
| 389 </macros> |
