Mercurial > repos > iuc > ncbi_datasets
comparison datasets_gene.xml @ 20:35d32c807c23 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_datasets commit 5a65a62588a36d757f96681bf72f537c12c91beb
| author | iuc |
|---|---|
| date | Fri, 26 Dec 2025 17:16:51 +0000 |
| parents | 9a10a6449901 |
| children | |
comparison
equal
deleted
inserted
replaced
| 19:ced734560c9d | 20:35d32c807c23 |
|---|---|
| 2 <description>download gene sequences and metadata</description> | 2 <description>download gene sequences and metadata</description> |
| 3 <macros> | 3 <macros> |
| 4 <import>macros.xml</import> | 4 <import>macros.xml</import> |
| 5 </macros> | 5 </macros> |
| 6 <expand macro="bio_tools"/> | 6 <expand macro="bio_tools"/> |
| 7 <expand macro="requirements"></expand> | 7 <expand macro="requirements"/> |
| 8 <expand macro="version_command"/> | 8 <expand macro="version_command"/> |
| 9 <command><![CDATA[ | 9 <command><![CDATA[ |
| 10 #import re | 10 #import re |
| 11 @SETUP_CERTIFICATES@ | 11 @SETUP_CERTIFICATES@ |
| 12 datasets download gene $query.subcommand.download_by | 12 datasets download gene $query.subcommand.download_by |
| 39 #end if | 39 #end if |
| 40 #end if | 40 #end if |
| 41 | 41 |
| 42 #if $filters.fasta_filter_cond.fasta_filter_select | 42 #if $filters.fasta_filter_cond.fasta_filter_select |
| 43 #if $filters.fasta_filter_cond.fasta_filter_select == 'text' | 43 #if $filters.fasta_filter_cond.fasta_filter_select == 'text' |
| 44 --fasta-filter #echo ",".join(f"'{x}'" for x in $filters.fasta_filter_cond.fasta_filter.split(',') if x) | 44 --fasta-filter #echo ",".join(f"'{x}'" for x in str($filters.fasta_filter_cond.fasta_filter).split(',') if x) |
| 45 #else | 45 #else |
| 46 --fasta-filter-file '$filters.fasta_filter_cond.fasta_filter_file' | 46 --fasta-filter-file '$filters.fasta_filter_cond.fasta_filter_file' |
| 47 #end if | 47 #end if |
| 48 #end if | 48 #end if |
| 49 | 49 |
| 95 <expand macro="text_or_file" what="Gene Symbol" what_extended="NCBI Gene Symbol" help=""/> | 95 <expand macro="text_or_file" what="Gene Symbol" what_extended="NCBI Gene Symbol" help=""/> |
| 96 <expand macro="ortholog"/> | 96 <expand macro="ortholog"/> |
| 97 <param argument="--taxon" type="text" value="human" label="Species for gene symbol" help="NCBI taxid, common or scientific name"> | 97 <param argument="--taxon" type="text" value="human" label="Species for gene symbol" help="NCBI taxid, common or scientific name"> |
| 98 <sanitizer invalid_char=""> | 98 <sanitizer invalid_char=""> |
| 99 <valid initial="string.letters"> | 99 <valid initial="string.letters"> |
| 100 <add value=" " /> | 100 <add value=" "/> |
| 101 <add value="-" /> | 101 <add value="-"/> |
| 102 </valid> | 102 </valid> |
| 103 </sanitizer> | 103 </sanitizer> |
| 104 </param> | 104 </param> |
| 105 </when> | 105 </when> |
| 106 <when value="accession"> | 106 <when value="accession"> |
| 107 <expand macro="text_or_file" what="Gene Accession" what_extended="NCBI Gene Accession" help=""/> | 107 <expand macro="text_or_file" what="Gene Accession" what_extended="NCBI Gene Accession" help=""/> |
| 108 <expand macro="ortholog"/> | 108 <expand macro="ortholog"/> |
| 109 <param argument="--taxon-filter" type="text" value="" label="Limit gene sequences and annotation report file to specified taxon" help="any rank, only available for WP accessions"> | 109 <param argument="--taxon-filter" type="text" value="" label="Limit gene sequences and annotation report file to specified taxon" help="any rank, only available for WP accessions"> |
| 110 <sanitizer invalid_char=""> | 110 <sanitizer invalid_char=""> |
| 111 <valid initial="string.letters"> | 111 <valid initial="string.letters"> |
| 112 <add value=" " /> | 112 <add value=" "/> |
| 113 <add value="-" /> | 113 <add value="-"/> |
| 114 </valid> | 114 </valid> |
| 115 </sanitizer> | 115 </sanitizer> |
| 116 </param> | 116 </param> |
| 117 <param argument="--include-flanks-bp" type="integer" optional="true" min="0" value="" label="Length of flanking nucleotides" help="WP accessions only"/> | 117 <param argument="--include-flanks-bp" type="integer" optional="true" min="0" value="" label="Length of flanking nucleotides" help="WP accessions only"/> |
| 118 </when> | 118 </when> |
| 131 <when value=""/> | 131 <when value=""/> |
| 132 <when value="text"> | 132 <when value="text"> |
| 133 <param argument="--fasta-filter" type="text" label="RefSeq nucleotide and protein accessions" help="Comma separated"> | 133 <param argument="--fasta-filter" type="text" label="RefSeq nucleotide and protein accessions" help="Comma separated"> |
| 134 <sanitizer invalid_char=""> | 134 <sanitizer invalid_char=""> |
| 135 <valid initial="string.letters,string.digits"> | 135 <valid initial="string.letters,string.digits"> |
| 136 <add value="," /> | 136 <add value=","/> |
| 137 </valid> | 137 </valid> |
| 138 </sanitizer> | 138 </sanitizer> |
| 139 </param> | 139 </param> |
| 140 </when> | 140 </when> |
| 141 <when value="file"> | 141 <when value="file"> |
| 207 </data> | 207 </data> |
| 208 <data name="cds_fasta" label="NCBI Gene Datasets: CDS fasta" format="fasta" from_work_dir="ncbi_dataset/data/cds.fna"> | 208 <data name="cds_fasta" label="NCBI Gene Datasets: CDS fasta" format="fasta" from_work_dir="ncbi_dataset/data/cds.fna"> |
| 209 <filter>file_choices['kingdom_cond']['include'] and "cds" in file_choices['kingdom_cond']['include']</filter> | 209 <filter>file_choices['kingdom_cond']['include'] and "cds" in file_choices['kingdom_cond']['include']</filter> |
| 210 </data> | 210 </data> |
| 211 <data name="threep_utr_fasta" label="NCBI Gene Datasets: 3' UTR fasta" format="fasta" from_work_dir="ncbi_dataset/data/3p_utr.fna"> | 211 <data name="threep_utr_fasta" label="NCBI Gene Datasets: 3' UTR fasta" format="fasta" from_work_dir="ncbi_dataset/data/3p_utr.fna"> |
| 212 <filter>file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include']</filter> | 212 <filter>file_choices['kingdom_cond']['include'] and "3p-utr" in file_choices['kingdom_cond']['include']</filter> |
| 213 </data> | 213 </data> |
| 214 <data name="fivep_utr_fasta" label="NCBI Gene Datasets: 5' UTR fasta" format="fasta" from_work_dir="ncbi_dataset/data/5p_utr.fna"> | 214 <data name="fivep_utr_fasta" label="NCBI Gene Datasets: 5' UTR fasta" format="fasta" from_work_dir="ncbi_dataset/data/5p_utr.fna"> |
| 215 <filter>file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include']</filter> | 215 <filter>file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include']</filter> |
| 216 </data> | 216 </data> |
| 217 </outputs> | 217 </outputs> |
| 233 <has_n_columns n="8"/> | 233 <has_n_columns n="8"/> |
| 234 </assert_contents> | 234 </assert_contents> |
| 235 </output> | 235 </output> |
| 236 <output name="rna_fasta"> | 236 <output name="rna_fasta"> |
| 237 <assert_contents> | 237 <assert_contents> |
| 238 <has_text text=">"/> | 238 <has_text text=">"/> |
| 239 </assert_contents> | 239 </assert_contents> |
| 240 </output> | 240 </output> |
| 241 <output name="protein_fasta"> | 241 <output name="protein_fasta"> |
| 242 <assert_contents> | 242 <assert_contents> |
| 243 <has_text text=">"/> | 243 <has_text text=">"/> |
| 244 </assert_contents> | 244 </assert_contents> |
| 245 </output> | 245 </output> |
| 246 </test> | 246 </test> |
| 247 <!-- 2: datasets download gene gene-id 2597 14433 --> | 247 <!-- 2: datasets download gene gene-id 2597 14433 --> |
| 248 <test expect_num_outputs="3"> | 248 <test expect_num_outputs="3"> |
| 261 <has_n_columns n="8"/> | 261 <has_n_columns n="8"/> |
| 262 </assert_contents> | 262 </assert_contents> |
| 263 </output> | 263 </output> |
| 264 <output name="rna_fasta"> | 264 <output name="rna_fasta"> |
| 265 <assert_contents> | 265 <assert_contents> |
| 266 <has_text text=">"/> | 266 <has_text text=">"/> |
| 267 </assert_contents> | 267 </assert_contents> |
| 268 </output> | 268 </output> |
| 269 <output name="protein_fasta"> | 269 <output name="protein_fasta"> |
| 270 <assert_contents> | 270 <assert_contents> |
| 271 <has_text text=">"/> | 271 <has_text text=">"/> |
| 272 </assert_contents> | 272 </assert_contents> |
| 273 </output> | 273 </output> |
| 274 </test> | 274 </test> |
| 275 <!-- 3: same as above + give accessions by file, 2 different outputs and ortholog--> | 275 <!-- 3: same as above + give accessions by file, 2 different outputs and ortholog--> |
| 276 <test expect_num_outputs="3"> | 276 <test expect_num_outputs="3"> |
| 282 </conditional> | 282 </conditional> |
| 283 <param name="ortholog" value="Haplorrhini,Strepsirrhini"/> | 283 <param name="ortholog" value="Haplorrhini,Strepsirrhini"/> |
| 284 </conditional> | 284 </conditional> |
| 285 <section name="file_choices"> | 285 <section name="file_choices"> |
| 286 <conditional name="kingdom_cond"> | 286 <conditional name="kingdom_cond"> |
| | 287 <param name="kingdom_sel" value="gene"/> |
| 287 <param name="include" value="gene,cds"/> | 288 <param name="include" value="gene,cds"/> |
| 288 </conditional> | 289 </conditional> |
| 289 </section> | 290 </section> |
| 290 <output name="gene_data_report"> | 291 <output name="gene_data_report"> |
| 291 <assert_contents> | 292 <assert_contents> |
| 295 <has_n_columns n="8"/> | 296 <has_n_columns n="8"/> |
| 296 </assert_contents> | 297 </assert_contents> |
| 297 </output> | 298 </output> |
| 298 <output name="gene_fasta"> | 299 <output name="gene_fasta"> |
| 299 <assert_contents> | 300 <assert_contents> |
| 300 <has_text text=">"/> | 301 <has_text text=">"/> |
| 301 </assert_contents> | 302 </assert_contents> |
| 302 </output> | 303 </output> |
| 303 <output name="cds_fasta"> | 304 <output name="cds_fasta"> |
| 304 <assert_contents> | 305 <assert_contents> |
| 305 <has_text text=">"/> | 306 <has_text text=">"/> |
| 306 </assert_contents> | 307 </assert_contents> |
| 307 </output> | 308 </output> |
| 308 </test> | 309 </test> |
| 309 <!-- 4: datasets download gene symbol tp53 --> | 310 <!-- 4: datasets download gene symbol tp53 --> |
| 310 <test expect_num_outputs="1"> | 311 <test expect_num_outputs="3"> |
| 311 <conditional name="query|subcommand"> | 312 <conditional name="query|subcommand"> |
| 312 <param name="download_by" value="symbol"/> | 313 <param name="download_by" value="symbol"/> |
| 313 <conditional name="text_or_file"> | 314 <conditional name="text_or_file"> |
| 314 <param name="text_or_file" value="text"/> | 315 <param name="text_or_file" value="text"/> |
| 315 <param name="accession" value="tp53"/> | 316 <param name="accession" value="tp53"/> |
| 316 </conditional> | 317 </conditional> |
| 317 </conditional> | 318 </conditional> |
| 318 <section name="file_choices"> | |
| 319 <conditional name="kingdom_cond"> | |
| 320 <param name="include" value=""/> | |
| 321 </conditional> | |
| 322 </section> | |
| 323 <output name="gene_data_report"> | 319 <output name="gene_data_report"> |
| 324 <assert_contents> | 320 <assert_contents> |
| 325 <has_text text="human"/> | 321 <has_text text="human"/> |
| 326 <has_n_lines n="2"/> | 322 <has_n_lines n="2"/> |
| 327 <has_n_columns n="8"/> | 323 <has_n_columns n="8"/> |
| 359 <has_n_columns n="39"/> | 355 <has_n_columns n="39"/> |
| 360 </assert_contents> | 356 </assert_contents> |
| 361 </output> | 357 </output> |
| 362 <output name="threep_utr_fasta"> | 358 <output name="threep_utr_fasta"> |
| 363 <assert_contents> | 359 <assert_contents> |
| 364 <has_text text=">"/> | 360 <has_text text=">"/> |
| 365 </assert_contents> | 361 </assert_contents> |
| 366 </output> | 362 </output> |
| 367 <output name="fivep_utr_fasta"> | 363 <output name="fivep_utr_fasta"> |
| 368 <assert_contents> | 364 <assert_contents> |
| 369 <has_text text=">"/> | 365 <has_text text=">"/> |
| 370 </assert_contents> | 366 </assert_contents> |
| 371 </output> | 367 </output> |
| 372 </test> | 368 </test> |
| 373 <!-- 6: datasets download gene symbol brca1 \-\-ortholog --> | 369 <!-- 6: datasets download gene symbol brca1 \-\-ortholog --> |
| 374 <test expect_num_outputs="1"> | 370 <test expect_num_outputs="3"> |
| 375 <conditional name="query|subcommand"> | 371 <conditional name="query|subcommand"> |
| 376 <param name="download_by" value="symbol"/> | 372 <param name="download_by" value="symbol"/> |
| 377 <conditional name="text_or_file"> | 373 <conditional name="text_or_file"> |
| 378 <param name="text_or_file" value="text"/> | 374 <param name="text_or_file" value="text"/> |
| 379 <param name="accession" value="brca1"/> | 375 <param name="accession" value="brca1"/> |
| 380 </conditional> | 376 </conditional> |
| 381 <param name="ortholog" value="rodentia"/> | 377 <param name="ortholog" value="rodentia"/> |
| 382 </conditional> | 378 </conditional> |
| 383 <section name="file_choices"> | |
| 384 <conditional name="kingdom_cond"> | |
| 385 <param name="include" value=""/> | |
| 386 </conditional> | |
| 387 </section> | |
| 388 <output name="gene_data_report"> | 379 <output name="gene_data_report"> |
| 389 <assert_contents> | 380 <assert_contents> |
| 390 <has_text text="rat"/> | 381 <has_text text="rat"/> |
| 391 <has_text text="Brca1"/> | 382 <has_text text="Brca1"/> |
| 392 <has_n_lines min="30"/> | 383 <has_n_lines min="30"/> |
| 393 <has_n_columns n="8"/> | 384 <has_n_columns n="8"/> |
| 394 </assert_contents> | 385 </assert_contents> |
| 395 </output> | 386 </output> |
| 396 </test> | 387 </test> |
| 397 <!-- 7: datasets download gene accession NP_000483.3 --> | 388 <!-- 7: datasets download gene accession NP_000483.3 --> |
| 398 <test expect_num_outputs="1"> | 389 <test expect_num_outputs="3"> |
| 399 <conditional name="query|subcommand"> | 390 <conditional name="query|subcommand"> |
| 400 <param name="download_by" value="accession"/> | 391 <param name="download_by" value="accession"/> |
| 401 <conditional name="text_or_file"> | 392 <conditional name="text_or_file"> |
| 402 <param name="text_or_file" value="text"/> | 393 <param name="text_or_file" value="text"/> |
| 403 <param name="accession" value="NP_000483.3"/> | 394 <param name="accession" value="NP_000483.3"/> |
| 404 </conditional> | 395 </conditional> |
| 405 </conditional> | 396 </conditional> |
| 406 <section name="file_choices"> | |
| 407 <conditional name="kingdom_cond"> | |
| 408 <param name="include" value=""/> | |
| 409 </conditional> | |
| 410 </section> | |
| 411 <output name="gene_data_report"> | 397 <output name="gene_data_report"> |
| 412 <assert_contents> | 398 <assert_contents> |
| 413 <has_text text="human"/> | 399 <has_text text="human"/> |
| 414 <has_n_lines n="2"/> | 400 <has_n_lines n="2"/> |
| 415 <has_n_columns n="8"/> | 401 <has_n_columns n="8"/> |
| 416 </assert_contents> | 402 </assert_contents> |
| 417 </output> | 403 </output> |
| 418 </test> | 404 </test> |
| 419 <!-- 8: datasets download gene accession NM_000546.6 NM_000492.4 + ortholog--> | 405 <!-- 8: datasets download gene accession NM_000546.6 NM_000492.4 + ortholog--> |
| 420 <test expect_num_outputs="1"> | 406 <test expect_num_outputs="3"> |
| 421 <conditional name="query|subcommand"> | 407 <conditional name="query|subcommand"> |
| 422 <param name="download_by" value="accession"/> | 408 <param name="download_by" value="accession"/> |
| 423 <conditional name="text_or_file"> | 409 <conditional name="text_or_file"> |
| 424 <param name="text_or_file" value="text"/> | 410 <param name="text_or_file" value="text"/> |
| 425 <param name="accession" value="NM_000546.6 NM_000492.4"/> | 411 <param name="accession" value="NM_000546.6 NM_000492.4"/> |
| 426 </conditional> | 412 </conditional> |
| 427 <param name="ortholog" value="all"/> | 413 <param name="ortholog" value="all"/> |
| 428 </conditional> | 414 </conditional> |
| 429 <section name="file_choices"> | |
| 430 <conditional name="kingdom_cond"> | |
| 431 <param name="include" value=""/> | |
| 432 </conditional> | |
| 433 </section> | |
| 434 <output name="gene_data_report"> | 415 <output name="gene_data_report"> |
| 435 <assert_contents> | 416 <assert_contents> |
| 436 <has_text text="human"/> | 417 <has_text text="human"/> |
| 437 <has_n_lines min="800"/> | 418 <has_n_lines min="800"/> |
| 438 <has_n_columns n="8"/> | 419 <has_n_columns n="8"/> |
| 439 </assert_contents> | 420 </assert_contents> |
| 440 </output> | 421 </output> |
| 441 </test> | 422 </test> |
| 442 | |
| 443 <!-- 9: datasets download gene accession WP_003249567.1 + include_flanks_bp --> | 423 <!-- 9: datasets download gene accession WP_003249567.1 + include_flanks_bp --> |
| 444 <test expect_num_outputs="4"> | 424 <test expect_num_outputs="4"> |
| 445 <conditional name="query|subcommand"> | 425 <conditional name="query|subcommand"> |
| 446 <param name="download_by" value="accession"/> | 426 <param name="download_by" value="accession"/> |
| 447 <conditional name="text_or_file"> | 427 <conditional name="text_or_file"> |
| 464 <has_n_columns n="7"/> | 444 <has_n_columns n="7"/> |
| 465 </assert_contents> | 445 </assert_contents> |
| 466 </output> | 446 </output> |
| 467 <output name="gene_fasta"> | 447 <output name="gene_fasta"> |
| 468 <assert_contents> | 448 <assert_contents> |
| 469 <has_text text=">"/> | 449 <has_text text=">"/> |
| 470 </assert_contents> | 450 </assert_contents> |
| 471 </output> | 451 </output> |
| 472 <output name="gene_flanks"> | 452 <output name="gene_flanks"> |
| 473 <assert_contents> | 453 <assert_contents> |
| 474 <has_text text=">"/> | 454 <has_text text=">"/> |
| 475 </assert_contents> | 455 </assert_contents> |
| 476 </output> | 456 </output> |
| 477 <output name="protein_fasta"> | 457 <output name="protein_fasta"> |
| 478 <assert_contents> | 458 <assert_contents> |
| 479 <has_text text=">"/> | 459 <has_text text=">"/> |
| 480 </assert_contents> | 460 </assert_contents> |
| 481 </output> | 461 </output> |
| 482 <assert_command> | 462 <assert_command> |
| 483 <has_text text="include-flanks-bp 100"/> | 463 <has_text text="include-flanks-bp 100"/> |
| 484 </assert_command> | 464 </assert_command> |
| 485 </test> | 465 </test> |
| 486 | |
| 487 <!-- 10: datasets download gene taxon human --> | 466 <!-- 10: datasets download gene taxon human --> |
| 488 <!-- <test expect_num_outputs="1"> | 467 <!-- <test expect_num_outputs="1"> |
| 489 <conditional name="query|subcommand"> | 468 <conditional name="query|subcommand"> |
| 490 <param name="download_by" value="taxon"/> | 469 <param name="download_by" value="taxon"/> |
| 491 <param name="taxon_positional" value="human"/> | 470 <param name="taxon_positional" value="human"/> |
| 532 <has_text text=">" n="1" /> | 511 <has_text text=">" n="1" /> |
| 533 </assert_contents> | 512 </assert_contents> |
| 534 </output> | 513 </output> |
| 535 </test> --> | 514 </test> --> |
| 536 </tests> | 515 </tests> |
| 537 <help> | 516 <help><![CDATA[ |
| 538 <![CDATA[ | 517 .. class:: infomark |
| 539 **Download Gene Datasets from NCBI** | 518 |
| 540 | 519 **What it does** |
| 541 Download a gene dataset (gene sequence, transcipt, amino acid sequences, | 520 |
| 542 nucleotide coding sequences, 5'-UTR, 3'-UTR) as well as gene and gene | 521 Downloads gene data from NCBI using the `datasets`_ command-line tool. |
| 543 product reports. Genes can be referred by gene id, symbol, accession, | 522 Retrieve gene sequences, transcripts, proteins, and annotation reports. |
| 544 or taxon. | 523 |
| 545 ]]> | 524 **Query Options** |
| 546 </help> | 525 |
| | 526 ============= ================================================================ |
| | 527 Method Description |
| | 528 ============= ================================================================ |
| | 529 Gene ID NCBI Gene ID (e.g., 672 for BRCA1) |
| | 530 Symbol Gene symbol with taxon (e.g., TP53 in human) |
| | 531 Accession RefSeq nucleotide (NM\_) or protein (NP\_/WP\_) accession |
| | 532 Taxon All genes for a taxon (large downloads) |
| | 533 ============= ================================================================ |
| | 534 |
| | 535 ---- |
| | 536 |
| | 537 **Key Options** |
| | 538 |
| | 539 - **Ortholog retrieval**: Get orthologous genes across taxa (vertebrates/insects) |
| | 540 - **Taxon filter**: Limit WP\_ accession results to specific organisms |
| | 541 - **Flanking sequence**: Include nucleotides upstream/downstream (WP\_ only) |
| | 542 - **FASTA filter**: Subset output to specific accessions |
| | 543 |
| | 544 **Outputs (Eukaryote)** |
| | 545 |
| | 546 - **Gene Data Report**: Tabular metadata (ID, symbol, description, coordinates) |
| | 547 - **Gene Product Report**: Detailed transcript/protein information |
| | 548 - **Sequences**: Gene, RNA, protein, CDS, 5'/3' UTR FASTA files |
| | 549 |
| | 550 **Outputs (Prokaryote)** |
| | 551 |
| | 552 Prokaryotic genes (WP\_ accessions) use a different report format with: |
| | 553 accession, description, EC number, gene symbol, protein info. |
| | 554 |
| | 555 **Examples** |
| | 556 |
| | 557 Download human BRCA1:: |
| | 558 |
| | 559 Query by: Gene ID |
| | 560 Gene ID: 672 |
| | 561 |
| | 562 Download TP53 orthologs in rodents:: |
| | 563 |
| | 564 Query by: Symbol |
| | 565 Symbol: tp53 |
| | 566 Ortholog: rodentia |
| | 567 |
| | 568 |
| | 569 .. _datasets: https://www.ncbi.nlm.nih.gov/datasets/ |
| | 570 ]]></help> |
| 547 <expand macro="citations"/> | 571 <expand macro="citations"/> |
| 548 </tool> | 572 </tool> |
