Mercurial > repos > iuc > ncbi_datasets
diff datasets_gene.xml @ 20:35d32c807c23 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/ncbi_datasets commit 5a65a62588a36d757f96681bf72f537c12c91beb
| author | iuc |
|---|---|
| date | Fri, 26 Dec 2025 17:16:51 +0000 |
| parents | 9a10a6449901 |
| children |
line wrap: on
line diff
--- a/datasets_gene.xml Mon Mar 17 11:05:34 2025 +0000 +++ b/datasets_gene.xml Fri Dec 26 17:16:51 2025 +0000 @@ -4,7 +4,7 @@ <import>macros.xml</import> </macros> <expand macro="bio_tools"/> - <expand macro="requirements"></expand> + <expand macro="requirements"/> <expand macro="version_command"/> <command><![CDATA[ #import re @@ -41,7 +41,7 @@ #if $filters.fasta_filter_cond.fasta_filter_select #if $filters.fasta_filter_cond.fasta_filter_select == 'text' - --fasta-filter #echo ",".join(f"'{x}'" for x in $filters.fasta_filter_cond.fasta_filter.split(',') if x) + --fasta-filter #echo ",".join(f"'{x}'" for x in str($filters.fasta_filter_cond.fasta_filter).split(',') if x) #else --fasta-filter-file '$filters.fasta_filter_cond.fasta_filter_file' #end if @@ -97,8 +97,8 @@ <param argument="--taxon" type="text" value="human" label="Species for gene symbol" help="NCBI taxid, common or scientific name"> <sanitizer invalid_char=""> <valid initial="string.letters"> - <add value=" " /> - <add value="-" /> + <add value=" "/> + <add value="-"/> </valid> </sanitizer> </param> @@ -109,8 +109,8 @@ <param argument="--taxon-filter" type="text" value="" label="Limit gene sequences and annotation report file to specified taxon" help="any rank, only available for WP accessions"> <sanitizer invalid_char=""> <valid initial="string.letters"> - <add value=" " /> - <add value="-" /> + <add value=" "/> + <add value="-"/> </valid> </sanitizer> </param> @@ -133,7 +133,7 @@ <param argument="--fasta-filter" type="text" label="RefSeq nucleotide and protein accessions" help="Comma separated"> <sanitizer invalid_char=""> <valid initial="string.letters,string.digits"> - <add value="," /> + <add value=","/> </valid> </sanitizer> </param> @@ -209,7 +209,7 @@ <filter>file_choices['kingdom_cond']['include'] and "cds" in file_choices['kingdom_cond']['include']</filter> </data> <data name="threep_utr_fasta" label="NCBI Gene Datasets: 3' UTR fasta" format="fasta" from_work_dir="ncbi_dataset/data/3p_utr.fna"> - <filter>file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include']</filter> + <filter>file_choices['kingdom_cond']['include'] and "3p-utr" in file_choices['kingdom_cond']['include']</filter> </data> <data name="fivep_utr_fasta" label="NCBI Gene Datasets: 5' UTR fasta" format="fasta" from_work_dir="ncbi_dataset/data/5p_utr.fna"> <filter>file_choices['kingdom_cond']['include'] and "5p-utr" in file_choices['kingdom_cond']['include']</filter> @@ -235,12 +235,12 @@ </output> <output name="rna_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="protein_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> </test> @@ -263,12 +263,12 @@ </output> <output name="rna_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="protein_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> </test> @@ -284,6 +284,7 @@ </conditional> <section name="file_choices"> <conditional name="kingdom_cond"> + <param name="kingdom_sel" value="gene"/> <param name="include" value="gene,cds"/> </conditional> </section> @@ -297,17 +298,17 @@ </output> <output name="gene_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="cds_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> </test> <!-- 4: datasets download gene symbol tp53 --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="symbol"/> <conditional name="text_or_file"> @@ -315,11 +316,6 @@ <param name="accession" value="tp53"/> </conditional> </conditional> - <section name="file_choices"> - <conditional name="kingdom_cond"> - <param name="include" value=""/> - </conditional> - </section> <output name="gene_data_report"> <assert_contents> <has_text text="human"/> @@ -361,17 +357,17 @@ </output> <output name="threep_utr_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="fivep_utr_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> </test> <!-- 6: datasets download gene symbol brca1 \-\-ortholog --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="symbol"/> <conditional name="text_or_file"> @@ -380,11 +376,6 @@ </conditional> <param name="ortholog" value="rodentia"/> </conditional> - <section name="file_choices"> - <conditional name="kingdom_cond"> - <param name="include" value=""/> - </conditional> - </section> <output name="gene_data_report"> <assert_contents> <has_text text="rat"/> @@ -395,7 +386,7 @@ </output> </test> <!-- 7: datasets download gene accession NP_000483.3 --> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="accession"/> <conditional name="text_or_file"> @@ -403,11 +394,6 @@ <param name="accession" value="NP_000483.3"/> </conditional> </conditional> - <section name="file_choices"> - <conditional name="kingdom_cond"> - <param name="include" value=""/> - </conditional> - </section> <output name="gene_data_report"> <assert_contents> <has_text text="human"/> @@ -417,7 +403,7 @@ </output> </test> <!-- 8: datasets download gene accession NM_000546.6 NM_000492.4 + ortholog--> - <test expect_num_outputs="1"> + <test expect_num_outputs="3"> <conditional name="query|subcommand"> <param name="download_by" value="accession"/> <conditional name="text_or_file"> @@ -426,11 +412,6 @@ </conditional> <param name="ortholog" value="all"/> </conditional> - <section name="file_choices"> - <conditional name="kingdom_cond"> - <param name="include" value=""/> - </conditional> - </section> <output name="gene_data_report"> <assert_contents> <has_text text="human"/> @@ -439,7 +420,6 @@ </assert_contents> </output> </test> - <!-- 9: datasets download gene accession WP_003249567.1 + include_flanks_bp --> <test expect_num_outputs="4"> <conditional name="query|subcommand"> @@ -466,24 +446,23 @@ </output> <output name="gene_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="gene_flanks"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <output name="protein_fasta"> <assert_contents> - <has_text text=">"/> + <has_text text=">"/> </assert_contents> </output> <assert_command> <has_text text="include-flanks-bp 100"/> </assert_command> - </test> - + </test> <!-- 10: datasets download gene taxon human --> <!-- <test expect_num_outputs="1"> <conditional name="query|subcommand"> @@ -534,15 +513,60 @@ </output> </test> --> </tests> - <help> -<![CDATA[ -**Download Gene Datasets from NCBI** + <help><![CDATA[ +.. class:: infomark + +**What it does** + +Downloads gene data from NCBI using the `datasets`_ command-line tool. +Retrieve gene sequences, transcripts, proteins, and annotation reports. + +**Query Options** + +============= ================================================================ +Method Description +============= ================================================================ +Gene ID NCBI Gene ID (e.g., 672 for BRCA1) +Symbol Gene symbol with taxon (e.g., TP53 in human) +Accession RefSeq nucleotide (NM\_) or protein (NP\_/WP\_) accession +Taxon All genes for a taxon (large downloads) +============= ================================================================ + +---- + +**Key Options** + +- **Ortholog retrieval**: Get orthologous genes across taxa (vertebrates/insects) +- **Taxon filter**: Limit WP\_ accession results to specific organisms +- **Flanking sequence**: Include nucleotides upstream/downstream (WP\_ only) +- **FASTA filter**: Subset output to specific accessions -Download a gene dataset (gene sequence, transcipt, amino acid sequences, -nucleotide coding sequences, 5'-UTR, 3'-UTR) as well as gene and gene -product reports. Genes can be referred by gene id, symbol, accession, -or taxon. -]]> - </help> +**Outputs (Eukaryote)** + +- **Gene Data Report**: Tabular metadata (ID, symbol, description, coordinates) +- **Gene Product Report**: Detailed transcript/protein information +- **Sequences**: Gene, RNA, protein, CDS, 5'/3' UTR FASTA files + +**Outputs (Prokaryote)** + +Prokaryotic genes (WP\_ accessions) use a different report format with: +accession, description, EC number, gene symbol, protein info. + +**Examples** + +Download human BRCA1:: + + Query by: Gene ID + Gene ID: 672 + +Download TP53 orthologs in rodents:: + + Query by: Symbol + Symbol: tp53 + Ortholog: rodentia + + +.. _datasets: https://www.ncbi.nlm.nih.gov/datasets/ +]]></help> <expand macro="citations"/> </tool>
