Mercurial > repos > iuc > ncbi_datasets
diff datasets_gene.xml @ 0:c6009f4d7261 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit fd91cf3000d556d8219426eddb8a3059071a2009"
| author | iuc |
|---|---|
| date | Thu, 15 Jul 2021 13:31:56 +0000 |
| parents | |
| children | 48e0814f250a |
line wrap: on
line diff
<!--
    Galaxy tool wrapper around the NCBI `datasets download gene` command.
    The user selects how genes are looked up (gene-id, symbol, accession or taxon);
    the downloaded ncbi_dataset.zip is unpacked with 7z and the FASTA files inside
    are collected as Galaxy outputs via from_work_dir.
    The requirements, text_or_file and excludes_gene macros and all tokens written
    between at-signs are imported from macros.xml.
-->
<tool id="datasets-download-gene" name="NCBI datasets download gene" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@">
    <description>Download genes from NCBI</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command><![CDATA[
@SETUP_CERTIFICATES@
datasets download gene $subcommand.download_by
## Positional query: the taxon branch has no text_or_file conditional,
## every other branch takes either inline identifiers or an input file.
#if $subcommand.download_by == 'taxon'
    '$subcommand.taxon'
#else if $subcommand.text_or_file.text_or_file == 'text'
    #if $subcommand.download_by == 'gene-id'
        ## gene IDs are restricted to digits and spaces by the sanitizer, so they are passed unquoted
        $subcommand.text_or_file.accession
    #else
        ## quote every whitespace separated symbol/accession individually
        #echo " ".join(f"'{x}'" for x in $subcommand.text_or_file.accession.split(' ') if x)
    #end if
#else
    --inputfile '$subcommand.text_or_file.inputfile'
#end if
## species for symbol lookups (declared in the symbol branch of the inputs)
#if $subcommand.download_by == 'symbol' and $subcommand.taxon
    --taxon '$subcommand.taxon'
#end if
## optional taxon restriction for accession lookups (declared in the accession branch)
#if $subcommand.download_by == 'accession' and $subcommand.taxon_filter
    --taxon-filter '$subcommand.taxon_filter'
#end if
@EXCLUDES_GENE@
#if $subcommand.download_by == 'accession' and $subcommand.include_flanks_bp:
    --include-flanks-bp $subcommand.include_flanks_bp
#end if
&& 7z x ncbi_dataset.zip
]]></command>
    <inputs>
        <conditional name="subcommand">
            <param name="download_by" type="select" label="Choose how to find genes to download">
                <option value="gene-id">Download a gene dataset by NCBI Gene ID</option>
                <option value="symbol">Download a gene dataset by gene symbol</option>
                <option value="accession">Download a gene dataset by RefSeq nucleotide or protein accession</option>
                <option value="taxon">Download a gene dataset by taxon</option>
            </param>
            <when value="gene-id">
                <expand macro="text_or_file" what="gene-id" what_extended="NCBI Gene ID" help="Should be valid NCBI Gene ID">
                    <!-- Gene IDs are numeric: keep digits and the separating space, drop everything else -->
                    <sanitizer invalid_char="">
                        <valid initial="string.digits">
                            <add value=" " />
                        </valid>
                    </sanitizer>
                </expand>
            </when>
            <when value="symbol">
                <expand macro="text_or_file" what="symbol" what_extended="gene symbol" help="Should be valid gene symbol"/>
                <param argument="--taxon" type="text" value="human" label="Specify a species name" help="Species name can be common or scientific name or species-level NCBI Taxonomy ID"/>
            </when>
            <when value="accession">
                <expand macro="text_or_file" what="accession" what_extended="RefSeq nucleotide or protein accession" help="Should be RefSeq nucleotide or protein accession"/>
                <param argument="--include-flanks-bp" type="integer" optional="true" min="0" label="Include gene flanking sequence, limited to prokaryotic genes" help="If not specified flanking gene sequences will not be downloaded. Accession must start with WP"/>
                <param argument="--taxon-filter" type="text" optional="true" label="limit genes to a specified taxon" help="any rank"/>
            </when>
            <when value="taxon">
                <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/>
            </when>
        </conditional>
        <expand macro="excludes_gene"/>
        <!--
            NOTE(review): the fasta_filter / fasta_filter_file values selected below are not
            referenced in the command template of this file. Confirm whether the EXCLUDES_GENE
            token from macros.xml emits them; if it does not, these inputs currently have no effect.
        -->
        <conditional name="limit_fasta" label="Limit fasta by accession?">
            <param name="limit" type="select" label="Select limit method">
                <option value="none">None</option>
                <option value="text">Enter list of accessions</option>
                <option value="file">Read list of accessions from file</option>
            </param>
            <when value="none"/>
            <when value="text">
                <param argument="--fasta-filter" type="text" label="Limit gene fasta download to these accessions"/>
            </when>
            <when value="file">
                <param argument="--fasta-filter-file" type="data" format="txt" label="File of accessions to limit gene fasta download"/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Each FASTA output is skipped when the matching exclude option is set -->
        <data name="gene_fasta" format="fasta" label="NCBI datasets gene: gene fasta" from_work_dir="ncbi_dataset/data/gene.fna">
            <filter>not exclude_gene</filter>
        </data>
        <data name="protein_fasta" format="fasta" label="NCBI datasets gene: protein fasta" from_work_dir="ncbi_dataset/data/protein.faa">
            <filter>not exclude_protein</filter>
        </data>
        <data name="rna_fasta" format="fasta" label="NCBI datasets gene: rna fasta" from_work_dir="ncbi_dataset/data/rna.fna">
            <filter>not exclude_rna</filter>
        </data>
        <!-- include_flanks_bp only exists in the accession branch, so check the branch before reading it -->
        <data name="gene_flanks" format="fasta" label="NCBI datasets gene: flanking sequence fasta" from_work_dir="ncbi_dataset/data/gene_flank.fna">
            <filter><![CDATA[subcommand['download_by'] == 'accession' and subcommand['include_flanks_bp']]]></filter>
        </data>
    </outputs>
    <tests>
        <test title="test download by gene-id">
            <conditional name="subcommand">
                <param name="download_by" value="gene-id"/>
                <conditional name="text_or_file">
                    <param name="text_or_file" value="text"/>
                    <param name="accession" value="472 672"/>
                </conditional>
            </conditional>
            <output name="gene_fasta">
                <assert_contents>
                    <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/>
                    <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/>
                </assert_contents>
            </output>
        </test>
        <test title="test download by gene-id, test sanitizer">
            <conditional name="subcommand">
                <param name="download_by" value="gene-id"/>
                <conditional name="text_or_file">
                    <param name="text_or_file" value="text"/>
                    <!-- the trailing shell fragment must be removed by the digits-and-space sanitizer -->
                    <param name="accession" value="472 672; exit"/>
                </conditional>
            </conditional>
            <output name="gene_fasta">
                <assert_contents>
                    <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/>
                    <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/>
                </assert_contents>
            </output>
            <assert_command>
                <not_has_text text="exit"/>
            </assert_command>
        </test>
        <test title="test download by gene symbol">
            <conditional name="subcommand">
                <param name="download_by" value="symbol"/>
                <conditional name="text_or_file">
                    <param name="text_or_file" value="text"/>
                    <param name="accession" value="BRCA1 ATM"/>
                </conditional>
            </conditional>
            <output name="gene_fasta">
                <assert_contents>
                    <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/>
                    <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/>
                </assert_contents>
            </output>
            <!-- the species parameter keeps its default value and must reach the command line -->
            <assert_command>
                <has_text text="--taxon 'human'"/>
            </assert_command>
        </test>
        <test title="test download by accession">
            <conditional name="subcommand">
                <param name="download_by" value="accession"/>
                <conditional name="text_or_file">
                    <param name="text_or_file" value="text"/>
                    <param name="accession" value="NM_000546.6 NM_000492.4"/>
                </conditional>
            </conditional>
            <output name="gene_fasta">
                <assert_contents>
                    <has_line line="GTAGTAGGTCTTTGGCATTAGGAGCTTGAGCCCAGACGGCCCTAGCAGGGACCCCAGCGCCCGAGAGACC"/>
                    <has_line line="CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGC"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="'NM_000546.6' 'NM_000492.4'"/>
            </assert_command>
        </test>
        <test title="test download by accession with flanking sequence">
            <conditional name="subcommand">
                <param name="download_by" value="accession"/>
                <conditional name="text_or_file">
                    <param name="text_or_file" value="text"/>
                    <param name="accession" value="WP_004675351.1"/>
                </conditional>
                <param name="include_flanks_bp" value="10"/>
            </conditional>
            <output name="gene_flanks">
                <assert_contents>
                    <has_line line="gccctgccgcATGATCGATCTGATGCCGACGAGCGAGGAACAGGCGGCGGCGATCGTCCGCACCCATGCG"/>
                </assert_contents>
            </output>
            <assert_command>
                <has_text text="--include-flanks-bp 10"/>
            </assert_command>
        </test>
        <test title="test download by taxon">
            <conditional name="subcommand">
                <param name="download_by" value="taxon"/>
                <param name="taxon" value="Mycobacterium tuberculosis H37Rv"/>
            </conditional>
            <param name="exclude_rna" value="true"/>
            <param name="exclude_protein" value="true"/>
            <output name="gene_fasta">
                <assert_contents>
                    <has_line line="GTGGCGCTGAATATCAAAGACCCTGAGGTAGACCGACTAGCCGCCGAACTCGCTGACCGGCTGCACACCA"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
Download a gene dataset including gene, transcript and protein sequence, a data table and a data report. Gene datasets can be specified by NCBI Gene ID, symbol, RefSeq accession or taxon. Datasets are downloaded as a zip file.

The default gene dataset includes the following files:

* gene.fna (gene sequences)
* rna.fna (transcript sequences)
* protein.faa (protein sequences)
* data_report.jsonl (data report with gene metadata)
* data_table.tsv (data table with gene metadata, one transcript per row)
* dataset_catalog.json (a list of files and file types included in the dataset)
    </help>
</tool>
