Mercurial > repos > iuc > ncbi_datasets
diff datasets_genome.xml @ 0:c6009f4d7261 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit fd91cf3000d556d8219426eddb8a3059071a2009"
| author | iuc |
|---|---|
| date | Thu, 15 Jul 2021 13:31:56 +0000 |
| parents | |
| children | 2753a5786114 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datasets_genome.xml Thu Jul 15 13:31:56 2021 +0000
@@ -0,0 +1,178 @@
+<tool id="datasets-download-genome" name="NCBI datasets download genome" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@">
+    <description>Download assembled genomes from NCBI</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <!-- Assembles the "datasets download genome" command line; the downloaded zip is only extracted (7z) when a hydrated dataset was requested. -->
+    <command><![CDATA[
+@SETUP_CERTIFICATES@
+datasets download genome $subcommand.download_by
+#if $subcommand.download_by == 'accession':
+    #if $subcommand.text_or_file.text_or_file == 'text':
+        '$subcommand.text_or_file.accession'
+    #else
+        --inputfile '$subcommand.text_or_file.inputfile'
+    #end if
+#else:
+    '$subcommand.taxon'
+#end if
+$annotated
+$dehydrated
+#if $assembly_level:
+--assembly_level $assembly_level
+#end if
+#if $assembly_source:
+--assembly_source $assembly_source
+#end if
+--chromosomes '$chromosomes'
+@EXCLUDES_GENOME@
+@INCLUDES_GENOME@
+$reference
+@RELEASED_BEFORE@
+@RELEASED_SINCE@
+## Each repeat instance wraps its inner parameters, so the entered text is read from its "search" member.
+#for $search_term in $search:
+    --search '$search_term.search'
+#end for
+#if not $dehydrated:
+    && 7z x ncbi_dataset.zip
+#end if
+]]></command>
+    <inputs>
+        <conditional name="subcommand">
+            <param name="download_by" type="select" label="Choose how to find genomes to download">
+                <option value="accession">Download by NCBI assembly or BioProject accession</option>
+                <option value="taxon">Download by taxon</option>
+            </param>
+            <when value="accession">
+                <expand macro="text_or_file"/>
+            </when>
+            <when value="taxon">
+                <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/>
+            </when>
+        </conditional>
+        <expand macro="annotation"/>
+        <expand macro="dehydrated"/>
+        <expand macro="assembly_level"/>
+        <expand macro="assembly_source"/>
+        <expand macro="chromosomes"/>
+        <expand macro="excludes_genome"/>
+        <expand macro="includes_genome"/>
+        <expand macro="released_options"/>
+        <expand macro="released_options" before_or_after="since"/>
+        <param argument="--reference" type="boolean" truevalue="--reference" falsevalue="" label="Limit to reference and representative (GCF_ and GCA_) assemblies"/>
+        <repeat name="search" title="Add search terms">
+            <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/>
+        </repeat>
+    </inputs>
+    <outputs>
+        <!-- A dehydrated run returns the zip archive unchanged; otherwise the extracted files are collected into one list per file type, keyed by the first path component under ncbi_dataset/data. -->
+        <data name="dehydrated_archive" format="zip" label="Dehydrated Archive" from_work_dir="ncbi_dataset.zip">
+            <filter>dehydrated</filter>
+        </data>
+        <collection name="genome_fasta" label="NCBI genome datasets: genome fasta" type="list">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/.*_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>not dehydrated and not exclude_seq</filter>
+        </collection>
+        <collection name="protein_fasta" label="NCBI genome datasets: protein fasta" type="list">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>not dehydrated and not exclude_protein</filter>
+        </collection>
+        <collection name="genomic_gff" label="NCBI genome datasets: genomic gff" type="list">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>not dehydrated and not exclude_gff3</filter>
+        </collection>
+        <collection name="genomic_gtf" label="NCBI genome datasets: genomic gtf" type="list">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gtf" ext="gtf" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>not dehydrated and include_gtf</filter>
+        </collection>
+        <collection name="genomic_gbff" label="NCBI genome datasets: genomic gbff" type="list">
+            <discover_datasets pattern="(?P&lt;identifier_0&gt;.*?)\/genomic\.gbff" ext="genbank" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"/>
+            <filter>not dehydrated and include_gbff</filter>
+        </collection>
+    </outputs>
+    <tests>
+        <test title="test dehydrated download by taxon">
+            <conditional name="subcommand">
+                <param name="download_by" value="taxon"/>
+                <param name="taxon" value="human"/>
+            </conditional>
+            <param name="chromosomes" value="21"/>
+            <param name="dehydrated" value="true"/>
+            <param name="released_before" value="01/01/2018"/>
+            <output name="dehydrated_archive" value="human_chrom_21_dehydrated.zip" compare="sim_size" delta="10000"/>
+        </test>
+        <test title="test download by comma-separated accession">
+            <conditional name="subcommand">
+                <param name="download_by" value="accession"/>
+                <conditional name="text_or_file">
+                    <param name="text_or_file" value="text"/>
+                    <param name="accession" value="GCF_000013305.1,GCF_000007445.1"/>
+                </conditional>
+            </conditional>
+            <param name="dehydrated" value="false"/>
+            <param name="released_before" value="01/01/2007"/>
+            <output_collection name="genome_fasta" type="list">
+                <element name="GCF_000013305.1" file="GCF_000013305.1.genome.fa" compare="contains"/>
+                <element name="GCF_000007445.1" file="GCF_000007445.1.genome.fa" compare="contains"/>
+            </output_collection>
+            <output_collection name="protein_fasta" type="list">
+                <element name="GCF_000013305.1" file="GCF_000013305.1.protein.fa" compare="contains"/>
+                <element name="GCF_000007445.1" file="GCF_000007445.1.protein.fa" compare="contains"/>
+            </output_collection>
+            <output_collection name="genomic_gff" type="list">
+                <element name="GCF_000013305.1" file="GCF_000013305.1.genomic.gff" compare="contains"/>
+                <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gff" compare="contains"/>
+            </output_collection>
+        </test>
+        <test title="test download by accessions listed in file">
+            <conditional name="subcommand">
+                <param name="download_by" value="accession"/>
+                <conditional name="text_or_file">
+                    <param name="text_or_file" value="file"/>
+                    <param name="inputfile" value="accessions.txt"/>
+                </conditional>
+            </conditional>
+            <param name="include_gbff" value="true"/>
+            <param name="include_gtf" value="true"/>
+            <param name="dehydrated" value="false"/>
+            <param name="released_before" value="01/01/2007"/>
+            <output_collection name="genome_fasta" type="list">
+                <element name="GCF_000013305.1" file="GCF_000013305.1.genome.fa" compare="contains"/>
+                <element name="GCF_000007445.1" file="GCF_000007445.1.genome.fa" compare="contains"/>
+            </output_collection>
+            <output_collection name="protein_fasta" type="list">
+                <element name="GCF_000013305.1" file="GCF_000013305.1.protein.fa" compare="contains"/>
+                <element name="GCF_000007445.1" file="GCF_000007445.1.protein.fa" compare="contains"/>
+            </output_collection>
+            <output_collection name="genomic_gff" type="list">
+                <element name="GCF_000013305.1" file="GCF_000013305.1.genomic.gff" compare="contains"/>
+                <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gff" compare="contains"/>
+            </output_collection>
+            <output_collection name="genomic_gtf" type="list">
+                <element name="GCF_000013305.1" file="GCF_000013305.1.genomic.gtf" compare="contains"/>
+                <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/>
+            </output_collection>
+            <output_collection name="genomic_gbff" type="list">
+                <element name="GCF_000013305.1" file="GCF_000013305.1.genomic.gbff" compare="contains"/>
+                <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gbff" compare="contains"/>
+            </output_collection>
+        </test>
+    </tests>
+    <help>
+
+Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report.
+Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file.
+
+The default genome dataset includes the following files (if available):
+
+* genomic.fna (genomic sequences)
+* rna.fna (transcript sequences)
+* protein.faa (protein sequences)
+* genomic.gff (genome annotation in gff3 format)
+* data_report.jsonl (data report with genome assembly and annotation metadata)
+* dataset_catalog.json (a list of files and file types included in the dataset)
+    </help>
+
+</tool>
