Mercurial > repos > iuc > ncbi_datasets
comparison datasets_gene.xml @ 0:c6009f4d7261 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit fd91cf3000d556d8219426eddb8a3059071a2009"
| author | iuc |
|---|---|
| date | Thu, 15 Jul 2021 13:31:56 +0000 |
| parents | |
| children | 48e0814f250a |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:c6009f4d7261 |
|---|---|
| 1 <tool id="datasets-download-gene" name="NCBI datasets download gene" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@"> | |
| 2 <description>Download genes from NCBI</description> <!-- @PROFILE@, @LICENSE@ and @TOOL_VERSION@ are macro tokens, presumably defined in the imported macros.xml; a token needs both the leading and the trailing @ to be substituted --> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"></expand> | |
| 7 <command><![CDATA[ | |
| 8 ## Query is a taxon name, a quoted identifier list, or an --inputfile; --taxon (symbol) and --taxon-filter (accession) are added for text and file input alike; the downloaded zip is unpacked last. | |
| 9 @SETUP_CERTIFICATES@ | |
| 10 datasets download gene $subcommand.download_by | |
| 11 #if $subcommand.download_by == 'taxon': | |
| 12 '$subcommand.taxon' | |
| 13 #else if $subcommand.text_or_file.text_or_file == 'text': | |
| 14 #if $subcommand.download_by == 'gene-id': | |
| 15 $subcommand.text_or_file.accession | |
| 16 #else | |
| 17 #echo " ".join(f"'{x}'" for x in $subcommand.text_or_file.accession.split(' ') if x) | |
| 18 #end if | |
| 19 #else | |
| 20 --inputfile '$subcommand.text_or_file.inputfile' | |
| 21 #end if | |
| 22 #if $subcommand.download_by == 'symbol' and $subcommand.taxon: | |
| 23 --taxon '$subcommand.taxon' | |
| 24 #end if | |
| 25 #if $subcommand.download_by == 'accession' and $subcommand.taxon_filter: | |
| 26 --taxon-filter '$subcommand.taxon_filter' | |
| 27 #end if | |
| 28 @EXCLUDES_GENE@ | |
| 29 #if $subcommand.download_by == 'accession' and $subcommand.include_flanks_bp: | |
| 30 --include-flanks-bp $subcommand.include_flanks_bp | |
| 31 #end if | |
| 32 && 7z x ncbi_dataset.zip | |
| 33 ]]></command> | |
| 34 <inputs> | |
| 35 <conditional name="subcommand"> <!-- the selected download_by value is placed on the command line right after 'datasets download gene' --> | |
| 36 <param name="download_by" type="select" label="Choose how to find genes to download"> | |
| 37 <option value="gene-id">Download a gene dataset by NCBI Gene ID</option> | |
| 38 <option value="symbol">Download a gene dataset by gene symbol</option> | |
| 39 <option value="accession">Download a gene dataset by RefSeq nucleotide or protein accession</option> | |
| 40 <option value="taxon">Download a gene dataset by taxon</option> | |
| 41 </param> | |
| 42 <when value="gene-id"> | |
| 43 <expand macro="text_or_file" what="gene-id" what_extended="NCBI Gene ID" help="Should be valid NCBI Gene ID"> | |
| 44 <sanitizer invalid_char=""> <!-- drops every character that is not a digit or a space --> | |
| 45 <valid initial="string.digits"> | |
| 46 <add value=" " /> | |
| 47 </valid> | |
| 48 </sanitizer> | |
| 49 </expand> | |
| 50 </when> | |
| 51 <when value="symbol"> | |
| 52 <expand macro="text_or_file" what="symbol" what_extended="gene symbol" help="Should be valid gene symbol"/> | |
| 53 <param argument="--taxon" type="text" value="human" label="Specify a species name" help="Species name can be common or scientific name or species-level NCBI Taxonomy ID"/> | |
| 54 </when> | |
| 55 <when value="accession"> | |
| 56 <expand macro="text_or_file" what="accession" what_extended="RefSeq nucleotide or protein accession" help="Should be RefSeq nucleotide or protein accession"/> | |
| 57 <param argument="--include-flanks-bp" type="integer" optional="true" min="0" label="Include gene flanking sequence, limited to prokaryotic genes" help="If not specified flanking gene sequences will not be downloaded. Accession must start with WP"/> | |
| 58 <param argument="--taxon-filter" type="text" optional="true" label="limit genes to a specified taxon" help="any rank"/> | |
| 59 </when> | |
| 60 <when value="taxon"> | |
| 61 <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/> | |
| 62 </when> | |
| 63 </conditional> | |
| 64 <expand macro="excludes_gene"/> | |
| 65 <conditional name="limit_fasta" label="Limit fasta by accession?"> | |
| 66 <param name="limit" type="select" label="Select limit method"> | |
| 67 <option value="none">None</option> | |
| 68 <option value="text">Enter list of accessions</option> | |
| 69 <option value="file">Read list of accessions from file</option> | |
| 70 </param> | |
| 71 <!-- NOTE(review): the fasta_filter and fasta_filter_file inputs below are not referenced in the visible command template; confirm that the EXCLUDES_GENE macro token emits them --> | |
| 72 <when value="none"/> | |
| 73 <when value="text"> | |
| 74 <param argument="--fasta-filter" type="text" label="Limit gene fasta download to these accessions"/> | |
| 75 </when> | |
| 76 <when value="file"> | |
| 77 <param argument="--fasta-filter-file" type="data" format="txt" label="File of accessions to limit gene fasta download"/> | |
| 78 </when> | |
| 79 </conditional> | |
| 80 </inputs> | |
| 81 <outputs> <!-- every output is read from the unpacked ncbi_dataset/data directory via from_work_dir --> | |
| 82 <data name="gene_fasta" format="fasta" label="NCBI datasets gene: gene fasta" from_work_dir="ncbi_dataset/data/gene.fna"> | |
| 83 <filter>not exclude_gene</filter> | |
| 84 </data> | |
| 85 <data name="protein_fasta" format="fasta" label="NCBI datasets gene: protein fasta" from_work_dir="ncbi_dataset/data/protein.faa"> | |
| 86 <filter>not exclude_protein</filter> | |
| 87 </data> | |
| 88 <data name="rna_fasta" format="fasta" label="NCBI datasets gene: rna fasta" from_work_dir="ncbi_dataset/data/rna.fna"> | |
| 89 <filter>not exclude_rna</filter> | |
| 90 </data> | |
| 91 <data name="gene_flanks" format="fasta" label="NCBI datasets gene: flanking sequence fasta" from_work_dir="ncbi_dataset/data/gene_flank.fna"> | |
| 92 <filter><![CDATA[subcommand['download_by'] == 'accession' and subcommand['include_flanks_bp']]]></filter> <!-- include_flanks_bp is only declared in the accession branch, so the mode is checked first to avoid a failing lookup in the other modes --> | |
| 93 </data> | |
| 94 </outputs> | |
| 95 <tests> | |
| 96 <test title="test download by gene-id"> <!-- two gene IDs entered as text; expects two known sequence lines in the gene fasta --> | |
| 97 <conditional name="subcommand"> | |
| 98 <param name="download_by" value="gene-id"/> | |
| 99 <conditional name="text_or_file"> | |
| 100 <param name="text_or_file" value="text"/> | |
| 101 <param name="accession" value="472 672"/> | |
| 102 </conditional> | |
| 103 </conditional> | |
| 104 <output name="gene_fasta"> | |
| 105 <assert_contents> | |
| 106 <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/> | |
| 107 <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/> | |
| 108 </assert_contents> | |
| 109 </output> | |
| 110 </test> | |
| 111 <test title="test download by gene-id, test sanitizer"> <!-- the input carries characters outside digits and space; the gene-id sanitizer must strip them so that "exit" never reaches the command line --> | |
| 112 <conditional name="subcommand"> | |
| 113 <param name="download_by" value="gene-id"/> | |
| 114 <conditional name="text_or_file"> | |
| 115 <param name="text_or_file" value="text"/> | |
| 116 <param name="accession" value="472 672;exit"/> | |
| 117 </conditional> | |
| 118 </conditional> | |
| 119 <output name="gene_fasta"> | |
| 120 <assert_contents> | |
| 121 <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/> | |
| 122 <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/> | |
| 123 </assert_contents> | |
| 124 </output> | |
| 125 <assert_command> | |
| 126 <not_has_text text="exit"/> | |
| 127 </assert_command> | |
| 128 </test> | |
| 129 <test title="test download by gene symbol"> <!-- NOTE(review): expects the same two lines as the gene-id tests; presumably BRCA1 and ATM are the genes behind IDs 672 and 472 (confirm) --> | |
| 130 <conditional name="subcommand"> | |
| 131 <param name="download_by" value="symbol"/> | |
| 132 <conditional name="text_or_file"> | |
| 133 <param name="text_or_file" value="text"/> | |
| 134 <param name="accession" value="BRCA1 ATM"/> | |
| 135 </conditional> | |
| 136 </conditional> | |
| 137 <output name="gene_fasta"> | |
| 138 <assert_contents> | |
| 139 <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/> | |
| 140 <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/> | |
| 141 </assert_contents> | |
| 142 </output> | |
| 143 </test> | |
| 144 <test title="test download by accession"> <!-- also checks that each accession is single-quoted on the command line --> | |
| 145 <conditional name="subcommand"> | |
| 146 <param name="download_by" value="accession"/> | |
| 147 <conditional name="text_or_file"> | |
| 148 <param name="text_or_file" value="text"/> | |
| 149 <param name="accession" value="NM_000546.6 NM_000492.4"/> | |
| 150 </conditional> | |
| 151 </conditional> | |
| 152 <output name="gene_fasta"> | |
| 153 <assert_contents> | |
| 154 <has_line line="GTAGTAGGTCTTTGGCATTAGGAGCTTGAGCCCAGACGGCCCTAGCAGGGACCCCAGCGCCCGAGAGACC"/> | |
| 155 <has_line line="CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGC"/> | |
| 156 </assert_contents> | |
| 157 </output> | |
| 158 <assert_command> | |
| 159 <has_text text="'NM_000546.6' 'NM_000492.4'"/> | |
| 160 </assert_command> | |
| 161 </test> | |
| 162 <test title="test download by accession with flanking sequence"> <!-- requests 10 bp of flank and checks both the flank fasta and the emitted option --> | |
| 163 <conditional name="subcommand"> | |
| 164 <param name="download_by" value="accession"/> | |
| 165 <conditional name="text_or_file"> | |
| 166 <param name="text_or_file" value="text"/> | |
| 167 <param name="accession" value="WP_004675351.1"/> | |
| 168 </conditional> | |
| 169 <param name="include_flanks_bp" value="10"/> | |
| 170 </conditional> | |
| 171 <output name="gene_flanks"> | |
| 172 <assert_contents> | |
| 173 <has_line line="gccctgccgcATGATCGATCTGATGCCGACGAGCGAGGAACAGGCGGCGGCGATCGTCCGCACCCATGCG"/> | |
| 174 </assert_contents> | |
| 175 </output> | |
| 176 <assert_command> | |
| 177 <has_text text="--include-flanks-bp 10"/> | |
| 178 </assert_command> | |
| 179 </test> | |
| 180 <test title="test download by taxon"> <!-- rna and protein outputs are switched off, so only the gene fasta is checked --> | |
| 181 <conditional name="subcommand"> | |
| 182 <param name="download_by" value="taxon"/> | |
| 183 <param name="taxon" value="Mycobacterium tuberculosis H37Rv"/> | |
| 184 </conditional> | |
| 185 <param name="exclude_rna" value="true"/> | |
| 186 <param name="exclude_protein" value="true"/> | |
| 187 <output name="gene_fasta"> | |
| 188 <assert_contents> | |
| 189 <has_line line="GTGGCGCTGAATATCAAAGACCCTGAGGTAGACCGACTAGCCGCCGAACTCGCTGACCGGCTGCACACCA"/> | |
| 190 </assert_contents> | |
| 191 </output> | |
| 192 </test> | |
| 193 </tests> | |
| 194 <help> | |
| 195 Download a gene dataset including gene, transcript and protein sequences, a data table and a data report. Gene datasets can be specified by NCBI Gene ID, gene symbol, RefSeq accession or taxon. Datasets are downloaded as a zip file. | |
| 196 | |
| 197 The default gene dataset includes the following files: | |
| | |
| 198 * gene.fna (gene sequences) | |
| 199 * rna.fna (transcript sequences) | |
| 200 * protein.faa (protein sequences) | |
| 201 * data_report.jsonl (data report with gene metadata) | |
| 202 * data_table.tsv (data table with gene metadata, one transcript per row) | |
| 203 * dataset_catalog.json (a list of files and file types included in the dataset) | |
| 204 </help> | |
| 205 | |
| 206 </tool> |
