comparison datasets_gene.xml @ 0:c6009f4d7261 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit fd91cf3000d556d8219426eddb8a3059071a2009"
author iuc
date Thu, 15 Jul 2021 13:31:56 +0000
parents
children 48e0814f250a
comparison
equal deleted inserted replaced
-1:000000000000 0:c6009f4d7261
<tool id="datasets-download-gene" name="NCBI datasets download gene" profile="@PROFILE@" license="@LICENSE@" version="@TOOL_VERSION@">
    <!-- Galaxy wrapper around the NCBI "datasets download gene" command.
         The PROFILE, LICENSE and TOOL_VERSION tokens, and the "requirements"
         macro, are expected to be provided by the imported macros.xml. -->
    <description>Download genes from NCBI</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
<command><![CDATA[
## Certificate setup is injected by the SETUP_CERTIFICATES token from the imported macros.
@SETUP_CERTIFICATES@
datasets download gene $subcommand.download_by
#if $subcommand.download_by == 'taxon'
    ## Taxon mode takes a single positional taxon and has no text/file selector.
    '$subcommand.taxon'
#else
    #if $subcommand.text_or_file.text_or_file == 'text'
        #if $subcommand.download_by == 'gene-id'
            ## Left unquoted: the gene-id sanitizer only lets digits and spaces through.
            $subcommand.text_or_file.accession
        #else
            ## Quote every space-separated symbol/accession individually.
            #echo " ".join(f"'{x}'" for x in $subcommand.text_or_file.accession.split(' ') if x)
        #end if
    #else
        --inputfile '$subcommand.text_or_file.inputfile'
    #end if
    ## Mode-specific options live directly under the subcommand conditional
    ## (not under text_or_file), so they apply to both text and file input.
    #if $subcommand.download_by == 'symbol' and $subcommand.taxon
        --taxon '$subcommand.taxon'
    #end if
    #if $subcommand.download_by == 'accession' and $subcommand.taxon_filter
        --taxon-filter '$subcommand.taxon_filter'
    #end if
#end if
## Exclusion flags are injected by the EXCLUDES_GENE token from the imported macros.
@EXCLUDES_GENE@
#if $subcommand.download_by == 'accession' and $subcommand.include_flanks_bp
    --include-flanks-bp $subcommand.include_flanks_bp
#end if
## Unpack the downloaded archive so outputs can be collected from ncbi_dataset/data.
&& 7z x ncbi_dataset.zip
]]></command>
<inputs>
    <!-- "subcommand" selects the lookup mode; its value is passed verbatim as the
         subcommand of "datasets download gene" in the command template. -->
    <conditional name="subcommand">
        <param name="download_by" type="select" label="Choose how to find genes to download">
            <option value="gene-id">Download a gene dataset by NCBI Gene ID</option>
            <option value="symbol">Download a gene dataset by gene symbol</option>
            <option value="accession">Download a gene dataset by RefSeq nucleotide or protein accession</option>
            <option value="taxon">Download a gene dataset by taxon</option>
        </param>
        <when value="gene-id">
            <!-- Only digits and spaces survive sanitizing; the command template relies on
                 this to pass the IDs unquoted. -->
            <expand macro="text_or_file" what="gene-id" what_extended="NCBI Gene ID" help="Should be valid NCBI Gene ID">
                <sanitizer invalid_char="">
                    <valid initial="string.digits">
                        <add value=" " />
                    </valid>
                </sanitizer>
            </expand>
        </when>
        <when value="symbol">
            <expand macro="text_or_file" what="symbol" what_extended="gene symbol" help="Should be valid gene symbol"/>
            <param argument="--taxon" type="text" value="human" label="Specify a species name" help="Species name can be common or scientific name or species-level NCBI Taxonomy ID"/>
        </when>
        <when value="accession">
            <expand macro="text_or_file" what="accession" what_extended="RefSeq nucleotide or protein accession" help="Should be RefSeq nucleotide or protein accession"/>
            <param argument="--include-flanks-bp" type="integer" optional="true" min="0" label="Include gene flanking sequence, limited to prokaryotic genes" help="If not specified flanking gene sequences will not be downloaded. Accession must start with WP"/>
            <param argument="--taxon-filter" type="text" optional="true" label="limit genes to a specified taxon" help="any rank"/>
        </when>
        <when value="taxon">
            <param name="taxon" type="text" label="Enter taxon" help="e.g. human, mouse, bos taurus, etc."/>
        </when>
    </conditional>
    <expand macro="excludes_gene"/>
    <!-- NOTE(review): no visible line of the command template references limit_fasta;
         presumably the EXCLUDES_GENE token emits the fasta-filter options. Confirm in macros.xml. -->
    <conditional name="limit_fasta" label="Limit fasta by accession?">
        <param name="limit" type="select" label="Select limit method">
            <option value="none">None</option>
            <option value="text">Enter list of accessions</option>
            <option value="file">Read list of accessions from file</option>
        </param>
        <when value="none"/>
        <when value="text">
            <param argument="--fasta-filter" type="text" label="Limit gene fasta download to these accessions"/>
        </when>
        <when value="file">
            <param argument="--fasta-filter-file" type="data" format="txt" label="File of accessions to limit gene fasta download"/>
        </when>
    </conditional>
</inputs>
<outputs>
    <!-- Each output is picked up from the unpacked archive under ncbi_dataset/data.
         The exclude_* names used in the filters are not declared in this file;
         presumably they come from the excludes_gene macro. -->
    <data name="gene_fasta" format="fasta" label="NCBI datasets gene: gene fasta" from_work_dir="ncbi_dataset/data/gene.fna">
        <filter>not exclude_gene</filter>
    </data>
    <data name="protein_fasta" format="fasta" label="NCBI datasets gene: protein fasta" from_work_dir="ncbi_dataset/data/protein.faa">
        <filter>not exclude_protein</filter>
    </data>
    <data name="rna_fasta" format="fasta" label="NCBI datasets gene: rna fasta" from_work_dir="ncbi_dataset/data/rna.fna">
        <filter>not exclude_rna</filter>
    </data>
    <data name="gene_flanks" format="fasta" label="NCBI datasets gene: flanking sequence fasta" from_work_dir="ncbi_dataset/data/gene_flank.fna">
        <!-- include_flanks_bp only exists in the "accession" branch, so check the
             selector first to avoid a failed key lookup in the other modes. -->
        <filter><![CDATA[subcommand['download_by'] == 'accession' and subcommand['include_flanks_bp']]]></filter>
    </data>
</outputs>
<tests>
    <!-- Gene IDs 472 and 672 entered as text. -->
    <test title="test download by gene-id">
        <conditional name="subcommand">
            <param name="download_by" value="gene-id"/>
            <conditional name="text_or_file">
                <param name="text_or_file" value="text"/>
                <param name="accession" value="472 672"/>
            </conditional>
        </conditional>
        <output name="gene_fasta">
            <assert_contents>
                <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/>
                <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/>
            </assert_contents>
        </output>
    </test>
    <!-- The input carries characters outside the digit/space whitelist; the sanitizer
         must drop them so that "exit" never reaches the command line. -->
    <test title="test download by gene-id, test sanitizer">
        <conditional name="subcommand">
            <param name="download_by" value="gene-id"/>
            <conditional name="text_or_file">
                <param name="text_or_file" value="text"/>
                <param name="accession" value="472 672; exit"/>
            </conditional>
        </conditional>
        <output name="gene_fasta">
            <assert_contents>
                <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/>
                <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/>
            </assert_contents>
        </output>
        <assert_command>
            <not_has_text text="exit"/>
        </assert_command>
    </test>
    <!-- Gene symbols entered as text; the expected lines match the gene-id test. -->
    <test title="test download by gene symbol">
        <conditional name="subcommand">
            <param name="download_by" value="symbol"/>
            <conditional name="text_or_file">
                <param name="text_or_file" value="text"/>
                <param name="accession" value="BRCA1 ATM"/>
            </conditional>
        </conditional>
        <output name="gene_fasta">
            <assert_contents>
                <has_line line="CCGCGTCCGCGCTTACCCAATACAAGCCGGGCTACGTCCGAGGGTAACAACATGATCAAAACCACAGCAG"/>
                <has_line line="GCTGAGACTTCCTGGACGGGGGACAGGCTGTGGGGTTTCTCAGATAACTGGGCCCCTGCGCTCAGGAGGC"/>
            </assert_contents>
        </output>
    </test>
    <!-- Accessions must reach the command line individually single-quoted. -->
    <test title="test download by accession">
        <conditional name="subcommand">
            <param name="download_by" value="accession"/>
            <conditional name="text_or_file">
                <param name="text_or_file" value="text"/>
                <param name="accession" value="NM_000546.6 NM_000492.4"/>
            </conditional>
        </conditional>
        <output name="gene_fasta">
            <assert_contents>
                <has_line line="GTAGTAGGTCTTTGGCATTAGGAGCTTGAGCCCAGACGGCCCTAGCAGGGACCCCAGCGCCCGAGAGACC"/>
                <has_line line="CTCAAAAGTCTAGAGCCACCGTCCAGGGAGCAGGTAGCTGCTGGGCTCCGGGGACACTTTGCGTTCGGGC"/>
            </assert_contents>
        </output>
        <assert_command>
            <has_text text="'NM_000546.6' 'NM_000492.4'"/>
        </assert_command>
    </test>
    <!-- A WP accession with 10 bp of flanking sequence requested. -->
    <test title="test download by accession with flanking sequence">
        <conditional name="subcommand">
            <param name="download_by" value="accession"/>
            <conditional name="text_or_file">
                <param name="text_or_file" value="text"/>
                <param name="accession" value="WP_004675351.1"/>
            </conditional>
            <param name="include_flanks_bp" value="10"/>
        </conditional>
        <output name="gene_flanks">
            <assert_contents>
                <has_line line="gccctgccgcATGATCGATCTGATGCCGACGAGCGAGGAACAGGCGGCGGCGATCGTCCGCACCCATGCG"/>
            </assert_contents>
        </output>
        <assert_command>
            <has_text text="--include-flanks-bp 10"/>
        </assert_command>
    </test>
    <!-- Taxon mode with RNA and protein excluded; only the gene FASTA is checked. -->
    <test title="test download by taxon">
        <conditional name="subcommand">
            <param name="download_by" value="taxon"/>
            <param name="taxon" value="Mycobacterium tuberculosis H37Rv"/>
        </conditional>
        <param name="exclude_rna" value="true"/>
        <param name="exclude_protein" value="true"/>
        <output name="gene_fasta">
            <assert_contents>
                <has_line line="GTGGCGCTGAATATCAAAGACCCTGAGGTAGACCGACTAGCCGCCGAACTCGCTGACCGGCTGCACACCA"/>
            </assert_contents>
        </output>
    </test>
</tests>
<help><![CDATA[
Download a gene dataset including gene, transcript and protein sequence, a data table and a data report. Gene datasets can be specified by NCBI Gene ID, symbol, RefSeq accession or taxon. Datasets are downloaded as a zip file.

The default gene dataset includes the following files:

* gene.fna (gene sequences)
* rna.fna (transcript sequences)
* protein.faa (protein sequences)
* data_report.jsonl (data report with gene metadata)
* data_table.tsv (data table with gene metadata, one transcript per row)
* dataset_catalog.json (a list of files and file types included in the dataset)
]]></help>
205
206 </tool>