Mercurial > repos > iuc > ncbi_datasets
comparison datasets_genome.xml @ 8:d64df2210624 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_datasets commit a58a3198ea1b60b6aa9567c6c65f00f8361794f6"
| author | iuc |
|---|---|
| date | Wed, 04 May 2022 13:15:06 +0000 |
| parents | c87df3f9e19d |
| children | fcd6769c5b7d |
comparison
equal
deleted
inserted
replaced
| 7:80f65b5e0e24 | 8:d64df2210624 |
|---|---|
| 17 '$query.subcommand.taxon' | 17 '$query.subcommand.taxon' |
| 18 #end if | 18 #end if |
| 19 $filters.reference | 19 $filters.reference |
| 20 $filters.annotated | 20 $filters.annotated |
| 21 #if $filters.assembly_level: | 21 #if $filters.assembly_level: |
| 22 --assembly_level $filters.assembly_level | 22 --assembly-level $filters.assembly_level |
| 23 #end if | 23 #end if |
| 24 #if $filters.assembly_source: | 24 #if $filters.assembly_source: |
| 25 --assembly_source $filters.assembly_source | 25 --assembly-source $filters.assembly_source |
| 26 #end if | 26 #end if |
| 27 #if $filters.chromosomes: | 27 #if $filters.chromosomes: |
| 28 --chromosomes '$filters.chromosomes' | 28 --chromosomes '$filters.chromosomes' |
| 29 #end if | 29 #end if |
| 30 @EXCLUDES_GENOME@ | 30 @EXCLUDES_GENOME@ |
| 66 | 66 |
| 67 <repeat name="search" title="Add search terms"> | 67 <repeat name="search" title="Add search terms"> |
| 68 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> | 68 <param argument="--search" type="text" label="Only include genomes that have the specified text in the searchable fields" help="Searchable fields are species and infraspecies, assembly name and submitter"/> |
| 69 </repeat> | 69 </repeat> |
| 70 </section> | 70 </section> |
| 71 <section name="file_choices" title="File Choices"> | 71 <section name="file_choices" title="File Choices" expanded="true"> |
| 72 <expand macro="excludes_genome"></expand> | 72 <expand macro="excludes_genome"></expand> |
| 73 <expand macro="includes_genome"></expand> | 73 <expand macro="includes_genome"></expand> |
| 74 </section> | 74 </section> |
| 75 <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/> | 75 <param name="uncompressed" type="boolean" label="Uncompress the dataset archive" checked="true"/> |
| 76 </inputs> | 76 </inputs> |
| 88 <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 88 <discover_datasets pattern="(?P<identifier_0>.*?)\/sequence_report.jsonl" ext="json" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
| 89 <filter>uncompressed</filter> | 89 <filter>uncompressed</filter> |
| 90 </collection> | 90 </collection> |
| 91 <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list"> | 91 <collection name="genome_fasta" label="NCBI Genome Datasets: genome fasta" type="list"> |
| 92 <discover_datasets pattern="(?P<identifier_0>.*?)\/.*(?<!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 92 <discover_datasets pattern="(?P<identifier_0>.*?)\/.*(?<!cds_from)(chr|unplaced|_genomic)*fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
| 93 <filter>uncompressed and file_choices['exclude_seq']</filter> | 93 <filter>uncompressed and not file_choices['exclude_seq']</filter> |
| 94 </collection> | 94 </collection> |
| 95 <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> | 95 <collection name="genomic_cds" label="NCBI Genome Datasets: genomic cds fasta" type="list"> |
| 96 <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 96 <discover_datasets pattern="(?P<identifier_0>.*?)\/cds_from_genomic\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
| 97 <filter>uncompressed and file_choices['exclude_genomic_cds']</filter> | 97 <filter>uncompressed and not file_choices['exclude_genomic_cds']</filter> |
| 98 </collection> | 98 </collection> |
| 99 <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> | 99 <collection name="genomic_gff" label="NCBI Genome Datasets: genomic gff3" type="list"> |
| 100 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 100 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gff" ext="gff3" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
| 101 <filter>uncompressed and file_choices['exclude_gff3']</filter> | 101 <filter>uncompressed and not file_choices['exclude_gff3']</filter> |
| 102 </collection> | 102 </collection> |
| 103 <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> | 103 <collection name="rna_fasta" label="NCBI Genome Datasets: RNA fasta" type="list"> |
| 104 <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 104 <discover_datasets pattern="(?P<identifier_0>.*?)\/rna\.fna" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
| 105 <filter>uncompressed and file_choices['exclude_rna']</filter> | 105 <filter>uncompressed and not file_choices['exclude_rna']</filter> |
| 106 </collection> | 106 </collection> |
| 107 <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> | 107 <collection name="protein_fasta" label="NCBI Genome Datasets: protein fasta" type="list"> |
| 108 <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 108 <discover_datasets pattern="(?P<identifier_0>.*?)\/protein\.faa" ext="fasta" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
| 109 <filter>uncompressed and file_choices['exclude_protein']</filter> | 109 <filter>uncompressed and not file_choices['exclude_protein']</filter> |
| 110 </collection> | 110 </collection> |
| 111 <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> | 111 <collection name="genomic_gbff" label="NCBI Genome Datasets: GenBank flatfile" type="list"> |
| 112 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> | 112 <discover_datasets pattern="(?P<identifier_0>.*?)\/genomic\.gbff" ext="txt" directory="ncbi_dataset/data" recurse="true" match_relative_path="true"></discover_datasets> |
| 113 <filter>uncompressed and file_choices['include_gbff']</filter> | 113 <filter>uncompressed and file_choices['include_gbff']</filter> |
| 114 </collection> | 114 </collection> |
| 123 <param name="download_by" value="taxon"></param> | 123 <param name="download_by" value="taxon"></param> |
| 124 <param name="text_or_file" value="text"></param> | 124 <param name="text_or_file" value="text"></param> |
| 125 <param name="taxon" value="human"></param> | 125 <param name="taxon" value="human"></param> |
| 126 </conditional> | 126 </conditional> |
| 127 <param name="chromosomes" value="21"></param> | 127 <param name="chromosomes" value="21"></param> |
| 128 <param name="exclude_protein" value="true"/> | |
| 129 <param name="exclude_rna" value="true"/> | |
| 130 <param name="exclude_seq" value="true"/> | |
| 131 <param name="exclude_genomic_cds" value="true"/> | |
| 132 <param name="exclude_gff3" value="true"/> | |
| 128 <param name="uncompressed" value="false"/> | 133 <param name="uncompressed" value="false"/> |
| 129 <param name="released_before" value="01/01/2018"></param> | 134 <param name="released_before" value="01/01/2018"></param> |
| 130 <output name="archive_contents"> | 135 <output name="archive_contents"> |
| 131 <assert_contents> | 136 <assert_contents> |
| 132 <has_text text="ncbi_dataset/data/dataset_catalog.json"/> | 137 <has_text text="ncbi_dataset/data/dataset_catalog.json"/> |
| 133 </assert_contents> | 138 </assert_contents> |
| 134 </output> | 139 </output> |
| 135 </test> | 140 </test> |
| 136 <test expect_num_outputs="5"> | 141 <test expect_num_outputs="2"> |
| 142 <conditional name="query|subcommand"> | |
| 143 <param name="download_by" value="taxon"></param> | |
| 144 <param name="text_or_file" value="text"></param> | |
| 145 <param name="taxon" value="human"></param> | |
| 146 </conditional> | |
| 147 <param name="chromosomes" value="21"></param> | |
| 148 <param name="uncompressed" value="false"/> | |
| 149 <param name="exclude_protein" value="true"/> | |
| 150 <param name="exclude_rna" value="true"/> | |
| 151 <param name="exclude_seq" value="true"/> | |
| 152 <param name="exclude_genomic_cds" value="true"/> | |
| 153 <param name="exclude_gff3" value="true"/> | |
| 154 <param name="assembly_level" value="chromosome"/> | |
| 155 <param name="released_before" value="01/01/2018"></param> | |
| 156 <output name="archive_contents"> | |
| 157 <assert_contents> | |
| 158 <has_text text="ncbi_dataset/data/dataset_catalog.json"/> | |
| 159 </assert_contents> | |
| 160 </output> | |
| 161 </test> | |
| 162 <test expect_num_outputs="4"> | |
| 137 <conditional name="query|subcommand"> | 163 <conditional name="query|subcommand"> |
| 138 <param name="download_by" value="accession"></param> | 164 <param name="download_by" value="accession"></param> |
| 139 <conditional name="text_or_file"> | 165 <conditional name="text_or_file"> |
| 140 <param name="text_or_file" value="text"></param> | 166 <param name="text_or_file" value="text"></param> |
| 141 <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param> | 167 <param name="accession" value="GCF_000013305.1 GCF_000007445.1"></param> |
| 142 </conditional> | 168 </conditional> |
| 143 </conditional> | 169 </conditional> |
| 144 <param name="uncompressed" value="true"/> | 170 <param name="uncompressed" value="true"/> |
| 145 <param name="released_before" value="01/01/2007"></param> | 171 <param name="released_before" value="01/01/2007"></param> |
| 146 <param name="exclude_genomic_cds" value="true"/> | 172 <param name="exclude_protein" value="true"/> |
| 173 <param name="exclude_rna" value="true"/> | |
| 174 <param name="exclude_seq" value="true"/> | |
| 175 <param name="exclude_gff3" value="true"/> | |
| 147 <param name="include_gtf" value="true"/> | 176 <param name="include_gtf" value="true"/> |
| 148 <output name="genome_data_report"> | 177 <output name="genome_data_report"> |
| 149 <assert_contents> | 178 <assert_contents> |
| 150 <has_text text="GCF_000013305.1"/> | 179 <has_text text="GCF_000013305.1"/> |
| 151 </assert_contents> | 180 </assert_contents> |
| 152 </output> | 181 </output> |
| 153 <output_collection name="sequence_report" type="list"> | |
| 154 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/> | |
| 155 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/> | |
| 156 </output_collection> | |
| 157 <output_collection name="genomic_gtf" type="list"> | 182 <output_collection name="genomic_gtf" type="list"> |
| 158 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> | 183 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.genomic.gtf" compare="contains"/> |
| 159 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> | 184 <element name="GCF_000007445.1" file="GCF_000007445.1.genomic.gtf" compare="contains"/> |
| 160 </output_collection> | 185 </output_collection> |
| 161 <output_collection name="genomic_cds" type="list"> | 186 <output_collection name="genomic_cds" type="list"> |
| 170 <param name="text_or_file" value="file"></param> | 195 <param name="text_or_file" value="file"></param> |
| 171 <param name="inputfile" value="accessions.txt"></param> | 196 <param name="inputfile" value="accessions.txt"></param> |
| 172 </conditional> | 197 </conditional> |
| 173 </conditional> | 198 </conditional> |
| 174 <param name="include_gbff" value="true"/> | 199 <param name="include_gbff" value="true"/> |
| 175 <param name="exclude_seq" value="false"/> | 200 <param name="exclude_protein" value="true"/> |
| 176 <param name="exclude_gff3" value="true"/> | 201 <param name="exclude_rna" value="true"/> |
| 202 <param name="exclude_seq" value="true"/> | |
| 203 <param name="exclude_genomic_cds" value="true"/> | |
| 177 <param name="uncompressed" value="true"/> | 204 <param name="uncompressed" value="true"/> |
| 178 <param name="released_before" value="01/02/2007"></param> | 205 <param name="released_before" value="01/02/2007"></param> |
| 179 <output name="genome_data_report"> | 206 <output name="genome_data_report"> |
| 180 <assert_contents> | 207 <assert_contents> |
| 181 <has_text text="SAMN02604181"/> | 208 <has_text text="SAMN02604181"/> |
| 182 </assert_contents> | 209 </assert_contents> |
| 183 </output> | 210 </output> |
| 184 <output_collection name="sequence_report" type="list"> | |
| 185 <element name="GCF_000013305.1" file="genome.2.GCF_000013305.1.seq.rpt.jsonl" compare="contains"/> | |
| 186 <element name="GCF_000007445.1" file="genome.2.GCF_000007445.1.seq.rpt.jsonl" compare="contains"/> | |
| 187 </output_collection> | |
| 188 <output_collection name="genomic_gff" type="list"> | 211 <output_collection name="genomic_gff" type="list"> |
| 189 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/> | 212 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gff" compare="contains"/> |
| 190 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/> | 213 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gff" compare="contains"/> |
| 191 </output_collection> | 214 </output_collection> |
| 192 <output_collection name="genomic_gbff" type="list"> | 215 <output_collection name="genomic_gbff" type="list"> |
| 193 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/> | 216 <element name="GCF_000013305.1" file="genome.3.GCF_000013305.1.genomic.gbff" compare="contains"/> |
| 194 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/> | 217 <element name="GCF_000007445.1" file="genome.3.GCF_000007445.1.genomic.gbff" compare="contains"/> |
| 218 </output_collection> | |
| 219 </test> | |
| 220 <test expect_num_outputs="2"> | |
| 221 <conditional name="query|subcommand"> | |
| 222 <param name="download_by" value="accession"></param> | |
| 223 <conditional name="text_or_file"> | |
| 224 <param name="text_or_file" value="text"></param> | |
| 225 <param name="accession" value="GCF_000001405.40"></param> | |
| 226 </conditional> | |
| 227 </conditional> | |
| 228 <param name="exclude_protein" value="true"/> | |
| 229 <param name="exclude_rna" value="true"/> | |
| 230 <param name="exclude_seq" value="true"/> | |
| 231 <param name="exclude_genomic_cds" value="true"/> | |
| 232 <param name="exclude_gff3" value="true"/> | |
| 233 <param name="uncompressed" value="true"/> | |
| 234 <param name="released_before" value="01/02/2007"></param> | |
| 235 <output_collection name="sequence_report"> | |
| 236 <element name="GCF_000001405.40" file="GCF_000001405.40.seq.rpt.jsonl" compare="contains"/> | |
| 195 </output_collection> | 237 </output_collection> |
| 196 </test> | 238 </test> |
| 197 </tests> | 239 </tests> |
| 198 <help> | 240 <help> |
| 199 <![CDATA[ | 241 <![CDATA[ |
| 201 | 243 |
| 202 Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report. | 244 Download a genome dataset including genome, transcript and protein sequence, annotation and a detailed data report. |
| 203 Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file. | 245 Genome datasets can be specified by NCBI Assembly or BioProject accession or taxon. Datasets are downloaded as a zip file. |
| 204 | 246 |
| 205 The default genome dataset includes the following files (if available): | 247 The default genome dataset includes the following files (if available): |
| 248 * data_report.jsonl (genome assembly and annotation metadata, not always available) | |
| 206 * genomic.fna (genomic sequences) | 249 * genomic.fna (genomic sequences) |
| 207 * rna.fna (transcript sequences) | 250 * rna.fna (transcript sequences) |
| 208 * protein.faa (protein sequences) | 251 * protein.faa (protein sequences) |
| 209 * genomic.gff (genome annotation in gff3 format) | 252 * genomic.gff (genome annotation in gff3 format) |
| 210 * data_report.jsonl (data report with genome assembly and annotation metadata) | |
| 211 * dataset_catalog.json (a list of files and file types included in the dataset) | 253 * dataset_catalog.json (a list of files and file types included in the dataset) |
| 212 ]]> | 254 ]]> |
| 213 </help> | 255 </help> |
| 214 | 256 |
| 215 </tool> | 257 </tool> |
