Mercurial > repos > dfornika > ncbi_acc_download
changeset 11:c3b515a4a680 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_acc_download commit 0e2a85f7fd091be4b0b45520cc0284d9fc463793-dirty"
author | dfornika |
---|---|
date | Thu, 28 Nov 2019 20:42:04 +0000 |
parents | 1abc6c72287e |
children | 4c5a3a1ab361 |
files | ncbi_acc_download.xml |
diffstat | 1 files changed, 124 insertions(+), 41 deletions(-) [+] |
line wrap: on
line diff
--- a/ncbi_acc_download.xml Mon Nov 25 21:24:08 2019 +0000 +++ b/ncbi_acc_download.xml Thu Nov 28 20:42:04 2019 +0000 @@ -4,13 +4,13 @@ <import>macros.xml</import> </macros> <requirements> - <requirement type="package" version="@TOOL_VERSION@">ncbi-acc-download</requirement> + <requirement type="package" version="@TOOL_VERSION@">ncbi-acc-download</requirement> </requirements> <command detect_errors="exit_code"><![CDATA[ #if $query_source.select == "accession_file": cp $query_source.accession_file accessions && #else if $query_source.select == "accession_list": - echo '$query_source.accession_list' | sed 's/\,/\n/g' > accessions && + echo '$query_source.accession_list' | sed -r 's/(\,|__cn__)/\n/g' > accessions && #end if mkdir outdir && cd outdir && @@ -20,7 +20,7 @@ --format '${molecule.format}' --extended-validation all \${accession} && sleep 5; - done < ../accessions + done < ../accessions 2>> ../error.log ]]></command> <inputs> <conditional name="query_source"> @@ -32,7 +32,7 @@ <param label="Accession File" name="accession_file" type="data" format="text,tabular"/> </when> <when value="accession_list"> - <param label="ID List" name="accession_list" type="text" area="true" help="Newline/Comma separated list of IDs"/> + <param label="ID List" name="accession_list" type="text" area="true" help="Newline/Comma separated list of IDs"/> </when> </conditional> <conditional name="molecule"> @@ -56,23 +56,31 @@ </conditional> </inputs> <outputs> - <collection name="output" type="list" label="Output"> - <filter>options['fasta']</filter> - <discover_datasets pattern="__designation_and_ext__" directory="outdir" /> + <collection name="output" type="list" label="Downloaded Files"> + <discover_datasets pattern="(?P<name>.+)\.fa$" directory="outdir" format="fasta"/> + <discover_datasets pattern="(?P<name>.+)\.gbk$" directory="outdir" format="genbank"/> + <discover_datasets pattern="(?P<name>.+)\.gff$" directory="outdir" format="gff"/> + <discover_datasets pattern="(?P<name>.+)\.ft$" directory="outdir" format="text"/> </collection> + <data name="error_log" from_workdir="error.log" /> </outputs> <tests> <test> - <param name="molecule" value="nucleotide"/> - <param name="format" value="fasta"/> - <param name="accession_file" value="accessions_1.tsv"/> + <conditional name="molecule"> + <param name="select" value="nucleotide"/> + <param name="format" value="fasta"/> + </conditional> + <conditional name="query_source"> + <param name="select" value="accession_file" /> + <param name="accession_file" value="accessions_1.tsv"/> + </conditional> <output_collection name="output" type="list"> - <element name="CP011064"> + <element name="CP011064" ftype="fasta"> <assert_contents> <has_line line=">CP011064.1 Escherichia coli str. Sanji plasmid pSJ_94, complete sequence" /> </assert_contents> </element> - <element name="CP021680"> + <element name="CP021680" ftype="fasta"> <assert_contents> <has_line line=">CP021680.1 Escherichia coli strain AR_0162 plasmid tig00002623, complete sequence" /> </assert_contents> @@ -80,16 +88,21 @@ </output_collection> </test> <test> - <param name="molecule" value="nucleotide"/> - <param name="format" value="genbank"/> - <param name="accession_file" value="accessions_1.tsv"/> + <conditional name="molecule"> + <param name="select" value="nucleotide"/> + <param name="format" value="genbank"/> + </conditional> + <conditional name="query_source"> + <param name="select" value="accession_file" /> + <param name="accession_file" value="accessions_1.tsv"/> + </conditional> <output_collection name="output" type="list"> - <element name="CP011064" > + <element name="CP011064" ftype="genbank"> <assert_contents> <has_line line="DEFINITION Escherichia coli str. Sanji plasmid pSJ_94, complete sequence." /> </assert_contents> </element> - <element name="CP021680"> + <element name="CP021680" ftype="genbank"> <assert_contents> <has_line line="DEFINITION Escherichia coli strain AR_0162 plasmid tig00002623, complete" /> </assert_contents> @@ -97,17 +110,65 @@ </output_collection> </test> <test> - <param name="molecule" value="nucleotide"/> - <param name="format" value="fasta"/> - <param name="select" value="accession_list"/> - <param name="accession_list" value="CP011064,CP021680"/> + <conditional name="molecule"> + <param name="select" value="nucleotide"/> + <param name="format" value="gff3"/> + </conditional> + <conditional name="query_source"> + <param name="select" value="accession_file" /> + <param name="accession_file" value="accessions_1.tsv"/> + </conditional> <output_collection name="output" type="list"> - <element name="CP011064"> + <element name="CP011064" ftype="gff"> + <assert_contents> + <has_line line="##sequence-region CP011064.1 1 94712" /> + </assert_contents> + </element> + <element name="CP021680" ftype="gff"> + <assert_contents> + <has_line line="##sequence-region CP021680.1 1 23332" /> + </assert_contents> + </element> + </output_collection> + </test> + <test> + <conditional name="molecule"> + <param name="select" value="nucleotide"/> + <param name="format" value="featuretable"/> + </conditional> + <conditional name="query_source"> + <param name="select" value="accession_file" /> + <param name="accession_file" value="accessions_1.tsv"/> + </conditional> + <output_collection name="output" type="list"> + <element name="CP011064" ftype="text"> + <assert_contents> + <has_line line=">Feature gb|CP011064.1|" /> + </assert_contents> + </element> + <element name="CP021680" ftype="text"> + <assert_contents> + <has_line line=">Feature gb|CP021680.1|" /> + </assert_contents> + </element> + </output_collection> + </test> + <test> + <conditional name="molecule"> + <param name="select" value="nucleotide"/> + <param name="format" value="fasta"/> + </conditional> + <conditional name="query_source"> + <param name="select" value="accession_list" /> + <param name="accession_list" value="CP011064,CP021680"/> + </conditional> + <output_collection name="output" type="list"> + <element name="CP011064" ftype="fasta"> <assert_contents> <has_line line=">CP011064.1 Escherichia coli str. Sanji plasmid pSJ_94, complete sequence" /> </assert_contents> </element> - <element name="CP021680"> + <element name="CP021680" ftype="fasta"> <assert_contents> <has_line line=">CP021680.1 Escherichia coli strain AR_0162 plasmid tig00002623, complete sequence" /> </assert_contents> @@ -115,33 +176,55 @@ </output_collection> </test> <test> - <param name="molecule" value="protein"/> - <param name="format" value="fasta"/> - <param name="select" value="accession_list"/> - <param name="accession_list" value="NP_003192"/> + <conditional name="molecule"> + <param name="select" value="nucleotide"/> + <param name="format" value="fasta"/> + </conditional> + <conditional name="query_source"> + <param name="select" value="accession_list" /> + <param name="accession_list" value="CP011064 CP021680"/> + </conditional> <output_collection name="output" type="list"> - <element name="NP_003192"> + <element name="CP011064" ftype="fasta"> + <assert_contents> + <has_line line=">CP011064.1 Escherichia coli str. Sanji plasmid pSJ_94, complete sequence" /> + </assert_contents> + </element> + <element name="CP021680" ftype="fasta"> + <assert_contents> + <has_line line=">CP021680.1 Escherichia coli strain AR_0162 plasmid tig00002623, complete sequence" /> + </assert_contents> + </element> + </output_collection> + </test> + <test> + <conditional name="molecule"> + <param name="select" value="protein"/> + <param name="format" value="fasta"/> + </conditional> + <conditional name="query_source"> + <param name="select" value="accession_list" /> + <param name="accession_list" value="NP_003192"/> + </conditional> + <output_collection name="output" type="list"> + <element name="NP_003192" ftype="fasta"> <assert_contents> <has_line line=">NP_003192.1 transcription factor A, mitochondrial isoform 1 precursor [Homo sapiens]" /> </assert_contents> </element> </output_collection> </test> - <test> - <param name="molecule" value="protein"/> - <param name="format" value="genbank"/> - <param name="select" value="accession_list"/> - <param name="accession_list" value="NP_003192"/> - <output_collection name="output" type="list"> - <element name="NP_003192"> - <assert_contents> - <has_line line="DEFINITION transcription factor A, mitochondrial isoform 1 precursor [Homo" /> - </assert_contents> - </element> - </output_collection> - </test> </tests> <help><![CDATA[ +**What it does** +Given a file containing a list of NCBI accession numbers or a direct entry of accession numbers in the tool text input box, this tool will download the corresponding sequence records via the NCBI API. + +**Limitations** +- For protein sequence downloads, only fasta format is supported +- To avoid rate-limits imposed by the NCBI API, records are downloaded sequentially with a delay between requests. This may make it impractical to use this tool to download many (>100) records. + +**Output** +A collection of sequence records in the desired format. ]]></help> <citations> </citations>