Mercurial > repos > iuc > ncbi_eutils_efetch
changeset 5:e269b3b5185b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ncbi_entrez_eutils commit 15bcc5104c577b4b9c761f2854fc686c07ffa9db
| author | iuc |
|---|---|
| date | Thu, 07 Jul 2016 02:37:49 -0400 |
| parents | c26d1863f3f3 |
| children | 2ff5369b4b51 |
| files | __efetch_build_options.py ecitmatch.py efetch.py efetch.xml eutils.py eutils.pyc macros.xml test test-data/ecitmatch.results.tsv test-data/esearch.pubmed.2014-01-pnas.xml test-data/esearch.pubmed.xml test-data/esummary.tax.xml test-data/pm-tax-neighbor.xml tmp |
| diffstat | 14 files changed, 51 insertions(+), 88 deletions(-) [+] |
line wrap: on
line diff
--- a/__efetch_build_options.py Fri Jun 10 15:03:49 2016 -0400 +++ b/__efetch_build_options.py Thu Jul 07 02:37:49 2016 -0400 @@ -210,7 +210,7 @@ </param> </when>''' -FORMAT_OPTION_TPL = '''<option value="{name_type}">{name_type_human}</option>''' +FORMAT_OPTION_TPL = '''<option value="{name_type}">{name_type_human}</option>''' format_names = {}
--- a/ecitmatch.py Fri Jun 10 15:03:49 2016 -0400
+++ b/ecitmatch.py Thu Jul 07 02:37:49 2016 -0400
@@ -5,7 +5,7 @@
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(description='ECitMatch', epilog='')
-    parser.add_argument('--file', help='Tabular file containing citations to search')
+    parser.add_argument('--file', type=argparse.FileType('r'), help='Tabular file containing citations to search')
 
     parser.add_argument('--key', nargs='*', help='Citation Key')
     parser.add_argument('--journal_title', nargs='*', help='Journal Title')
@@ -35,6 +35,7 @@
             })
     else:
         for line in args.file:
+            line = line.strip()
             if not line.startswith('#'):
                 tmp = line.split('\t')
                 try:
--- a/efetch.py Fri Jun 10 15:03:49 2016 -0400
+++ b/efetch.py Thu Jul 07 02:37:49 2016 -0400
@@ -17,8 +17,6 @@
     # Output
     parser.add_argument('--retmode', help='Retmode')
     parser.add_argument('--rettype', help='Rettype')
-    parser.add_argument('--whole', action='store_true',
-                        help='Download all records associated with query')
     args = parser.parse_args()
 
     c = eutils.Client(history_file=args.history_file, user_email=args.user_email, admin_email=args.admin_email)
@@ -34,4 +32,4 @@
         if getattr(args, attr, None) is not None:
             payload[attr] = getattr(args, attr)
 
-    c.fetch(args.db, whole=args.whole, **payload)
+    c.fetch(args.db, ftype=args.retmode, **payload)
--- a/efetch.xml Fri Jun 10 15:03:49 2016 -0400 +++ b/efetch.xml Thu Jul 07 02:37:49 2016 -0400 @@ -19,18 +19,15 @@ --rettype $rettype @EMAIL_ARGUMENTS@ -$whole -> $default]]></command> +]]></command> <inputs> <expand macro="db"/> <expand macro="list_or_hist"/> - <param checked="false" label="Download all records associated with query" name="whole" type="boolean" truevalue="--whole" falsevalue=""/> </inputs> <outputs> - <data format="txt" name="default" label="NCBI EFetch Results"> - <discover_datasets pattern="__designation__.out" ext="txt"/> - <expand macro="efetch_formats" /> - </data> + <collection type="list" label="NCBI EFetch Results" name="output1"> + <discover_datasets pattern="__designation_and_ext__" directory="downloads"/> + </collection> </outputs> <tests> <test> @@ -38,7 +35,9 @@ <param name="output_format" value="full-xml"/> <param name="qss" value="id_list"/> <param name="id_list" value="10239"/> - <output name="default" file="viruses.tax.xml"> + <output name="default"> + <discovered_dataset designation="EFetch Results Chunk 0" ftype="xml" file="viruses.tax.xml" lines_diff="2"> + </discovered_dataset> </output> </test> </tests>
--- a/eutils.py Fri Jun 10 15:03:49 2016 -0400
+++ b/eutils.py Thu Jul 07 02:37:49 2016 -0400
@@ -43,27 +43,23 @@
     def post(self, database, **payload):
         return json.dumps(Entrez.read(Entrez.epost(database, **payload)), indent=4)
 
-    def fetch(self, db, whole=False, **payload):
-        if whole:
-            if 'id' in payload:
-                summary = self.id_summary(db, payload['id'])
-            else:
-                summary = self.history_summary(db)
+    def fetch(self, db, ftype=None, **payload):
+        os.makedirs("downloads")
 
-            count = len(summary)
-
-            payload['retmax'] = BATCH_SIZE
+        if 'id' in payload:
+            summary = self.id_summary(db, payload['id'])
+        else:
+            summary = self.history_summary(db)
 
-            # Print the first one
-            print Entrez.efetch(db, **payload).read()
-            # Then write subsequent to files for <discover datasets>
-            for i in range(BATCH_SIZE, count, BATCH_SIZE):
-                payload['retstart'] = i
-                # TODO: output multiple files??? Collection?
-                with open('%s.out' % i, 'w') as handle:
-                    handle.write(Entrez.efetch(db, **payload).read())
-        else:
-            print Entrez.efetch(db, **payload).read()
+        count = len(summary)
+        payload['retmax'] = BATCH_SIZE
+
+        # This may be bad. I'm not sure yet. I think it will be ... but UGH.
+        for i in range(0, count, BATCH_SIZE):
+            payload['retstart'] = i
+            file_path = os.path.join('downloads', 'EFetch Results Chunk %s.%s' % (i, ftype))
+            with open(file_path, 'w') as handle:
+                handle.write(Entrez.efetch(db, **payload).read())
 
     def id_summary(self, db, id_list):
         payload = {
@@ -108,7 +104,7 @@
         return Entrez.egquery(**kwargs).read()
 
     def citmatch(self, **kwargs):
-        return Entrez.ECitMatch(**kwargs).read()
+        return Entrez.ecitmatch(**kwargs).read()
 
     @classmethod
     def parse_ids(cls, id_list, id, history_file):
--- a/macros.xml Fri Jun 10 15:03:49 2016 -0400 +++ b/macros.xml Thu Jul 07 02:37:49 2016 -0400 @@ -835,6 +835,7 @@ </xml> <xml name="requirements"> <requirements> + <requirement type="package" version="2.7">python</requirement> <requirement type="package" version="1.66">biopython</requirement> </requirements> </xml>
--- a/test Fri Jun 10 15:03:49 2016 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-{
-    "QueryKey": "1",
-    "WebEnv": "NCID_1_29968407_130.14.22.215_9001_1465585167_557791559_0MetA0_S_MegaStore_F_1"
-}
\ No newline at end of file
--- a/test-data/ecitmatch.results.tsv Fri Jun 10 15:03:49 2016 -0400 +++ b/test-data/ecitmatch.results.tsv Thu Jul 07 02:37:49 2016 -0400 @@ -1,1 +1,2 @@ -proc natl acad sci u s a 1991 88 3248 mann bj citation_1 2014248 + 1991 88 3248 mann bj citation_1 2014248 +
--- a/test-data/esearch.pubmed.2014-01-pnas.xml Fri Jun 10 15:03:49 2016 -0400 +++ b/test-data/esearch.pubmed.2014-01-pnas.xml Thu Jul 07 02:37:49 2016 -0400 @@ -21,5 +21,5 @@ <Id>24481252</Id> <Id>24477693</Id> <Id>24477692</Id> -</IdList><TranslationSet><Translation> <From>PNAS[ta]</From> <To>"Proc Natl Acad Sci U S A"[Journal]</To> </Translation></TranslationSet><TranslationStack> <TermSet> <Term>"Proc Natl Acad Sci U S A"[Journal]</Term> <Field>Journal</Field> <Count>120385</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>2014/01/01[PDAT]</Term> <Field>PDAT</Field> <Count>0</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>2014/02/01[PDAT]</Term> <Field>PDAT</Field> <Count>0</Count> <Explode>N</Explode> </TermSet> <OP>RANGE</OP> <OP>AND</OP> </TranslationStack><QueryTranslation>"Proc Natl Acad Sci U S A"[Journal] AND 2014/01/01[PDAT] : 2014/02/01[PDAT]</QueryTranslation></eSearchResult> +</IdList><TranslationSet><Translation> <From>PNAS[ta]</From> <To>"Proc Natl Acad Sci U S A"[Journal]</To> </Translation></TranslationSet><TranslationStack> <TermSet> <Term>"Proc Natl Acad Sci U S A"[Journal]</Term> <Field>Journal</Field> <Count>124812</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>2014/01/01[PDAT]</Term> <Field>PDAT</Field> <Count>0</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>2014/02/01[PDAT]</Term> <Field>PDAT</Field> <Count>0</Count> <Explode>N</Explode> </TermSet> <OP>RANGE</OP> <OP>AND</OP> </TranslationStack><QueryTranslation>"Proc Natl Acad Sci U S A"[Journal] AND 2014/01/01[PDAT] : 2014/02/01[PDAT]</QueryTranslation></eSearchResult>
--- a/test-data/esearch.pubmed.xml Fri Jun 10 15:03:49 2016 -0400 +++ b/test-data/esearch.pubmed.xml Thu Jul 07 02:37:49 2016 -0400 @@ -21,5 +21,5 @@ <Id>11121066</Id> <Id>11121065</Id> <Id>11121064</Id> -</IdList><TranslationSet><Translation> <From>PNAS[ta]</From> <To>"Proc Natl Acad Sci U S A"[Journal]</To> </Translation></TranslationSet><TranslationStack> <TermSet> <Term>"Proc Natl Acad Sci U S A"[Journal]</Term> <Field>Journal</Field> <Count>120385</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>97[vi]</Term> <Field>vi</Field> <Count>74742</Count> <Explode>N</Explode> </TermSet> <OP>AND</OP> <OP>GROUP</OP> </TranslationStack><QueryTranslation>"Proc Natl Acad Sci U S A"[Journal] AND 97[vi]</QueryTranslation></eSearchResult> +</IdList><TranslationSet><Translation> <From>PNAS[ta]</From> <To>"Proc Natl Acad Sci U S A"[Journal]</To> </Translation></TranslationSet><TranslationStack> <TermSet> <Term>"Proc Natl Acad Sci U S A"[Journal]</Term> <Field>Journal</Field> <Count>124812</Count> <Explode>N</Explode> </TermSet> <TermSet> <Term>97[vi]</Term> <Field>vi</Field> <Count>77218</Count> <Explode>N</Explode> </TermSet> <OP>AND</OP> <OP>GROUP</OP> </TranslationStack><QueryTranslation>"Proc Natl Acad Sci U S A"[Journal] AND 97[vi]</QueryTranslation></eSearchResult>
--- a/test-data/esummary.tax.xml Fri Jun 10 15:03:49 2016 -0400 +++ b/test-data/esummary.tax.xml Thu Jul 07 02:37:49 2016 -0400 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD esummary v1 20060131//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20060131/esummary-v1.dtd"> +<!DOCTYPE eSummaryResult PUBLIC "-//NLM//DTD esummary v1 20041029//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20041029/esummary-v1.dtd"> <eSummaryResult> <DocSum> <Id>10239</Id>
--- a/test-data/pm-tax-neighbor.xml Fri Jun 10 15:03:49 2016 -0400 +++ b/test-data/pm-tax-neighbor.xml Thu Jul 07 02:37:49 2016 -0400 @@ -1,24 +1,24 @@ -<?xml version="1.0"?> -<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD eLinkResult, 23 November 2010//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/eLink_101123.dtd"> +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE eLinkResult PUBLIC "-//NLM//DTD elink 20101123//EN" "http://eutils.ncbi.nlm.nih.gov/eutils/dtd/20101123/elink.dtd"> <eLinkResult> - <LinkSet> - <DbFrom>taxonomy</DbFrom> - <IdList> - <Id>510899</Id> - </IdList> - - <LinkSetDb> - <DbTo>pubmed</DbTo> - <LinkName>taxonomy_pubmed_entrez</LinkName> - - <Link> + <LinkSet> + <DbFrom>taxonomy</DbFrom> + <IdList> + <Id>510899</Id> + </IdList> + + <LinkSetDb> + <DbTo>pubmed</DbTo> + <LinkName>taxonomy_pubmed_entrez</LinkName> + + <Link> <Id>22241621</Id> </Link> - - </LinkSetDb> - - - </LinkSet> + + </LinkSetDb> + + + </LinkSet> </eLinkResult>
--- a/tmp Fri Jun 10 15:03:49 2016 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ -<?xml version="1.0"?> -<!DOCTYPE TaxaSet PUBLIC "-//NLM//DTD Taxon, 14th January 2002//EN" "http://www.ncbi.nlm.nih.gov/entrez/query/DTD/taxon.dtd"> -<TaxaSet><Taxon> - <TaxId>10239</TaxId> - <ScientificName>Viruses</ScientificName> - <OtherNames> - <BlastName>viruses</BlastName> - <Synonym>Vira</Synonym> - <Synonym>Viridae</Synonym> - </OtherNames> - <ParentTaxId>1</ParentTaxId> - <Rank>superkingdom</Rank> - <Division>Viruses</Division> - <GeneticCode> - <GCId>1</GCId> - <GCName>Standard</GCName> - </GeneticCode> - <MitoGeneticCode> - <MGCId>0</MGCId> - <MGCName>Unspecified</MGCName> - </MitoGeneticCode> - <Lineage/> - <CreateDate>1995/02/27 09:24:00</CreateDate> - <UpdateDate>2010/11/23 11:40:11</UpdateDate> - <PubDate>1993/04/20 01:00:00</PubDate> -</Taxon> - -</TaxaSet> -
