annotate uniprotxml_downloader.py @ 3:b0abab8e78eb draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
author galaxyp
date Tue, 01 Jun 2021 11:54:16 +0000
parents 366bf2635603
children e1ffb00a0436
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
2 """
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
3 #
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
4 #------------------------------------------------------------------------------
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
5 # University of Minnesota
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
6 # Copyright 2016, Regents of the University of Minnesota
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
7 #------------------------------------------------------------------------------
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
8 # Author:
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
9 #
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
10 # James E Johnson
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
11 #
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
12 #------------------------------------------------------------------------------
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
13 """
3
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
14 import optparse
0
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
15 import re
3
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
16 import sys
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
17 from urllib import parse
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
18
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
19 import requests
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
20 from requests.adapters import HTTPAdapter
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
21 from requests.packages.urllib3.util.retry import Retry
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
22
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
# Seconds applied to a request when the caller supplies no timeout of its own.
DEFAULT_TIMEOUT = 5

# HTTP status codes that trigger another attempt, and the request methods
# for which retrying is permitted (POST is included because the download
# itself is issued as a POST).
_RETRY_STATUS_CODES = [429, 500, 502, 503, 504]
_RETRY_METHODS = ["HEAD", "GET", "OPTIONS", "POST"]

# Shared retry policy handed to the HTTP adapter: at most 5 retries, with
# the delay between attempts scaled by ``backoff_factor``.
retry_strategy = Retry(
    total=5,
    backoff_factor=2,
    status_forcelist=_RETRY_STATUS_CODES,
    allowed_methods=_RETRY_METHODS,
)
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
30
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
31
b0abab8e78eb "planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents: 2
diff changeset
class TimeoutHTTPAdapter(HTTPAdapter):
    """HTTPAdapter that supplies a default timeout for requests lacking one.

    The constructor accepts an optional ``timeout`` keyword, which is stored
    on the adapter and removed before the remaining arguments are forwarded
    to ``HTTPAdapter``. When it is not given, ``DEFAULT_TIMEOUT`` is used.
    """

    def __init__(self, *args, **kwargs):
        # pop() both reads the value and strips it from kwargs, so the
        # parent constructor never receives the "timeout" keyword.
        self.timeout = kwargs.pop("timeout", DEFAULT_TIMEOUT)
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        """Send ``request``, filling in the adapter timeout when none is set.

        A ``timeout`` that is missing or ``None`` in ``kwargs`` is replaced
        by ``self.timeout``; an explicit value is left untouched.
        """
        if kwargs.get("timeout") is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)
0
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
45
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
46
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
def _read_taxids(path, column):
    """Return the set of non-empty taxon IDs found in one column of a tabular file.

    Lines starting with ``#`` are skipped. ``column`` may be negative, in
    which case it counts from the end of each row; rows that are too short
    for the requested column are ignored.
    """
    taxids = set()
    with open(path, 'r') as input_file:
        for line in input_file:
            if line.startswith('#'):
                continue
            fields = line.rstrip('\r\n').split('\t')
            # Valid indices are -len(fields) .. len(fields) - 1; comparing
            # against abs(column) would wrongly reject e.g. column -1 on a
            # single-field row.
            if -len(fields) <= column < len(fields):
                taxid = fields[column].strip()
                if taxid:
                    taxids.add(taxid)
    return taxids


def _is_uniprot_xml(path, debug=False):
    """Return True when the first non-``<?`` line of ``path`` opens a uniprot root.

    Lines starting with ``<?`` are skipped. The first remaining line decides
    the result; an empty file (or one holding only ``<?`` lines) is not a
    uniprot XML file. With ``debug`` set, every line read is echoed to stderr.
    """
    # pattern match <root or <ns:root for any ns string
    pattern = re.compile(r'^<(\w*:)?uniprot')
    with open(path, 'r') as contents:
        for line in contents:
            if debug:
                print(line, file=sys.stderr)
            if line.startswith('<?'):
                continue
            return pattern.match(line) is not None
    return False


def __main__():
    """Download UniProt entries for the requested NCBI taxon IDs.

    Command-line options:
      -i/--input     tabular file containing a column of NCBI Taxon IDs
      -c/--column    column in that file holding the IDs (default 0)
      -t/--taxon     NCBI taxon ID to download (may be repeated)
      -r/--reviewed  value appended to the query as ``reviewed:<value>``
      -f/--format    output format, ``xml`` (default) or ``fasta``
      -o/--output    destination path (default ``uniprot_<ids>.xml``)
      -d/--debug     write debugging information to stderr

    The query is POSTed to UniProt and the response body is written to the
    destination file. A summary (taxon IDs and, when the response carries
    them, release and entry count headers) is printed to stdout.

    Exits with status 1 when an ``xml`` download does not start with a
    uniprot root element, and with the exception message when any step
    raises (including an HTTP error status from the server).
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of NCBI Taxon IDs')
    parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains Taxon IDs')
    parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
    parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
    parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
    parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    (options, _args) = parser.parse_args()

    # Merge IDs given with -t and IDs read from the tabular input file.
    taxids = set(options.taxon)
    if options.input:
        taxids.update(_read_taxids(options.input, options.column))

    taxon_queries = ['taxonomy:"%s"' % taxid for taxid in taxids]
    taxon_query = ' OR '.join(taxon_queries)
    if options.output:
        dest_path = options.output
    else:
        dest_path = "uniprot_%s.xml" % '_'.join(taxids)
    reviewed = " reviewed:%s" % options.reviewed if options.reviewed else ''
    try:
        url = 'https://www.uniprot.org/uniprot/'
        query = "%s%s" % (taxon_query, reviewed)
        params = {'query': query, 'force': 'yes', 'format': options.format}
        if options.debug:
            print("%s ? %s" % (url, params), file=sys.stderr)
        data = parse.urlencode(params)
        print(f"Retrieving: {url+data}")
        adapter = TimeoutHTTPAdapter(max_retries=retry_strategy)
        # The context manager closes the session even when the POST raises.
        with requests.Session() as http:
            http.mount("https://", adapter)
            response = http.post(url, data=params)
        # Fail loudly on a 4xx/5xx answer instead of saving the error page
        # as if it were the requested download.
        response.raise_for_status()
        with open(dest_path, 'w') as fh:
            fh.write(response.text)
        if options.format == 'xml' and not _is_uniprot_xml(dest_path, options.debug):
            print("failed: Not a uniprot xml file", file=sys.stderr)
            # SystemExit is not an Exception subclass, so the handler
            # below does not intercept this exit status.
            sys.exit(1)
        print("NCBI Taxon ID:%s" % taxids, file=sys.stdout)
        if 'X-UniProt-Release' in response.headers:
            print("UniProt-Release:%s" % response.headers['X-UniProt-Release'], file=sys.stdout)
        if 'X-Total-Results' in response.headers:
            print("Entries:%s" % response.headers['X-Total-Results'], file=sys.stdout)
    except Exception as e:
        sys.exit("%s" % e)
0
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
115
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
116
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()