Mercurial > repos > galaxyp > uniprotxml_downloader
annotate uniprotxml_downloader.py @ 6:c4a0f3badafe draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
author | galaxyp |
---|---|
date | Wed, 11 Dec 2024 13:34:46 +0000 |
parents | 7be8e30d536f |
children |
rev | line source |
---|---|
0
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
1 #!/usr/bin/env python |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
2 """ |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
3 # |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
4 #------------------------------------------------------------------------------ |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
5 # University of Minnesota |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
6 # Copyright 2016, Regents of the University of Minnesota |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
7 #------------------------------------------------------------------------------ |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
8 # Author: |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
9 # |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
10 # James E Johnson |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
11 # |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
12 #------------------------------------------------------------------------------ |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
13 """ |
3
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
2
diff
changeset
|
14 import optparse |
0
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
15 import re |
3
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
2
diff
changeset
|
16 import sys |
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
2
diff
changeset
|
17 from urllib import parse |
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
2
diff
changeset
|
18 |
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
galaxyp
parents:
2
diff
changeset
|
19 import requests |
6
c4a0f3badafe
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
galaxyp
parents:
5
diff
changeset
|
20 from requests.adapters import HTTPAdapter, Retry |
0
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
21 |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
22 |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
# Extracts the URL of the rel="next" entry from an HTTP Link header. The
# character class stops at the first '>' so a header carrying several links
# cannot be swallowed by one greedy match.
_NEXT_LINK_RE = re.compile(r'<([^>]+)>;\s*rel="next"')

# pattern match <root or <ns:root for any ns string
_UNIPROT_ROOT_RE = re.compile(r'^<(\w*:)?uniprot')


def _parse_options():
    """Parse the command line and return the optparse options object."""
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of search search_ids')
    parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains search search_ids')
    parser.add_option('-s', '--search-id', dest='search_id', action='append', default=[], help='ID to search in Uniprot')
    parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
    parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta', 'tsv'], default='xml', help='output format')
    parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id', 'accession'], default='taxonomy_name', help='query field')
    parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
    parser.add_option('--output_columns', dest='output_columns', help='Columns to include in output (tsv)')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    options, _ = parser.parse_args()
    return options


def _collect_search_ids(options):
    """Return the set of IDs given via --search-id plus those read from --input.

    Lines of the input file starting with '#' are skipped, as are rows that
    are too short to contain the requested column or whose cell is blank.
    """
    search_ids = set(options.search_id)
    if options.input:
        with open(options.input, 'r') as input_file:
            for line in input_file:
                if line.startswith('#'):
                    continue
                fields = line.rstrip('\r\n').split('\t')
                if len(fields) > abs(options.column):
                    search_id = fields[options.column].strip()
                    if search_id:
                        search_ids.add(search_id)
    return search_ids


def _build_query(field, search_ids, reviewed):
    """Build the UniProt query string for *search_ids* on *field*.

    IDs are OR-ed together in sorted order so the resulting URL is
    reproducible. When *reviewed* is set, the OR group is parenthesised
    before the filter is AND-ed on, so the filter applies to every ID
    rather than depending on the parser's operator precedence.
    """
    query = ' OR '.join(f'{field}:"{search_id}"' for search_id in sorted(search_ids))
    if not reviewed:
        return query
    if not query:
        return f'reviewed:{reviewed}'
    return f'({query}) AND reviewed:{reviewed}'


def _next_link(headers):
    """Return the rel="next" URL from the Link header, or None if absent."""
    match = _NEXT_LINK_RE.search(headers.get("Link", ""))
    return match.group(1) if match else None


def _download(url, dest_path):
    """Fetch every result page starting at *url* and write it to *dest_path*.

    Returns a ``(total, release)`` tuple taken from the ``x-total-results``
    and ``x-uniprot-release`` headers of the last page fetched. Raises
    whatever ``requests`` raises for connection or HTTP status errors.
    """
    retries = Retry(total=5, backoff_factor=0.25, status_forcelist=[500, 502, 503, 504])
    total = release = None
    # The session is closed on exit so pooled connections are not leaked.
    with requests.Session() as session, open(dest_path, 'w') as fh:
        session.mount("https://", HTTPAdapter(max_retries=retries))
        while url:
            response = session.get(url)
            response.raise_for_status()
            total = response.headers["x-total-results"]
            release = response.headers["x-uniprot-release"]
            # NOTE(review): pages are appended verbatim; confirm that a
            # multi-page xml/tsv result does not repeat its header per page.
            fh.write(response.text)
            url = _next_link(response.headers)
    return total, release


def _is_uniprot_xml(path, debug=False):
    """Return True if the first non-declaration line of *path* opens a uniprot root.

    Lines starting with '<?' (XML declarations / processing instructions)
    are skipped. Reaching end of file without finding any other line counts
    as a failure; ``readline()`` signals EOF with '' and never None, so the
    file is iterated directly instead of testing for None.
    """
    with open(path, 'r') as contents:
        for line in contents:
            if debug:
                print(line, file=sys.stderr)
            if line.startswith('<?'):
                continue
            return bool(_UNIPROT_ROOT_RE.match(line))
    return False


def __main__():
    """Download UniProtKB entries matching the requested IDs.

    Reads options from the command line, queries the UniProt REST search
    endpoint page by page, writes the result to the output path and prints
    a short summary to stdout. Exits with status 1 when an XML download
    does not look like a uniprot document, and with the error message when
    any other exception occurs during the download.
    """
    options = _parse_options()
    search_ids = _collect_search_ids(options)
    # Sorted so the default file name does not depend on set iteration order.
    dest_path = options.output or "uniprot_%s.xml" % '_'.join(sorted(search_ids))
    try:
        params = {
            'size': 500,
            'format': options.format,
            'query': _build_query(options.field, search_ids, options.reviewed),
        }
        if options.output_columns:
            params['fields'] = options.output_columns
        url = f'https://rest.uniprot.org/uniprotkb/search?{parse.urlencode(params)}'
        print(f"Downloading from:{url}")

        total, release = _download(url, dest_path)

        if options.format == 'xml' and not _is_uniprot_xml(dest_path, options.debug):
            print("failed: Not a uniprot xml file", file=sys.stderr)
            # SystemExit is not an Exception subclass, so it is not caught below.
            sys.exit(1)
        print(f"Search IDs:{search_ids}")
        print(f"UniProt-Release:{release}")
        print(f"Entries:{total}")
    except Exception as e:
        sys.exit("%s" % e)
0
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
107 |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
108 |
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff
changeset
|
# Run the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    __main__()