Mercurial > repos > galaxyp > uniprotxml_downloader
annotate uniprotxml_downloader.py @ 6:c4a0f3badafe draft default tip
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
| author | galaxyp | 
|---|---|
| date | Wed, 11 Dec 2024 13:34:46 +0000 | 
| parents | 7be8e30d536f | 
| children | 
| rev | line source | 
|---|---|
| 
0
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
1 #!/usr/bin/env python | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
2 """ | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
3 # | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
4 #------------------------------------------------------------------------------ | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
5 # University of Minnesota | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
6 # Copyright 2016, Regents of the University of Minnesota | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
7 #------------------------------------------------------------------------------ | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
8 # Author: | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
9 # | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
10 # James E Johnson | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
11 # | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
12 #------------------------------------------------------------------------------ | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
13 """ | 
| 
3
 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 
galaxyp 
parents: 
2 
diff
changeset
 | 
14 import optparse | 
| 
0
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
15 import re | 
| 
3
 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 
galaxyp 
parents: 
2 
diff
changeset
 | 
16 import sys | 
| 
 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 
galaxyp 
parents: 
2 
diff
changeset
 | 
17 from urllib import parse | 
| 
 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 
galaxyp 
parents: 
2 
diff
changeset
 | 
18 | 
| 
 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 
galaxyp 
parents: 
2 
diff
changeset
 | 
19 import requests | 
| 
6
 
c4a0f3badafe
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 91705a9789b30878a55d1044c654e39a7726cf60
 
galaxyp 
parents: 
5 
diff
changeset
 | 
20 from requests.adapters import HTTPAdapter, Retry | 
| 
0
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
21 | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
22 | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
# Captures the URL of the next result page from an HTTP "Link" header value.
_NEXT_LINK_RE = re.compile(r'<(.+)>; rel="next"')
# Matches a root element named <uniprot...> or <ns:uniprot...> for any prefix.
_UNIPROT_ROOT_RE = re.compile(r'^<(\w*:)?uniprot')


def _read_search_ids(path, column):
    """Return the set of non-empty values found in one column of a tabular file.

    Lines starting with '#' are skipped.  Rows that do not have the requested
    column are ignored.  ``column`` is a zero-based index; negative values
    count from the end of the row, as with normal Python indexing.
    """
    found = set()
    with open(path, 'r') as handle:
        for line in handle:
            if line.startswith('#'):
                continue
            fields = line.rstrip('\r\n').split('\t')
            try:
                value = fields[column].strip()
            except IndexError:
                # Row is too short to contain the requested column.
                continue
            if value:
                found.add(value)
    return found


def _build_query(field, search_ids, reviewed):
    """Build the search query string from the IDs and the optional reviewed filter.

    Each ID becomes a ``field:"id"`` term and the terms are joined with OR.
    When a reviewed value is given, the ID terms are grouped in parentheses
    and combined with an explicit AND so the filter covers every ID rather
    than depending on how the server resolves operator precedence.
    """
    id_query = ' OR '.join(f'{field}:"{search_id}"' for search_id in search_ids)
    if not reviewed:
        return id_query
    reviewed_term = f'reviewed:{reviewed}'
    if not id_query:
        return reviewed_term
    return f'({id_query}) AND {reviewed_term}'


def _next_link(headers):
    """Return the next-page URL from the "Link" header, or None if there is none."""
    link = headers.get("Link")
    if link is None:
        return None
    match = _NEXT_LINK_RE.match(link)
    return match.group(1) if match else None


def _iter_batches(session, batch_url):
    """Yield ``(response, total, release)`` for each result page.

    Starts at ``batch_url`` and follows the "Link" header until no next page
    is advertised.  ``total`` and ``release`` are read from the
    ``x-total-results`` and ``x-uniprot-release`` response headers.  An HTTP
    error status raises via ``raise_for_status()``.
    """
    while batch_url:
        response = session.get(batch_url)
        response.raise_for_status()
        total = response.headers["x-total-results"]
        release = response.headers["x-uniprot-release"]
        yield response, total, release
        batch_url = _next_link(response.headers)


def _has_uniprot_root(path, debug=False):
    """Return True if the first non-``<?`` line of the file opens a uniprot element.

    Lines starting with ``<?`` (XML declaration / processing instructions) are
    skipped.  An empty file, or one containing only such lines, returns False.
    With ``debug`` set, every line that is read is echoed to stderr.
    """
    with open(path, 'r') as contents:
        for line in contents:
            if debug:
                print(line, file=sys.stderr)
            if line.startswith('<?'):
                continue
            # Only the first real line decides the outcome.
            return bool(_UNIPROT_ROOT_RE.match(line))
    return False


def __main__():
    """Download UniProtKB search results for a set of IDs and write them to a file.

    IDs come from repeated ``--search-id`` options and/or one column of a
    tabular ``--input`` file.  Results are fetched page by page from
    https://rest.uniprot.org/uniprotkb/search and appended to the output file.
    For the xml format the output is then checked for a uniprot root element;
    if the check fails the process exits with status 1.  Any other exception
    raised while downloading or checking ends the process with the exception
    text as the exit message.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of search search_ids')
    parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains search search_ids')
    parser.add_option('-s', '--search-id', dest='search_id', action='append', default=[], help='ID to search in Uniprot')
    parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
    parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta', 'tsv'], default='xml', help='output format')
    parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id', 'accession'], default='taxonomy_name', help='query field')
    parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
    parser.add_option('--output_columns', dest='output_columns', help='Columns to include in output (tsv)')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    (options, args) = parser.parse_args()

    search_ids = set(options.search_id)
    if options.input:
        search_ids |= _read_search_ids(options.input, options.column)

    # Sort so the request URL and the default file name are the same on every
    # run; iterating the set directly gives an arbitrary order.
    ordered_ids = sorted(search_ids)
    query = _build_query(options.field, ordered_ids, options.reviewed)

    if options.output:
        dest_path = options.output
    else:
        # The default name always ends in .xml, whatever --format is.
        dest_path = "uniprot_%s.xml" % '_'.join(ordered_ids)

    try:
        # Retry on the listed server-side status codes, with backoff.
        retries = Retry(total=5, backoff_factor=0.25, status_forcelist=[500, 502, 503, 504])
        session = requests.Session()
        session.mount("https://", HTTPAdapter(max_retries=retries))

        params = {'size': 500, 'format': options.format, 'query': query}
        if options.output_columns:
            params['fields'] = options.output_columns
        url = f'https://rest.uniprot.org/uniprotkb/search?{parse.urlencode(params)}'
        print(f"Downloading from:{url}")

        total = release = None
        # NOTE(review): pages are appended verbatim; for multi-page xml results
        # confirm that the concatenation is still one well-formed document.
        with open(dest_path, 'w') as fh:
            for batch, total, release in _iter_batches(session, url):
                fh.write(batch.text)

        if options.format == 'xml' and not _has_uniprot_root(dest_path, options.debug):
            print("failed: Not a uniprot xml file", file=sys.stderr)
            # SystemExit is not an Exception subclass, so the handler below
            # does not intercept it.
            sys.exit(1)

        print(f"Search IDs:{search_ids}")
        print(f"UniProt-Release:{release}")
        print(f"Entries:{total}")
    except Exception as e:
        sys.exit("%s" % e)
| 
0
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
107 | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
108 | 
| 
 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 
galaxyp 
parents:  
diff
changeset
 | 
if __name__ == "__main__":
    # Script entry point: run the downloader only when this file is executed
    # directly, not when it is imported.
    __main__()
