Mercurial > repos > galaxyp > uniprotxml_downloader
annotate uniprotxml_downloader.py @ 5:7be8e30d536f draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 0c5222345ace5054df44da29cab278f4a02e2b41
| author | galaxyp | 
|---|---|
| date | Thu, 06 Jul 2023 21:15:29 +0000 | 
| parents | e1ffb00a0436 | 
| children | c4a0f3badafe | 
| rev | line source | 
|---|---|
| 0 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 2 """ | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 3 # | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 4 #------------------------------------------------------------------------------ | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 5 # University of Minnesota | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 6 # Copyright 2016, Regents of the University of Minnesota | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 7 #------------------------------------------------------------------------------ | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 8 # Author: | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 9 # | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 10 # James E Johnson | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 11 # | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 12 #------------------------------------------------------------------------------ | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 13 """ | 
| 3 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 14 import optparse | 
| 0 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 15 import re | 
| 3 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 16 import sys | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 17 from urllib import parse | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 18 | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 19 import requests | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 20 from requests.adapters import HTTPAdapter | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 21 from requests.packages.urllib3.util.retry import Retry | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 22 | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
# Timeout, in seconds, that TimeoutHTTPAdapter falls back to when neither the
# adapter nor the individual request specifies one.
DEFAULT_TIMEOUT = 5

# Retry policy passed to the HTTP adapter as ``max_retries``: at most five
# retries with a backoff factor of 2, triggered by the listed HTTP status
# codes and limited to the listed request methods.
retry_strategy = Retry(
    total=5,
    backoff_factor=2,
    status_forcelist=[429, 500, 502, 503, 504],
    allowed_methods=["HEAD", "GET", "OPTIONS", "POST"],
)
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 30 | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
changeset | 31 | 
| 
b0abab8e78eb
"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit 62afd9de6db50f4314e49d9f24881b6d3778a0a5"
 galaxyp parents: 
2diff
class TimeoutHTTPAdapter(HTTPAdapter):
    """HTTPAdapter that supplies a timeout for requests sent without one.

    An optional ``timeout`` keyword argument sets the adapter-wide value;
    when it is omitted, ``DEFAULT_TIMEOUT`` is used.  All other arguments
    are forwarded unchanged to ``HTTPAdapter``.
    """

    def __init__(self, *args, **kwargs):
        # ``timeout`` is consumed here so that it is not forwarded to the
        # parent constructor.
        self.timeout = kwargs.pop("timeout", DEFAULT_TIMEOUT)
        super().__init__(*args, **kwargs)

    def send(self, request, **kwargs):
        """Send ``request``, filling in the adapter timeout if none is set."""
        if kwargs.get("timeout") is None:
            kwargs["timeout"] = self.timeout
        return super().send(request, **kwargs)
| 0 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 45 | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 46 | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
# Matches an XML root element named <uniprot ...> or <ns:uniprot ...>.
_UNIPROT_ROOT_RE = re.compile(r'^<(\w*:)?uniprot')


def _read_search_ids(path, column):
    """Return the set of non-empty values found in one column of a tabular file.

    Lines starting with '#' are skipped, as are lines that do not have the
    requested column.  ``column`` is a normal Python index, so negative values
    count from the last field.
    """
    found = set()
    with open(path, 'r') as input_file:
        for line in input_file:
            if line.startswith('#'):
                continue
            fields = line.rstrip('\r\n').split('\t')
            try:
                search_id = fields[column].strip()
            except IndexError:
                # Row is too short for the requested column.
                continue
            if search_id:
                found.add(search_id)
    return found


def _build_query(field, search_ids, reviewed):
    """Build the UniProt query string for ``search_ids`` on ``field``.

    The per-ID terms are OR-ed together.  When ``reviewed`` is set, the OR
    group is parenthesised so the reviewed filter constrains every ID rather
    than only the term it happens to follow.
    """
    terms = ' OR '.join(f'{field}:"{search_id}"' for search_id in search_ids)
    if not reviewed:
        return terms
    return f'({terms}) AND reviewed:{reviewed}'


def _is_uniprot_xml(path, debug=False):
    """Return True when the first non-declaration line of ``path`` opens a uniprot root.

    Lines starting with '<?' (XML declaration / processing instructions) are
    skipped.  An empty file, or one holding only such lines, is not accepted.
    Every inspected line is echoed to stderr when ``debug`` is true.
    """
    # Only the leading ASCII markup is inspected, so undecodable bytes are
    # replaced instead of aborting the check.
    with open(path, 'r', encoding='utf-8', errors='replace') as contents:
        for line in contents:
            if debug:
                print(line, file=sys.stderr)
            if line.startswith('<?'):
                continue
            return _UNIPROT_ROOT_RE.match(line) is not None
    return False


def __main__():
    """Download UniProtKB entries matching the requested IDs to a file.

    Search IDs come from repeated ``--search-id`` options and/or one column of
    a tabular ``--input`` file.  The entries are fetched from the UniProt REST
    ``stream`` endpoint in the chosen format and written to ``--output`` (or a
    name derived from the IDs).  For XML output the file is checked for a
    uniprot root element.  The process exits with an error message or a
    non-zero status on any failure.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-i', '--input', dest='input', default=None, help='Tabular file containing a column of search search_ids')
    parser.add_option('-c', '--column', dest='column', type='int', default=0, help='The column (zero-based) in the tabular file that contains search search_ids')
    parser.add_option('-s', '--search-id', dest='search_id', action='append', default=[], help='ID to search in Uniprot')
    parser.add_option('-r', '--reviewed', dest='reviewed', help='Only uniprot reviewed entries')
    parser.add_option('-f', '--format', dest='format', choices=['xml', 'fasta'], default='xml', help='output format')
    parser.add_option('-k', '--field', dest='field', choices=['taxonomy_name', 'taxonomy_id', 'accession'], default='taxonomy_name', help='query field')
    parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    options, _args = parser.parse_args()

    search_ids = set(options.search_id)
    if options.input:
        search_ids |= _read_search_ids(options.input, options.column)
    if not search_ids:
        # An empty query can only be rejected by the server; fail early with
        # a message that names the actual problem.
        sys.exit("No search IDs given: use --search-id and/or --input")

    # Sort so the query and the default file name are reproducible.
    ordered_ids = sorted(search_ids)
    dest_path = options.output or "uniprot_%s.xml" % '_'.join(ordered_ids)

    url = 'https://rest.uniprot.org/uniprotkb/stream'
    params = {
        'query': _build_query(options.field, ordered_ids, options.reviewed),
        'format': options.format,
    }
    try:
        if options.debug:
            print("%s ? %s" % (url, params), file=sys.stderr)
        print(f"Retrieving: {url}?{parse.urlencode(params)}")

        # The context managers release the connection even when the request
        # or the download raises.
        with requests.Session() as http:
            http.mount("https://", TimeoutHTTPAdapter(max_retries=retry_strategy))
            with http.get(url, params=params, stream=True) as response:
                if response.status_code != 200:
                    sys.exit(f"Request failed with status code {response.status_code}:\n{response.text}")
                # Stream the body to disk as bytes: results can be large, and
                # this avoids re-encoding the text with the locale encoding.
                with open(dest_path, 'wb') as fh:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        fh.write(chunk)
                headers = response.headers

        if options.format == 'xml' and not _is_uniprot_xml(dest_path, options.debug):
            print("failed: Not a uniprot xml file", file=sys.stderr)
            sys.exit(1)

        print("Search IDs:%s" % search_ids, file=sys.stdout)
        if 'X-UniProt-Release' in headers:
            print("UniProt-Release:%s" % headers['X-UniProt-Release'], file=sys.stdout)
        if 'X-Total-Results' in headers:
            print("Entries:%s" % headers['X-Total-Results'], file=sys.stdout)
    except Exception as e:
        # Top-level boundary: report the error text and exit non-zero.
        # SystemExit raised above is not an Exception and passes through.
        sys.exit("%s" % e)
| 0 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 122 | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
changeset | 123 | 
| 
1af0f7987741
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
 galaxyp parents: diff
# Script entry point: run the downloader only when this file is executed
# directly, not when it is imported.
if __name__ == "__main__":
    __main__()
