annotate uniprotxml_downloader.py @ 0:1af0f7987741 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
author galaxyp
date Wed, 07 Dec 2016 17:27:06 -0500
parents
children 7fd760c99ec5
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
1 #!/usr/bin/env python
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
2 """
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
3 #
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
4 #------------------------------------------------------------------------------
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
5 # University of Minnesota
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
6 # Copyright 2016, Regents of the University of Minnesota
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
7 #------------------------------------------------------------------------------
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
8 # Author:
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
9 #
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
10 # James E Johnson
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
11 #
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
12 #------------------------------------------------------------------------------
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
13 """
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
14 import sys
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
15 import re
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
16 import optparse
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
17 import urllib
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
18
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
19
1af0f7987741 planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/uniprotxml_downloader commit b4871f9659a924a68430aed3a93f4f9bad733fd6
galaxyp
parents:
diff changeset
def _build_query_url(taxids, reviewed=None):
    """Return the UniProt query URL for the given taxon IDs.

    taxids   -- list of NCBI taxon ID strings; the per-taxon terms are
                joined with ' OR '.
    reviewed -- optional value for the ``reviewed:`` query term; the term
                is left out entirely when this is empty or None.

    The URL is returned unescaped (it may contain spaces and quotes) so it
    can be shown to the user as-is; _retrieve() percent-encodes it.
    """
    taxon_query = ' OR '.join('taxonomy:"%s"' % taxid for taxid in taxids)
    reviewed_query = " reviewed:%s" % reviewed if reviewed else ''
    return ('http://www.uniprot.org/uniprot/?query=%s%s&force=yes&format=xml'
            % (taxon_query, reviewed_query))


def _parse_headers(msg):
    """Turn the textual form of the response headers into a dict.

    Each 'Name: value' line becomes one entry (value stripped of
    surrounding whitespace).  Lines without a colon are skipped instead of
    raising IndexError, which used to abort the whole run.
    """
    headers = {}
    for line in str(msg).strip().splitlines():
        name, sep, value = line.partition(':')
        if sep:
            headers[name] = value.strip()
    return headers


def _has_uniprot_root(lines, debug=False):
    """Return True if the first non-'<?' line opens a uniprot root element.

    lines -- iterable of text lines (e.g. an open file); only as many lines
             as needed are consumed.
    debug -- when true, echo every inspected line to stderr.

    Leading processing-instruction lines (starting with '<?') are skipped.
    An input with no other line (including an empty input) yields False.
    """
    for line in lines:
        if debug:
            sys.stderr.write("%s\n" % line)
        if line.startswith('<?'):
            continue
        # pattern match <root or <ns:root for any ns string
        return re.match(r'^<(\w*:)?uniprot', line) is not None
    return False


def _retrieve(url, dest_path):
    """Download url to dest_path and return urlretrieve's (filename, headers).

    Works on both Python 2 and Python 3.  The URL is percent-encoded
    explicitly, using the same safe-character set that Python 2's urllib
    applied implicitly when opening a URL, because Python 3's urllib does
    not escape spaces or quotes on its own.  '%' is in the safe set, so
    Python 2 re-quoting the already-quoted URL is a no-op.
    """
    try:  # Python 3
        from urllib.parse import quote
        from urllib.request import urlretrieve
    except ImportError:  # Python 2
        from urllib import quote, urlretrieve
    return urlretrieve(quote(url, safe="%/:=&?~#+!$,;'@()*[]|"), dest_path)


def __main__():
    """Download UniProt XML for one or more NCBI taxon IDs.

    Command line options:
      -t/--taxon     NCBI taxon ID; may be repeated (default: 9606)
      -r/--reviewed  value for the UniProt 'reviewed:' query filter
      -o/--output    destination path (default: uniprot_<taxids>.xml)
      -v/--verbose   print taxon IDs, release, entry count and URL to stdout
      -d/--debug     print the URL and the inspected XML lines to stderr

    Exits with status 1 when the response is empty, when the downloaded
    file does not start with a uniprot root element, or when the download
    raises an exception.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option('-t', '--taxon', dest='taxon', action='append', default=[], help='NCBI taxon ID to download')
    parser.add_option('-r', '--reviewed', dest='reviewed', help='value for the UniProt "reviewed:" query filter (yes or no)')
    parser.add_option('-o', '--output', dest='output', help='file path for the downloaded uniprot xml')
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true', default=False, help='Print UniProt Info')
    parser.add_option('-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr')
    (options, _args) = parser.parse_args()

    taxids = options.taxon if options.taxon else ['9606']
    if options.output:
        dest_path = options.output
    else:
        dest_path = "uniprot_%s.xml" % '_'.join(taxids)
    url = _build_query_url(taxids, options.reviewed)
    if options.debug:
        sys.stderr.write("%s\n" % url)
    try:
        (_fname, msg) = _retrieve(url, dest_path)
        headers = _parse_headers(msg)
        # Header values are strings, so compare against '0', not the int 0.
        if headers.get('Content-Length') == '0':
            sys.stderr.write("%s\n" % url)
            sys.stderr.write("%s\n" % msg)
            sys.exit(1)
        # Reject anything that is not UniProt XML (e.g. an HTML error page).
        with open(dest_path, 'r') as contents:
            is_uniprot = _has_uniprot_root(contents, options.debug)
        if not is_uniprot:
            sys.stderr.write("failed: Not a uniprot xml file\n")
            sys.exit(1)

        if options.verbose:
            sys.stdout.write("NCBI Taxon ID:%s\n" % taxids)
            if 'X-UniProt-Release' in headers:
                sys.stdout.write("UniProt-Release:%s\n" % headers['X-UniProt-Release'])
            if 'X-Total-Results' in headers:
                sys.stdout.write("Entries:%s\n" % headers['X-Total-Results'])
            sys.stdout.write("%s\n" % url)
    except Exception as e:
        # sys.exit() above raises SystemExit, which is not an Exception
        # subclass and therefore is not swallowed here.
        sys.stderr.write("failed: %s\n" % e)
        # Report the failure through the exit status as well as stderr.
        sys.exit(1)


if __name__ == "__main__":
    __main__()