comparison venv/lib/python2.7/site-packages/setuptools/package_index.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d67268158946
1 """PyPI and direct package downloading"""
2 import sys
3 import os
4 import re
5 import shutil
6 import socket
7 import base64
8 import hashlib
9 from functools import wraps
10
11 from pkg_resources import (
12 CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
13 require, Environment, find_distributions, safe_name, safe_version,
14 to_filename, Requirement, DEVELOP_DIST,
15 )
16 from setuptools import ssl_support
17 from distutils import log
18 from distutils.errors import DistutilsError
19 from setuptools.compat import (urllib2, httplib, StringIO, HTTPError,
20 urlparse, urlunparse, unquote, splituser,
21 url2pathname, name2codepoint,
22 unichr, urljoin, urlsplit, urlunsplit,
23 ConfigParser)
24 from setuptools.compat import filterfalse
25 from fnmatch import translate
26 from setuptools.py26compat import strip_fragment
27 from setuptools.py27compat import get_all_headers
28
# Matches an ``egg=name-version`` URL fragment; group 1 is the text after
# ``egg=``.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.]+)$')
# Matches an ``href`` attribute (quoted or not); group 1 is its value.
HREF = re.compile("""href\\s*=\\s*['"]?([^'"> ]+)""", re.I)
# this is here to fix emacs' cruddy broken syntax highlighting
# Matches a download link followed by its "md5" link.  Groups are
# (download href, link text, 32-digit hex digest).  Backslashes are doubled
# instead of relying on invalid escapes such as "\s" in a non-raw literal;
# the compiled pattern text is unchanged.
PYPI_MD5 = re.compile(
    '<a href="([^"#]+)">([^<]+)</a>\n\\s+\\(<a (?:title="MD5 hash"\n\\s+)'
    'href="[^?]+\\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\\)'
)
# Bound ``match`` method: returns a match object (group 1 is the scheme) when
# the text starts with a scheme of at least two characters, else None.
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Archive suffixes tried when interpreting a basename as a source archive.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()
38
39 __all__ = [
40 'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
41 'interpret_distro_name',
42 ]
43
44 _SOCKET_TIMEOUT = 15
45
def parse_bdist_wininst(name):
    """Return (base, py_version, platform) for a possible .exe name.

    All three items are None when `name` does not look like a
    bdist_wininst installer; py_version is None when the name carries no
    ``-pyX.Y`` tag.
    """
    lowered = name.lower()
    if not lowered.endswith('.exe'):
        return None, None, None

    # The negative offsets are the lengths of the recognised suffixes,
    # e.g. len('.win32.exe') == 10 and len('.win32-pyX.Y.exe') == 16.
    if lowered.endswith('.win32.exe'):
        return name[:-10], None, 'win32'
    if lowered.startswith('.win32-py', -16):
        return name[:-16], name[-7:-4], 'win32'
    if lowered.endswith('.win-amd64.exe'):
        return name[:-14], None, 'win-amd64'
    if lowered.startswith('.win-amd64-py', -20):
        return name[:-20], name[-7:-4], 'win-amd64'
    return None, None, None
68
69
def egg_info_for_url(url):
    """Return (basename, fragment) for `url`.

    The basename is the unquoted last path segment, except for
    sourceforge.net ``.../download`` URLs, where the segment before it is
    used.  A ``#`` that only appears after unquoting the basename splits it
    into basename and fragment.
    """
    parsed = urlparse(url)
    server, path, fragment = parsed[1], parsed[2], parsed[5]
    segments = path.split('/')
    base = unquote(segments[-1])
    if server == 'sourceforge.net' and base == 'download':    # XXX Yuck
        base = unquote(segments[-2])
    if '#' in base:
        base, fragment = base.split('#', 1)
    return base, fragment
77
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL

    Candidates derived from the URL's basename come first; if the URL has an
    ``#egg=...`` fragment, its interpretations follow with checkout
    precedence.
    """
    basename, fragment = egg_info_for_url(url)
    for candidate in distros_for_location(url, basename, metadata):
        yield candidate

    egg_match = EGG_FRAGMENT.match(fragment) if fragment else None
    if not egg_match:
        return

    checkouts = interpret_distro_name(
        url, egg_match.group(1), metadata, precedence=CHECKOUT_DIST
    )
    for candidate in checkouts:
        yield candidate
89
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-len('.zip')]    # strip the .zip

    if '-' in basename and basename.endswith('.egg'):
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]

    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )

    # Try source distro extensions (.zip, .tgz, etc.); the first suffix in
    # EXTENSIONS order that matches wins.
    matching = [ext for ext in EXTENSIONS if basename.endswith(ext)]
    if matching:
        stem = basename[:-len(matching[0])]
        return interpret_distro_name(location, stem, metadata)
    return []  # no extension matched
110
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename

    The location handed on is the normalized path; the basename is taken
    from `filename` as given.
    """
    location = normalize_path(filename)
    basename = os.path.basename(filename)
    return distros_for_location(location, basename, metadata)
116
117
def interpret_distro_name(
        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
        platform=None
        ):
    """Generate alternative interpretations of a source distro name

    Yields one ``Distribution`` per possible name/version split of
    `basename` at a ``-``.  Yields nothing when no `py_version` is given and
    a ``pyX.Y`` tag appears after the second dash-separated part (the name is
    then taken to be a bdist_dumb, not an sdist).

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Generate alternative interpretations of a source distro name
    # Because some packages are ambiguous as to name/versions split
    # e.g. "adns-python-1.1.0", "egenix-mx-commercial", etc.
    # So, we generate each possible interpretation (e.g. "adns, python-1.1.0"
    # "adns-python, 1.1.0", and "adns-python-1.1.0, no version").  In practice,
    # the spurious interpretations should be ignored, because in the event
    # there's also an "adns" package, the spurious "python-1.1.0" version will
    # compare lower than any numeric version number, and is therefore unlikely
    # to match a request for it.  It's still a potential problem, though, and
    # in the long run PyPI and the distutils should go for "safe" names and
    # versions in distribution archive names (sdist and bdist).

    parts = basename.split('-')
    # Raw string avoids invalid escape sequences; ``\d+`` also recognises
    # two-digit minor versions such as "py3.10".
    if not py_version and any(re.match(r'py\d\.\d+$', p) for p in parts[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    for split_at in range(1, len(parts) + 1):
        yield Distribution(
            location, metadata, '-'.join(parts[:split_at]),
            '-'.join(parts[split_at:]), py_version=py_version,
            precedence=precedence, platform=platform
        )
151
# From Python 2.7 docs
def unique_everseen(iterable, key=None):
    "Yield each element the first time it (or its key) is seen, in order."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    observed = set()
    for item in iterable:
        marker = item if key is None else key(item)
        if marker in observed:
            continue
        observed.add(marker)
        yield item
169
def unique_values(func):
    """
    Decorator: pass whatever iterable `func` returns through
    ``unique_everseen`` so that callers only ever see each item once.
    """
    @wraps(func)
    def deduplicated(*args, **kwargs):
        produced = func(*args, **kwargs)
        return unique_everseen(produced)
    return deduplicated
179
# Matches a tag carrying a ``rel`` attribute; group 1 is the tag's inner
# text, group 2 the ``rel`` value.  Backslashes are doubled so the non-raw
# literal contains no invalid escape sequences (pattern text is unchanged).
REL = re.compile("""<([^>]*\\srel\\s*=\\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting

@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs

    Also yields the first ``href`` found after a "<th>Home Page" or
    "<th>Download URL" marker.  Every URL is resolved against `url`.
    """

    for rel_match in REL.finditer(page):
        tag, rel = rel_match.groups()
        # ``rel`` may list several comma-separated relations
        rels = set(item.strip() for item in rel.lower().split(','))
        if 'homepage' in rels or 'download' in rels:
            for href_match in HREF.finditer(tag):
                yield urljoin(url, htmldecode(href_match.group(1)))

    for marker in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(marker)
        if pos != -1:
            href_match = HREF.search(page, pos)
            if href_match:
                yield urljoin(url, htmldecode(href_match.group(1)))
200
# ``sys.version[:3]`` truncates two-digit minor versions ("3.10" -> "3.1"),
# so the "major.minor" text is built from ``sys.version_info`` instead.
user_agent = "Python-urllib/%s setuptools/%s" % (
    '%d.%d' % sys.version_info[:2], require('setuptools')[0].version
)
204
class ContentChecker(object):
    """
    A do-nothing checker that spells out the content-checking interface:
    it accepts any data and always reports the content as valid.
    """

    def feed(self, block):
        """
        Accept a block of data; this base implementation ignores it.
        """
        return None

    def is_valid(self):
        """
        Report whether the content fed so far is acceptable (always True
        here).
        """
        return True

    def report(self, reporter, template):
        """
        Hook for calling `reporter` with checker information substituted
        into `template`; this base implementation reports nothing.
        """
        return None
227
class HashChecker(ContentChecker):
    """
    Content checker that hashes the data it is fed and compares the hex
    digest with an expected value.
    """

    # ``<hash name>=<hex digest>`` as it may appear in a URL fragment
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.expected = expected
        self.hash = hashlib.new(hash_name)

    @classmethod
    def from_url(cls, url):
        "Construct a (possibly null) ContentChecker from a URL"
        fragment = urlparse(url)[-1]
        found = cls.pattern.search(fragment) if fragment else None
        if not found:
            # no fragment, or no recognisable hash in it
            return ContentChecker()
        return cls(
            hash_name=found.group('hash_name'),
            expected=found.group('expected'),
        )

    def feed(self, block):
        self.hash.update(block)

    def is_valid(self):
        actual = self.hash.hexdigest()
        return actual == self.expected

    def report(self, reporter, template):
        return reporter(template % self.hash_name)
259
260
class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs"""

    def __init__(
            self, index_url="https://pypi.python.org/simple", hosts=('*',),
            ca_bundle=None, verify_ssl=True, *args, **kw
            ):
        """Create an empty index rooted at `index_url`.

        `hosts` is a sequence of fnmatch-style patterns; ``url_ok()`` accepts
        a URL only if its network location matches one of them (``file:``
        URLs are always accepted).  `ca_bundle` and `verify_ssl` decide
        whether the ``ssl_support`` opener or plain ``urllib2.urlopen`` is
        used.  Remaining arguments are passed to ``Environment``.
        """
        Environment.__init__(self, *args, **kw)
        # the index URL is always stored with a trailing slash
        if not index_url.endswith('/'):
            index_url += '/'
        self.index_url = index_url
        self.scanned_urls = {}
        self.fetched_urls = {}
        self.package_pages = {}
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        # URLs queued by add_find_links(); set to None once prescan() ran
        self.to_scan = []
        use_ssl = (
            verify_ssl and ssl_support.is_available
            and (ca_bundle or ssl_support.find_ca_bundle())
        )
        if use_ssl:
            self.opener = ssl_support.opener_for(ca_bundle)
        else:
            self.opener = urllib2.urlopen

    def process_url(self, url, retrieve=False):
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            self.process_filename(url)
            return

        dists = list(distros_for_url(url))
        if dists:
            if not self.url_ok(url):
                return
            self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            for dist in dists:
                self.add(dist)
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True   # prevent multiple fetch attempts
        # The URL is embedded in a template that is %-formatted again when
        # the warning is logged, so any "%" in it (e.g. "%20") is escaped.
        warning = (
            "Download error on %s: %%s -- Some packages may not be found!"
            % url.replace('%', '%%')
        )
        f = self.open_url(url, warning)
        if f is None:
            return
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()   # not html, we can't process it
            return

        base = f.url     # handle redirects
        page = f.read()
        if not isinstance(page, str):
            # We are in Python 3 and got bytes. We want str.
            if isinstance(f, HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        for match in HREF.finditer(page):
            link = urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            self.process_index(url, page)

    def process_filename(self, fn, nested=False):
        """Register distributions found at local path `fn`.

        A directory is scanned one level deep (entries are processed with
        `nested` set, so their own contents are not listed).
        """
        # process filenames or directories
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        if os.path.isdir(fn) and not nested:
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path, item), True)

        # materialise first: a generator is always truthy, which would log
        # "Found" even when nothing is yielded
        dists = list(distros_for_filename(fn))
        if dists:
            self.debug("Found: %s", fn)
            for dist in dists:
                self.add(dist)

    def url_ok(self, url, fatal=False):
        """Return True if `url` is a ``file:`` URL or its host is allowed.

        Otherwise raise ``DistutilsError`` when `fatal` is true, else log a
        warning and return False.
        """
        s = URL_SCHEME(url)
        if (s and s.group(1).lower() == 'file') or self.allows(urlparse(url)[1]):
            return True
        msg = ("\nNote: Bypassing %s (disallowed host; see "
            "http://bit.ly/1dg9ijs for details).\n")
        if fatal:
            raise DistutilsError(msg % url)
        self.warn(msg, url)
        return False

    def scan_egg_links(self, search_path):
        """Process every ``.egg-link`` file in the directories of `search_path`."""
        for item in search_path:
            if os.path.isdir(item):
                for entry in os.listdir(item):
                    if entry.endswith('.egg-link'):
                        self.scan_egg_link(item, entry)

    def scan_egg_link(self, path, entry):
        """Add the distributions referenced by the egg-link file `entry` in `path`.

        Only files with exactly two non-blank lines are acted upon.
        """
        # ``with`` makes sure the link file is closed again
        with open(os.path.join(path, entry)) as link_file:
            lines = [
                stripped for stripped in (raw.strip() for raw in link_file)
                if stripped
            ]
        if len(lines) == 2:
            for dist in find_distributions(os.path.join(path, lines[0])):
                dist.location = os.path.join(path, *lines)
                dist.precedence = SOURCE_DIST
                self.add(dist)

    def process_index(self, url, page):
        """Process the contents of a PyPI page"""
        def scan(link):
            # Process a URL to see if it's for a package page
            if link.startswith(self.index_url):
                parts = list(map(
                    unquote, link[len(self.index_url):].split('/')
                ))
                if len(parts) == 2 and '#' not in parts[1]:
                    # it's a package page, sanitize and index it
                    pkg = safe_name(parts[0])
                    ver = safe_version(parts[1])
                    self.package_pages.setdefault(pkg.lower(), {})[link] = True
                    return to_filename(pkg), to_filename(ver)
            return None, None

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                scan(urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                # links that cannot be decoded/joined are skipped
                pass

        pkg, ver = scan(url)   # ensure this page is in the page index
        if pkg:
            # process individual package page
            for new_url in find_external_links(url, page):
                # Process the found URL
                base, frag = egg_info_for_url(new_url)
                if base.endswith('.py') and not frag:
                    if ver:
                        new_url += '#egg=%s-%s' % (pkg, ver)
                    else:
                        self.need_version_info(url)
                self.scan_url(new_url)

            # rewrite download links so the md5 digest travels in the fragment
            return PYPI_MD5.sub(
                lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
            )
        else:
            return ""   # no sense double-scanning non-package pages

    def need_version_info(self, url):
        """Trigger a full index scan because `url` links to unversioned .py files."""
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.", url
        )

    def scan_all(self, msg=None, *args):
        """Scan the index root, warning with `msg` first if it was not fetched yet."""
        if self.index_url not in self.fetched_urls:
            if msg:
                self.warn(msg, *args)
            self.info(
                "Scanning index of all packages (this may take a while)"
            )
        self.scan_url(self.index_url)

    def find_packages(self, requirement):
        """Scan the index pages that may list distributions for `requirement`."""
        self.scan_url(self.index_url + requirement.unsafe_name + '/')

        if not self.package_pages.get(requirement.key):
            # Fall back to safe version of the name
            self.scan_url(self.index_url + requirement.project_name + '/')

        if not self.package_pages.get(requirement.key):
            # We couldn't find the target package, so search the index page too
            self.not_found_in_index(requirement)

        for url in list(self.package_pages.get(requirement.key, ())):
            # scan each page that might be related to the desired package
            self.scan_url(url)

    def obtain(self, requirement, installer=None):
        """Return a distribution matching `requirement` after scanning the index.

        Falls back to ``Environment.obtain`` when no scanned distribution
        matches.
        """
        self.prescan()
        self.find_packages(requirement)
        for dist in self[requirement.key]:
            if dist in requirement:
                return dist
            self.debug("%s does not match %s", requirement, dist)
        return super(PackageIndex, self).obtain(requirement, installer)

    def check_hash(self, checker, filename, tfp):
        """
        checker is a ContentChecker

        If the checker rejects the content, `tfp` is closed, `filename` is
        removed and ``DistutilsError`` is raised.
        """
        checker.report(self.debug,
            "Validating %%s checksum for %s" % filename)
        if not checker.is_valid():
            tfp.close()
            os.unlink(filename)
            raise DistutilsError(
                "%s validation failed for %s; "
                "possible download problem?" % (
                checker.hash.name, os.path.basename(filename))
            )

    def add_find_links(self, urls):
        """Add `urls` to the list that will be prescanned for searches"""
        for url in urls:
            if (
                self.to_scan is None        # if we have already "gone online"
                or not URL_SCHEME(url)      # or it's a local file/directory
                or url.startswith('file:')
                or list(distros_for_url(url))   # or a direct package link
            ):
                # then go ahead and process it now
                self.scan_url(url)
            else:
                # otherwise, defer retrieval till later
                self.to_scan.append(url)

    def prescan(self):
        """Scan urls scheduled for prescanning (e.g. --find-links)"""
        if self.to_scan:
            for url in self.to_scan:
                self.scan_url(url)
        self.to_scan = None     # from now on, go ahead and process immediately

    def not_found_in_index(self, requirement):
        """Log that no index page was found for `requirement`, then scan the whole index."""
        if self[requirement.key]:    # we've seen at least one distro
            meth, msg = self.info, "Couldn't retrieve index page for %r"
        else:   # no distros seen for this name, might be misspelled
            meth, msg = (self.warn,
                "Couldn't find index page for %r (maybe misspelled?)")
        meth(msg, requirement.unsafe_name)
        self.scan_all()

    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object).  If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged.  If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned.  Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                try:
                    spec = Requirement.parse(spec)
                except ValueError:
                    raise DistutilsError(
                        "Not a URL, existing file, or requirement spec: %r" %
                        (spec,)
                    )
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)

    def fetch_distribution(
            self, requirement, tmpdir, force_scan=False, source=False,
            develop_ok=False, local_index=None
            ):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages.  If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename.  If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered.  Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn("Skipping development or system egg: %s", dist)
                        skipped[dist] = 1
                    continue

                if dist in req and (dist.precedence <= SOURCE_DIST or not source):
                    return dist

        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if local_index is not None:
            dist = dist or find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            return dist.clone(location=self.download(dist.location, tmpdir))

    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
        """Obtain a file suitable for fulfilling `requirement`

        DEPRECATED; use the ``fetch_distribution()`` method now instead.  For
        backward compatibility, this routine is identical but returns the
        ``location`` of the downloaded distribution instead of a distribution
        object.
        """
        dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
        if dist is not None:
            return dist.location
        return None

    def gen_setup(self, filename, fragment, tmpdir):
        """Write a trivial ``setup.py`` into `tmpdir` for the .py file `filename`.

        `fragment` must be an ``egg=name-version`` tag with exactly one
        versioned interpretation; otherwise ``DistutilsError`` is raised.
        Returns the path of the module file (copied into `tmpdir` if it was
        not already there).
        """
        match = EGG_FRAGMENT.match(fragment)
        dists = match and [
            d for d in
            interpret_distro_name(filename, match.group(1), None) if d.version
        ] or []

        if len(dists) == 1:   # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                from setuptools.command.easy_install import samefile
                if not samefile(filename, dst):
                    shutil.copy2(filename, dst)
                    filename = dst

            with open(os.path.join(tmpdir, 'setup.py'), 'w') as setup_file:
                setup_file.write(
                    "from setuptools import setup\n"
                    "setup(name=%r, version=%r, py_modules=[%r])\n"
                    % (
                        dists[0].project_name, dists[0].version,
                        os.path.splitext(basename)[0]
                    )
                )
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )

    # number of bytes requested per read while downloading
    dl_blocksize = 8192

    def _download_to(self, url, filename):
        """Download `url` into `filename`, verifying any hash in its fragment.

        Returns the response headers; raises ``DistutilsError`` when the
        server answers with an HTTP error.
        """
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(strip_fragment(url))
            if isinstance(fp, HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = get_all_headers(headers, 'Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if not block:
                        break
                    checker.feed(block)
                    tfp.write(block)
                    blocknum += 1
                    self.reporthook(url, filename, blocknum, bs, size)
                self.check_hash(checker, filename, tfp)
            return headers
        finally:
            if fp:
                fp.close()

    def reporthook(self, url, filename, blocknum, blksize, size):
        """Progress callback invoked by ``_download_to``; does nothing here."""
        pass    # no-op

    def open_url(self, url, warning=None):
        """Open `url`, turning transport errors into warnings or ``DistutilsError``.

        With `warning` (a template containing one ``%s``), failures are
        logged and None is returned implicitly; without it they raise
        ``DistutilsError``.  HTTP error responses are returned, not raised.
        """
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, httplib.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg))
        except urllib2.HTTPError as v:
            return v
        except urllib2.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v.reason))
        except httplib.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' %
                    (url, v.line)
                )
        except httplib.HTTPException as v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v))

    def _download_url(self, scheme, url, tmpdir):
        """Fetch `url` into `tmpdir` using the handler for `scheme`; return the local path."""
        # Determine download filename
        #
        name, fragment = egg_info_for_url(url)
        if name:
            # collapse ".." so the name cannot climb out of tmpdir
            while '..' in name:
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"    # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]    # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme == 'svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme == 'git' or scheme.startswith('git+'):
            return self._download_git(url, filename)
        elif scheme.startswith('hg+'):
            return self._download_hg(url, filename)
        elif scheme == 'file':
            return url2pathname(urlparse(url)[2])
        else:
            self.url_ok(url, True)   # raises error if not allowed
            return self._attempt_download(url, filename)

    def scan_url(self, url):
        """Process `url`, retrieving the page if necessary."""
        self.process_url(url, True)

    def _attempt_download(self, url, filename):
        """Download `url`; HTML responses are handed to ``_download_html``."""
        headers = self._download_to(url, filename)
        if 'html' in headers.get('content-type', '').lower():
            return self._download_html(url, headers, filename)
        else:
            return filename

    def _download_html(self, url, headers, filename):
        """Handle a download that turned out to be an HTML page.

        If the first non-blank line looks like a subversion index page, do a
        subversion checkout instead; otherwise raise ``DistutilsError``.  The
        downloaded page is removed in both cases.
        """
        is_svn_index = False
        # ``with`` closes the page even if reading it fails
        with open(filename) as html:
            for line in html:
                if line.strip():
                    # Check for a subversion index page
                    if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                        is_svn_index = True
                    break   # only the first non-blank line is inspected
        os.unlink(filename)
        if is_svn_index:
            return self._download_svn(url, filename)
        raise DistutilsError("Unexpected HTML page found at "+url)

    def _run_vcs(self, argv, cwd=None):
        """Run the version-control command `argv` without a shell.

        Arguments are passed as a list so that text taken from URLs is never
        interpreted by a shell.  Returns the exit status, or 1 (after
        logging a warning) when the program could not be started.
        """
        import subprocess   # local import: only needed for VCS downloads
        try:
            return subprocess.call(argv, cwd=cwd)
        except OSError as exc:
            self.warn("Could not run %s: %s", argv[0], exc)
            return 1

    def _download_svn(self, url, filename):
        """Check out the subversion URL `url` into `filename` and return it."""
        url = url.split('#', 1)[0]   # remove any fragment for svn's sake
        creds = []
        if url.lower().startswith('svn:') and '@' in url:
            scheme, netloc, path, p, q, f = urlparse(url)
            if not netloc and path.startswith('//') and '/' in path[2:]:
                netloc, path = path[2:].split('/', 1)
                auth, host = splituser(netloc)
                if auth:
                    if ':' in auth:
                        user, pw = auth.split(':', 1)
                        creds = ["--username=" + user, "--password=" + pw]
                    else:
                        creds = ["--username=" + auth]
                    netloc = host
                    # rebuild from the credential-free host and the *path*;
                    # passing the whole URL as the path component produced
                    # a malformed URL
                    url = urlunparse((scheme, netloc, path, p, q, f))
        self.info("Doing subversion checkout from %s to %s", url, filename)
        self._run_vcs(["svn", "checkout"] + creds + ["-q", url, filename])
        return filename

    @staticmethod
    def _vcs_split_rev_from_url(url, pop_prefix=False):
        """Return (url, rev) with any ``vcs+`` scheme prefix and ``@rev`` suffix removed.

        `rev` is None when the path carries no ``@``.  `pop_prefix` is
        accepted but not used.
        """
        scheme, netloc, path, query, frag = urlsplit(url)

        scheme = scheme.split('+', 1)[-1]

        # Some fragment identification fails
        path = path.split('#', 1)[0]

        rev = None
        if '@' in path:
            path, rev = path.rsplit('@', 1)

        # Also, discard fragment
        url = urlunsplit((scheme, netloc, path, query, ''))

        return url, rev

    def _download_git(self, url, filename):
        """Clone the git repository at `url` into `filename`, checking out any ``@rev``."""
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing git clone from %s to %s", url, filename)
        self._run_vcs(["git", "clone", "--quiet", url, filename])

        if rev is not None:
            self.info("Checking out %s", rev)
            self._run_vcs(["git", "checkout", "--quiet", rev], cwd=filename)

        return filename

    def _download_hg(self, url, filename):
        """Clone the Mercurial repository at `url` into `filename`, updating to any ``@rev``."""
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing hg clone from %s to %s", url, filename)
        self._run_vcs(["hg", "clone", "--quiet", url, filename])

        if rev is not None:
            self.info("Updating to %s", rev)
            # "-q" stands in for the shell redirection that used to silence
            # the command's output
            self._run_vcs(["hg", "up", "-C", "-r", rev, "-q"], cwd=filename)

        return filename

    def debug(self, msg, *args):
        """Log `msg` at debug level via ``distutils.log``."""
        log.debug(msg, *args)

    def info(self, msg, *args):
        """Log `msg` at info level via ``distutils.log``."""
        log.info(msg, *args)

    def warn(self, msg, *args):
        """Log `msg` at warning level via ``distutils.log``."""
        log.warn(msg, *args)
863
864 # This pattern matches a character entity reference (a decimal numeric
865 # reference, a hexadecimal numeric reference, or a named reference).
866 entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
867
def uchr(c):
    """Return the character for integer code point `c`.

    Anything that is not an int is handed back unchanged.  Code points
    above 255 go through ``unichr``, the rest through ``chr``.
    """
    if isinstance(c, int):
        return unichr(c) if c > 255 else chr(c)
    return c
873
def decode_entity(match):
    """Return the replacement text for one entity-reference match.

    Numeric references (decimal or ``#x`` hexadecimal) become the matching
    character; named references are looked up in ``name2codepoint`` and
    left as the matched text when unknown.
    """
    entity = match.group(1)
    if entity.startswith('#x'):
        return uchr(int(entity[2:], 16))
    if entity.startswith('#'):
        return uchr(int(entity[1:]))
    # unknown names fall back to the original text, which uchr passes through
    return uchr(name2codepoint.get(entity, match.group(0)))
883
def htmldecode(text):
    """Return `text` with its HTML entity references decoded."""
    decoded = entity_sub(decode_entity, text)
    return decoded
887
def socket_timeout(timeout=15):
    """Return a decorator that runs a function under a default socket timeout.

    While the decorated function runs, ``socket.setdefaulttimeout(timeout)``
    is in effect; the previous default is restored afterwards, even if the
    function raises.  ``functools.wraps`` keeps the decorated function's
    name and docstring.
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            previous = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(previous)
        return wrapper
    return decorator
899
def _encode_auth(auth):
    """
    Encode the (possibly percent-quoted) ``user:password`` text from a URL
    as base64, suitable for an HTTP ``Authorization: Basic`` header.
    >>> str(_encode_auth('username%3Apassword'))
    'dXNlcm5hbWU6cGFzc3dvcmQ='

    Long auth strings should not cause a newline to be inserted.
    >>> long_auth = 'username:' + 'password'*10
    >>> chr(10) in str(_encode_auth(long_auth))
    False
    """
    auth_s = unquote(auth)
    # convert to bytes
    auth_bytes = auth_s.encode()
    # b64encode replaces the legacy ``base64.encodestring`` (removed in
    # Python 3.9); unlike that function it never inserts line breaks
    encoded_bytes = base64.b64encode(auth_bytes)
    # convert back to a string
    return encoded_bytes.decode()
921
class Credential(object):
    """
    Holds a username and a password.  Iterating yields the username, then
    the password, so instances unpack like a two-item namedtuple.
    """

    def __init__(self, username, password):
        self.username, self.password = username, password

    def __iter__(self):
        for field in ('username', 'password'):
            yield getattr(self, field)

    def __str__(self):
        # rendered as "username:password"
        return '%(username)s:%(password)s' % vars(self)
936
class PyPIConfig(ConfigParser.ConfigParser):
    """Config parser preloaded from ``~/.pypirc``, if that file exists."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        # every section falls back to empty username/password/repository
        blank = dict.fromkeys(['username', 'password', 'repository'], '')
        ConfigParser.ConfigParser.__init__(self, blank)

        home = os.path.expanduser('~')
        rc = os.path.join(home, '.pypirc')
        if os.path.exists(rc):
            self.read(rc)

    @property
    def creds_by_repository(self):
        """Map each non-empty repository URL to its ``Credential``."""
        by_repo = {}
        for section in self.sections():
            if not self.get(section, 'repository').strip():
                continue    # section does not name a repository
            repo, cred = self._get_repo_cred(section)
            by_repo[repo] = cred
        return by_repo

    def _get_repo_cred(self, section):
        """Return (repository, Credential) for `section`, values stripped."""
        username = self.get(section, 'username').strip()
        password = self.get(section, 'password').strip()
        repo = self.get(section, 'repository').strip()
        return repo, Credential(username, password)

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        for repository, cred in self.creds_by_repository.items():
            if url.startswith(repository):
                return cred
        return None
965
966 def find_credential(self, url):
967 """
968 If the URL indicated appears to be a repository defined in this
969 config, return the credential for that repository.
970 """
971 for repository, cred in self.creds_by_repository.items():
972 if url.startswith(repository):
973 return cred
974
975
def open_with_auth(url, opener=urllib2.urlopen):
    """Open a urllib2 request, handling HTTP authentication

    Credentials come from the URL's user-info part (http/https only) or,
    failing that, from a matching repository in ``.pypirc``.  They are sent
    as a Basic ``Authorization`` header and stripped from the requested
    URL.  Raises ``httplib.InvalidURL`` when the network location ends
    with ``:``.
    """

    scheme, netloc, path, params, query, frag = urlparse(url)

    # Double scheme does not raise on Mac OS X as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise httplib.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, host = splituser(netloc)
    else:
        # ``host`` must be bound here too: a .pypirc credential found below
        # makes the code rebuild the URL from it
        auth, host = None, netloc

    if not auth:
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            log.info(
                'Authenticating as %s for %s (from .pypirc)',
                cred.username, url
            )

    if auth:
        auth = "Basic " + _encode_auth(auth)
        new_url = urlunparse((scheme, host, path, params, query, frag))
        request = urllib2.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib2.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urlparse(fp.url)
        if s2 == scheme and h2 == host:
            fp.url = urlunparse((s2, netloc, path2, param2, query2, frag2))

    return fp

# adding a timeout to avoid freezing package_index
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1020
1021
def fix_sf_url(url):
    """Return `url` unchanged; kept only for backward compatibility."""
    return url
1024
def local_open(url):
    """Read a local path, with special support for directories

    A regular file is opened with ``urllib2.urlopen``.  A directory (URL
    path ending in ``/``) is answered with its ``index.html`` if one is
    encountered while listing it, otherwise with a generated page of links
    to its entries.  Anything else yields a 404 response.  Directory and
    404 answers are returned as ``HTTPError`` objects.
    """
    path = urlparse(url)[2]
    filename = url2pathname(path)
    if os.path.isfile(filename):
        return urllib2.urlopen(url)

    if path.endswith('/') and os.path.isdir(filename):
        status, message = 200, "OK"
        links = []
        body = None
        for entry in os.listdir(filename):
            entry_path = os.path.join(filename, entry)
            if entry == 'index.html':
                # an index page replaces the generated listing
                with open(entry_path, 'r') as fp:
                    body = fp.read()
                break
            if os.path.isdir(entry_path):
                entry += '/'
            links.append("<a href=%r>%s</a>" % (entry, entry))
        if body is None:
            head = "<html><head><title>%s</title>" % url
            tail = "</head><body>%s</body></html>" % '\n'.join(links)
            body = head + tail
    else:
        status, message, body = 404, "Path not found", "Not found"

    headers = {'content-type': 'text/html'}
    return HTTPError(url, status, message, headers, StringIO(body))