Mercurial > repos > bcclaywell > argo_navis
comparison venv/lib/python2.7/site-packages/pip/download.py @ 0:d67268158946 draft
planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
| author | bcclaywell | 
|---|---|
| date | Mon, 12 Oct 2015 17:43:33 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:d67268158946 | 
|---|---|
| 1 from __future__ import absolute_import | |
| 2 | |
| 3 import cgi | |
| 4 import email.utils | |
| 5 import hashlib | |
| 6 import getpass | |
| 7 import json | |
| 8 import logging | |
| 9 import mimetypes | |
| 10 import os | |
| 11 import platform | |
| 12 import re | |
| 13 import shutil | |
| 14 import sys | |
| 15 import tempfile | |
| 16 | |
| 17 from pip._vendor.six.moves.urllib import parse as urllib_parse | |
| 18 from pip._vendor.six.moves.urllib import request as urllib_request | |
| 19 | |
| 20 import pip | |
| 21 | |
| 22 from pip.exceptions import InstallationError, HashMismatch | |
| 23 from pip.models import PyPI | |
| 24 from pip.utils import (splitext, rmtree, format_size, display_path, | |
| 25 backup_dir, ask_path_exists, unpack_file, | |
| 26 call_subprocess) | |
| 27 from pip.utils.filesystem import check_path_owner | |
| 28 from pip.utils.logging import indent_log | |
| 29 from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner | |
| 30 from pip.locations import write_delete_marker_file | |
| 31 from pip.vcs import vcs | |
| 32 from pip._vendor import requests, six | |
| 33 from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter | |
| 34 from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth | |
| 35 from pip._vendor.requests.models import Response | |
| 36 from pip._vendor.requests.structures import CaseInsensitiveDict | |
| 37 from pip._vendor.requests.packages import urllib3 | |
| 38 from pip._vendor.cachecontrol import CacheControlAdapter | |
| 39 from pip._vendor.cachecontrol.caches import FileCache | |
| 40 from pip._vendor.lockfile import LockError | |
| 41 from pip._vendor.six.moves import xmlrpc_client | |
| 42 | |
| 43 | |
# Public names exported by ``from pip.download import *``.
__all__ = [
    'get_file_content',
    'is_url',
    'url_to_path',
    'path_to_url',
    'is_archive_file',
    'unpack_vcs_link',
    'unpack_file_url',
    'is_vcs_url',
    'is_file_url',
    'unpack_http_url',
    'unpack_url',
]


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
| 52 | |
| 53 | |
def user_agent():
    """
    Return a string representing the user agent.

    The result has the form ``<installer name>/<installer version> <json>``
    where ``<json>`` is a compact, key-sorted JSON object describing the
    installer, the Python implementation and, when they can be determined,
    the distro, system and cpu.
    """
    implementation = platform.python_implementation()
    data = {
        "installer": {"name": "pip", "version": pip.__version__},
        "python": platform.python_version(),
        "implementation": {"name": implementation},
    }

    if implementation == 'PyPy':
        # Only final releases are reported as a plain X.Y.Z version.
        if sys.pypy_version_info.releaselevel == 'final':
            pypy_version_info = sys.pypy_version_info[:3]
        else:
            pypy_version_info = sys.pypy_version_info
        data["implementation"]["version"] = ".".join(
            [str(x) for x in pypy_version_info]
        )
    elif implementation in ('CPython', 'Jython', 'IronPython'):
        # For Jython and IronPython this is a complete guess.
        data["implementation"]["version"] = platform.python_version()

    if sys.platform.startswith("linux"):
        # platform.linux_distribution() does not exist on every Python
        # version (it was removed in Python 3.8); fall back to reporting no
        # distro name/version/id instead of raising AttributeError.
        linux_distribution = getattr(platform, "linux_distribution", None)
        if linux_distribution is not None:
            distro_info = linux_distribution()
        else:
            distro_info = ()

        # Keep only the fields that actually have a value.
        distro = dict(
            (key, value)
            for key, value in zip(["name", "version", "id"], distro_info)
            if value
        )
        libc = dict(
            (key, value)
            for key, value in zip(["lib", "version"], platform.libc_ver())
            if value
        )
        if libc:
            distro["libc"] = libc
        if distro:
            data["distro"] = distro

    if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
        data["distro"] = {"name": "OS X", "version": platform.mac_ver()[0]}

    if platform.system():
        data.setdefault("system", {})["name"] = platform.system()

    if platform.release():
        data.setdefault("system", {})["release"] = platform.release()

    if platform.machine():
        data["cpu"] = platform.machine()

    return "{data[installer][name]}/{data[installer][version]} {json}".format(
        data=data,
        json=json.dumps(data, separators=(",", ":"), sort_keys=True),
    )
| 113 | |
| 114 | |
class MultiDomainBasicAuth(AuthBase):
    """
    Auth handler that keeps HTTP Basic credentials per network location.

    Credentials come from the request URL itself or, when ``prompting`` is
    enabled, from the user after a 401 response. They are remembered in
    ``self.passwords`` keyed by netloc.
    """

    def __init__(self, prompting=True):
        self.prompting = prompting
        self.passwords = {}

    def __call__(self, req):
        url_parts = urllib_parse.urlparse(req.url)
        raw_netloc = url_parts.netloc

        # Strip any "user:pass@" prefix and rewrite the request url so the
        # credentials are not part of it.
        host = raw_netloc.rsplit("@", 1)[-1]
        req.url = urllib_parse.urlunparse(
            url_parts[:1] + (host,) + url_parts[2:]
        )

        # Prefer credentials already stored for this host; otherwise fall
        # back to whatever was embedded in the url.
        username, password = self.passwords.get(host, (None, None))
        if username is None:
            username, password = self.parse_credentials(raw_netloc)

        if username or password:
            # Remember the credentials and send them with this request.
            self.passwords[host] = (username, password)
            req = HTTPBasicAuth(username or "", password or "")(req)

        # Let handle_401 inspect every response to this request.
        req.register_hook("response", self.handle_401)

        return req

    def handle_401(self, resp, **kwargs):
        # Pass through anything that is not a 401, and also 401s when we
        # are not allowed to prompt the user.
        if resp.status_code != 401 or not self.prompting:
            return resp

        netloc = urllib_parse.urlparse(resp.url).netloc

        # Ask the user for fresh credentials.
        username = six.moves.input("User for %s: " % netloc)
        password = getpass.getpass("Password: ")

        # Keep them for later requests to the same netloc.
        if username or password:
            self.passwords[netloc] = (username, password)

        # Consume the body and release the connection so that the retried
        # request can reuse it.
        resp.content
        resp.raw.release_conn()

        # Re-send the original request with the new credentials attached.
        retry_req = HTTPBasicAuth(username or "", password or "")(
            resp.request
        )
        new_resp = resp.connection.send(retry_req, **kwargs)
        new_resp.history.append(resp)

        return new_resp

    def parse_credentials(self, netloc):
        # Returns a (username, password) pair; either may be None.
        if "@" not in netloc:
            return None, None

        userinfo = netloc.rsplit("@", 1)[0]
        if ":" in userinfo:
            return userinfo.split(":", 1)
        return userinfo, None
| 190 | |
| 191 | |
class LocalFSAdapter(BaseAdapter):
    """Transport adapter that answers ``file:`` urls from the filesystem."""

    def send(self, request, stream=None, timeout=None, verify=None, cert=None,
             proxies=None):
        local_path = url_to_path(request.url)

        response = Response()
        response.status_code = 200
        response.url = request.url

        try:
            file_stat = os.stat(local_path)
        except OSError as exc:
            # The path cannot be stat'ed: report a 404 and carry the
            # exception as the raw body.
            response.status_code = 404
            response.raw = exc
            return response

        last_modified = email.utils.formatdate(
            file_stat.st_mtime, usegmt=True
        )
        guessed_type = mimetypes.guess_type(local_path)[0] or "text/plain"
        response.headers = CaseInsensitiveDict({
            "Content-Type": guessed_type,
            "Content-Length": file_stat.st_size,
            "Last-Modified": last_modified,
        })

        # Closing the response closes the underlying file.
        response.raw = open(local_path, "rb")
        response.close = response.raw.close

        return response

    def close(self):
        pass
| 223 | |
| 224 | |
class SafeFileCache(FileCache):
    """
    A file based cache which is safe to use even when the target directory may
    not be accessible or writable.
    """

    def __init__(self, *args, **kwargs):
        super(SafeFileCache, self).__init__(*args, **kwargs)

        # The cache is only used when check_path_owner() accepts the cache
        # directory; otherwise warn and switch the cache off by clearing
        # the directory.
        if check_path_owner(self.directory):
            return

        logger.warning(
            "The directory '%s' or its parent directory is not owned by "
            "the current user and the cache has been disabled. Please "
            "check the permissions and owner of that directory. If "
            "executing pip with sudo, you may want sudo's -H flag.",
            self.directory,
        )
        self.directory = None

    def get(self, *args, **kwargs):
        # Without a directory the cache is a no-op.
        if self.directory is not None:
            try:
                return super(SafeFileCache, self).get(*args, **kwargs)
            except (LockError, OSError, IOError):
                # Deliberately ignored: an inaccessible cache is treated
                # the same as having no cache at all.
                pass
        return None

    def set(self, *args, **kwargs):
        # Without a directory the cache is a no-op.
        if self.directory is not None:
            try:
                return super(SafeFileCache, self).set(*args, **kwargs)
            except (LockError, OSError, IOError):
                # Deliberately ignored: an inaccessible cache is treated
                # the same as having no cache at all.
                pass
        return None

    def delete(self, *args, **kwargs):
        # Without a directory the cache is a no-op.
        if self.directory is not None:
            try:
                return super(SafeFileCache, self).delete(*args, **kwargs)
            except (LockError, OSError, IOError):
                # Deliberately ignored: an inaccessible cache is treated
                # the same as having no cache at all.
                pass
        return None
| 289 | |
| 290 | |
class InsecureHTTPAdapter(HTTPAdapter):
    """HTTPAdapter whose connections do not verify certificates."""

    def cert_verify(self, conn, url, verify, cert):
        # Ignore the requested verify/cert settings: never require a
        # certificate and use no CA bundle.
        conn.cert_reqs = 'CERT_NONE'
        conn.ca_certs = None
| 296 | |
| 297 | |
class PipSession(requests.Session):
    """
    requests session preconfigured for pip.

    Accepts three extra keyword arguments: ``retries`` (total retry count),
    ``cache`` (cache directory for https responses) and ``insecure_hosts``
    (hosts whose https certificates are not validated).
    """

    # Default timeout applied to requests that do not pass their own.
    timeout = None

    def __init__(self, *args, **kwargs):
        retry_total = kwargs.pop("retries", 0)
        cache_dir = kwargs.pop("cache", None)
        insecure_hosts = kwargs.pop("insecure_hosts", [])

        super(PipSession, self).__init__(*args, **kwargs)

        # Identify ourselves and install the per-host basic auth handler.
        self.headers["User-Agent"] = user_agent()
        self.auth = MultiDomainBasicAuth()

        # Retry policy shared by all adapters:
        #  - total: overall number of retries allowed for a request.
        #  - status_forcelist: a 503 from PyPI typically means the
        #    Fastly -> Origin connection got interrupted, and 503 is
        #    generally a transient error, so retry it.
        #  - backoff_factor: wait a little between failed attempts so the
        #    service is not hammered.
        retry_policy = urllib3.Retry(
            total=retry_total,
            status_forcelist=[503],
            backoff_factor=0.25,
        )

        # Only responses fetched from secure origins are cached: an
        # insecurely fetched response cannot be validated, and a poisoned
        # cache would need manual eviction to fix.
        if cache_dir:
            secure_adapter = CacheControlAdapter(
                cache=SafeFileCache(cache_dir),
                max_retries=retry_policy,
            )
        else:
            secure_adapter = HTTPAdapter(max_retries=retry_policy)

        # The insecure adapter disables HTTPS validation and never caches.
        # It serves all http:// urls plus any https:// host marked as
        # ignoring TLS errors.
        insecure_adapter = InsecureHTTPAdapter(max_retries=retry_policy)

        self.mount("https://", secure_adapter)
        self.mount("http://", insecure_adapter)

        # Enable file:// urls
        self.mount("file://", LocalFSAdapter())

        # Hosts deemed insecure get the non-validating adapter.
        for host in insecure_hosts:
            self.mount("https://{0}/".format(host), insecure_adapter)

    def request(self, method, url, *args, **kwargs):
        # Fall back to the session-wide timeout when none was given.
        kwargs.setdefault("timeout", self.timeout)

        return super(PipSession, self).request(method, url, *args, **kwargs)
| 368 | |
| 369 | |
def get_file_content(url, comes_from=None, session=None):
    """Gets the content of a file; it may be a filename, file: URL, or
    http: URL.  Returns (location, content).  Content is unicode."""
    if session is None:
        raise TypeError(
            "get_file_content() missing 1 required keyword argument: 'session'"
        )

    scheme_match = _scheme_re.search(url)
    if scheme_match:
        scheme = scheme_match.group(1).lower()
        is_local = scheme == 'file'

        # A requirements file fetched over http(s) may not point at local
        # files.
        if is_local and comes_from and comes_from.startswith('http'):
            raise InstallationError(
                'Requirements file %s references URL %s, which is local'
                % (comes_from, url))

        if not is_local:
            # FIXME: catch some errors
            resp = session.get(url)
            resp.raise_for_status()

            if six.PY3:
                return resp.url, resp.text
            return resp.url, resp.content

        # Turn the file: URL into a plain filesystem path and fall through
        # to the local read below.
        local_path = url.split(':', 1)[1].replace('\\', '/')
        drive_match = _url_slash_drive_re.match(local_path)
        if drive_match:
            # "/C|/dir" style drive notation becomes "C:/dir".
            local_path = (
                drive_match.group(1) + ':' + local_path.split('|', 1)[1]
            )
        local_path = urllib_parse.unquote(local_path)
        if local_path.startswith('/'):
            # Collapse any run of leading slashes into a single one.
            local_path = '/' + local_path.lstrip('/')
        url = local_path

    try:
        with open(url) as f:
            content = f.read()
    except IOError as exc:
        raise InstallationError(
            'Could not open requirements file: %s' % str(exc)
        )
    return url, content
| 413 | |
| 414 | |
# Matches a leading http:, https: or file: scheme, case-insensitively.
_scheme_re = re.compile(r'^(http|https|file):', re.IGNORECASE)

# Matches the "/C|" drive notation at the start of a file: URL path.
_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.IGNORECASE)
| 417 | |
| 418 | |
def is_url(name):
    """Returns true if the name looks like a URL"""
    scheme, colon, _rest = name.partition(':')
    if not colon:
        # No ':' at all, so there is no scheme to inspect.
        return False
    known_schemes = ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
    return scheme.lower() in known_schemes
| 425 | |
| 426 | |
def url_to_path(url):
    """
    Convert a file: URL to a path.
    """
    assert url.startswith('file:'), (
        "You can only turn file: urls into filenames (not %r)" % url)

    split_url = urllib_parse.urlsplit(url)
    location = split_url[1]

    # A non-empty network location means a UNC path: prepend the UNC share
    # notation.
    if location:
        location = '\\\\' + location

    return urllib_request.url2pathname(location + split_url[2])
| 442 | |
| 443 | |
def path_to_url(path):
    """
    Convert a path to a file: URL.  The path will be made absolute and have
    quoted path parts.
    """
    absolute = os.path.abspath(path)
    quoted = urllib_request.pathname2url(os.path.normpath(absolute))
    return urllib_parse.urljoin('file:', quoted)
| 452 | |
| 453 | |
def is_archive_file(name):
    """Return True if `name` is a considered as an archive file."""
    archive_extensions = (
        '.zip', '.tar.gz', '.tar.bz2', '.tgz', '.tar', '.whl'
    )
    # The extension is compared case-insensitively.
    return splitext(name)[1].lower() in archive_extensions
| 463 | |
| 464 | |
def unpack_vcs_link(link, location, only_download=False):
    """Export (when only downloading) or unpack the VCS link at `location`."""
    backend = _get_used_vcs_backend(link)
    if not only_download:
        backend.unpack(location)
    else:
        backend.export(location)
| 471 | |
| 472 | |
def _get_used_vcs_backend(link):
    """Return a backend instance for the first registered VCS backend whose
    schemes include the link's scheme, or None when there is no match."""
    for backend_cls in vcs.backends:
        if link.scheme in backend_cls.schemes:
            return backend_cls(link.url)
    return None
| 478 | |
| 479 | |
def is_vcs_url(link):
    """Return True when a VCS backend handles the link's scheme."""
    backend = _get_used_vcs_backend(link)
    return bool(backend)
| 482 | |
| 483 | |
def is_file_url(link):
    """Return True when the link's url uses the file: scheme (any case)."""
    lowered_url = link.url.lower()
    return lowered_url.startswith('file:')
| 486 | |
| 487 | |
def _check_hash(download_hash, link):
    """
    Verify a computed digest against the hash carried by `link`.

    :param download_hash: a hashlib object holding the digest of the file,
        or None when no digest could be computed.
    :param link: object providing ``hash_name`` and ``hash``.
    :raises HashMismatch: if the digest size does not fit ``link.hash_name``
        or the hex digest differs from ``link.hash``.
    """
    if download_hash is None:
        # Callers pass None when the algorithm named by the link is not
        # supported by hashlib (they have already logged a warning). There
        # is nothing to compare, so skip the check instead of failing with
        # an AttributeError below.
        return

    if download_hash.digest_size != hashlib.new(link.hash_name).digest_size:
        logger.critical(
            "Hash digest size of the package %d (%s) doesn't match the "
            "expected hash name %s!",
            download_hash.digest_size, link, link.hash_name,
        )
        raise HashMismatch('Hash name mismatch for package %s' % link)

    if download_hash.hexdigest() != link.hash:
        logger.critical(
            "Hash of the package %s (%s) doesn't match the expected hash %s!",
            link, download_hash.hexdigest(), link.hash,
        )
        raise HashMismatch(
            'Bad %s hash for package %s' % (link.hash_name, link)
        )
| 504 | |
| 505 | |
def _get_hash_from_file(target_file, link):
    """Return a hashlib object for the contents of `target_file` using the
    algorithm named by ``link.hash_name``, or None (after logging a warning)
    when that algorithm cannot be instantiated."""
    try:
        digest = hashlib.new(link.hash_name)
    except (ValueError, TypeError):
        logger.warning(
            "Unsupported hash name %s for package %s", link.hash_name, link,
        )
        return None

    with open(target_file, 'rb') as fp:
        # Feed the file through the digest in 4096-byte chunks until read()
        # returns an empty string.
        for chunk in iter(lambda: fp.read(4096), b''):
            digest.update(chunk)
    return digest
| 522 | |
| 523 | |
def _progress_indicator(iterable, *args, **kwargs):
    """No-op progress indicator: hand back `iterable` untouched, ignoring
    any extra arguments."""
    return iterable
| 526 | |
| 527 | |
def _download_url(resp, link, content_file):
    """
    Stream the body of `resp` into `content_file`, verifying its hash.

    :param resp: response whose ``raw`` attribute is streamed.
    :param link: link being downloaded; supplies the expected hash and the
        urls used in log messages.
    :param content_file: writable binary file object receiving the data.
    :returns: the hashlib object fed with the downloaded bytes, or None when
        the link carries no hash or names an unsupported algorithm.
    :raises HashMismatch: if the downloaded data does not match the link.
    """
    download_hash = None
    if link.hash and link.hash_name:
        try:
            download_hash = hashlib.new(link.hash_name)
        except (ValueError, TypeError):
            logger.warning(
                "Unsupported hash name %s for package %s",
                link.hash_name, link,
            )

    try:
        total_length = int(resp.headers['content-length'])
    except (ValueError, KeyError, TypeError):
        total_length = 0

    cached_resp = getattr(resp, "from_cache", False)

    # Progress is shown only when INFO logging is visible, the response did
    # not come from the cache, and the body is either large (> 40 kB) or of
    # unknown length.
    if logger.getEffectiveLevel() > logging.INFO:
        show_progress = False
    elif cached_resp:
        show_progress = False
    elif total_length > (40 * 1000):
        show_progress = True
    elif not total_length:
        show_progress = True
    else:
        show_progress = False

    show_url = link.show_url

    def resp_read(chunk_size):
        try:
            # Special case for urllib3.
            for chunk in resp.raw.stream(
                    chunk_size,
                    # We use decode_content=False here because we do not
                    # want urllib3 to mess with the raw bytes we get from
                    # the server. If we decompress inside of urllib3 then
                    # we cannot verify the checksum because the checksum
                    # will be of the compressed file. This breakage will
                    # only occur if the server adds a Content-Encoding
                    # header, which depends on how the server was
                    # configured:
                    # - Some servers will notice that the file isn't a
                    #   compressible file and will leave the file alone
                    #   and with an empty Content-Encoding
                    # - Some servers will notice that the file is already
                    #   compressed and will leave the file alone and will
                    #   add a Content-Encoding: gzip header
                    # - Some servers won't notice anything at all and will
                    #   take a file that's already been compressed and
                    #   compress it again and set the
                    #   Content-Encoding: gzip header
                    #
                    # By setting this not to decode automatically we hope
                    # to eliminate problems with the second case.
                    decode_content=False):
                yield chunk
        except AttributeError:
            # Standard file-like object.
            while True:
                chunk = resp.raw.read(chunk_size)
                if not chunk:
                    break
                yield chunk

    progress_indicator = _progress_indicator

    if link.netloc == PyPI.netloc:
        url = show_url
    else:
        url = link.url_without_fragment

    if show_progress:  # We don't show progress on cached responses
        if total_length:
            logger.info(
                "Downloading %s (%s)", url, format_size(total_length),
            )
            progress_indicator = DownloadProgressBar(
                max=total_length,
            ).iter
        else:
            logger.info("Downloading %s", url)
            progress_indicator = DownloadProgressSpinner().iter
    elif cached_resp:
        logger.info("Using cached %s", url)
    else:
        logger.info("Downloading %s", url)

    logger.debug('Downloading from URL %s', link)

    for chunk in progress_indicator(resp_read(4096), 4096):
        if download_hash is not None:
            download_hash.update(chunk)
        content_file.write(chunk)

    # download_hash is None when the link has no hash or when its algorithm
    # is unsupported (already warned about above); in both cases there is
    # nothing to verify, and calling _check_hash with None would fail.
    if download_hash is not None:
        _check_hash(download_hash, link)
    return download_hash
| 628 | |
| 629 | |
def _copy_file(filename, location, content_type, link):
    """Copy `filename` into the directory `location` under the link's
    filename, asking what to do when that destination already exists."""
    download_location = os.path.join(location, link.filename)

    if os.path.exists(download_location):
        shown_path = display_path(download_location)
        response = ask_path_exists(
            'The file %s exists. (i)gnore, (w)ipe, (b)ackup ' %
            shown_path, ('i', 'w', 'b'))

        if response == 'i':
            # Keep the existing file; nothing is copied.
            return
        if response == 'w':
            logger.warning('Deleting %s', display_path(download_location))
            os.remove(download_location)
        elif response == 'b':
            dest_file = backup_dir(download_location)
            logger.warning(
                'Backing up %s to %s',
                display_path(download_location),
                display_path(dest_file),
            )
            shutil.move(download_location, dest_file)

    shutil.copy(filename, download_location)
    logger.info('Saved %s', display_path(download_location))
| 653 | |
| 654 | |
def unpack_http_url(link, location, download_dir=None, session=None):
    """
    Fetch `link` over http(s) and unpack it into `location`.

    :param link: link to download.
    :param location: directory the archive is unpacked into.
    :param download_dir: optional directory; a copy already present there is
        reused, and a freshly downloaded archive is copied into it.
    :param session: session used for the download (required).
    :raises TypeError: if `session` is not given.
    """
    if session is None:
        raise TypeError(
            "unpack_http_url() missing 1 required keyword argument: 'session'"
        )

    temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
    try:
        # If a download dir is specified, is the file already downloaded
        # there?
        already_downloaded_path = None
        if download_dir:
            already_downloaded_path = _check_download_dir(link, download_dir)

        if already_downloaded_path:
            from_path = already_downloaded_path
            content_type = mimetypes.guess_type(from_path)[0]
        else:
            # let's download to a tmp dir
            from_path, content_type = _download_http_url(
                link, session, temp_dir
            )

        # unpack the archive to the build dir location. even when only
        # downloading archives, they have to be unpacked to parse
        # dependencies
        unpack_file(from_path, location, content_type, link)

        # a download dir is specified; let's copy the archive there
        if download_dir and not already_downloaded_path:
            _copy_file(from_path, download_dir, content_type, link)
    finally:
        # Always remove the temporary directory, together with any archive
        # downloaded into it, even when the download or unpacking fails.
        rmtree(temp_dir)
| 686 | |
| 687 | |
def unpack_file_url(link, location, download_dir=None):
    """Unpack link into location.
    If download_dir is provided and link points to a file, make a copy
    of the link file inside download_dir."""

    link_path = url_to_path(link.url_without_fragment)

    # A link to a local directory is copied wholesale, replacing whatever
    # is at `location`.
    if os.path.isdir(link_path):
        if os.path.isdir(location):
            rmtree(location)
        shutil.copytree(link_path, location, symlinks=True)
        if download_dir:
            logger.info('Link is a directory, ignoring download_dir')
        return

    # When the link carries a hash, confirm the local file matches it.
    if link.hash:
        _check_hash(_get_hash_from_file(link_path, link), link)

    # Prefer a valid copy already sitting in download_dir, if any.
    if download_dir:
        already_downloaded_path = _check_download_dir(link, download_dir)
    else:
        already_downloaded_path = None
    from_path = already_downloaded_path or link_path

    content_type = mimetypes.guess_type(from_path)[0]

    # Archives are always unpacked into the build location, even when only
    # downloading, because dependencies are parsed from the unpacked tree.
    unpack_file(from_path, location, content_type, link)

    # Save a copy into download_dir unless one was already there.
    if download_dir and not already_downloaded_path:
        _copy_file(from_path, download_dir, content_type, link)
| 728 | |
| 729 | |
def _copy_dist_from_dir(link_path, location):
    """Copy distribution files in `link_path` to `location`.

    Invoked when user requests to install a local directory. E.g.:

        pip install .
        pip install ~/dev/git-repos/python-prompt-toolkit

    """

    # Note: This is currently VERY SLOW if you have a lot of data in the
    # directory, because it copies everything with `shutil.copytree`.
    # What it should really do is build an sdist and install that.
    # See https://github.com/pypa/pip/issues/2195

    if os.path.isdir(location):
        rmtree(location)

    # Build an sdist by running setup.py through a small bootstrap snippet,
    # writing the result into `location`.
    setup_py = 'setup.py'
    bootstrap = (
        "import setuptools, tokenize;__file__=%r;"
        "exec(compile(getattr(tokenize, 'open', open)(__file__).read()"
        ".replace('\\r\\n', '\\n'), __file__, 'exec'))" % setup_py
    )
    sdist_args = [
        sys.executable, '-c', bootstrap,
        'sdist', '--dist-dir', location,
    ]
    logger.info('Running setup.py sdist for %s', link_path)

    with indent_log():
        call_subprocess(sdist_args, cwd=link_path, show_stdout=False)

    # Unpack the first entry found in `location` back into `location`.
    sdist = os.path.join(location, os.listdir(location)[0])
    logger.info('Unpacking sdist %s into %s', sdist, location)
    unpack_file(sdist, location, content_type=None, link=None)
| 767 | |
| 768 | |
class PipXmlrpcTransport(xmlrpc_client.Transport):
    """Provide a `xmlrpclib.Transport` implementation via a `PipSession`
    object.
    """
    def __init__(self, index_url, session, use_datetime=False):
        xmlrpc_client.Transport.__init__(self, use_datetime)
        # Only the scheme of the index url is kept; host and handler are
        # supplied with each request.
        self._scheme = urllib_parse.urlparse(index_url).scheme
        self._session = session

    def request(self, host, handler, request_body, verbose=False):
        url = urllib_parse.urlunparse(
            (self._scheme, host, handler, None, None, None)
        )
        try:
            response = self._session.post(
                url,
                data=request_body,
                headers={'Content-Type': 'text/xml'},
                stream=True,
            )
            response.raise_for_status()
            self.verbose = verbose
            return self.parse_response(response.raw)
        except requests.HTTPError as exc:
            # Log the failing status and url, then let the error propagate.
            logger.critical(
                "HTTP error %s while getting %s",
                exc.response.status_code, url,
            )
            raise
| 795 | |
| 796 | |
def unpack_url(link, location, download_dir=None,
               only_download=False, session=None):
    """Unpack link.
       If link is a VCS link:
         if only_download, export into download_dir and ignore location
          else unpack into location
       for other types of link:
         - unpack into location
         - if download_dir, copy the file into download_dir
         - if only_download, mark location for deletion
    """
    # non-editable vcs urls: nothing else to do once unpacked/exported
    if is_vcs_url(link):
        unpack_vcs_link(link, location, only_download)
        return

    if is_file_url(link):
        # file urls
        unpack_file_url(link, location, download_dir)
    else:
        # http urls; build a default session when none was supplied
        if session is None:
            session = PipSession()
        unpack_http_url(link, location, download_dir, session)

    # Both non-VCS cases mark the location for deletion when only
    # downloading.
    if only_download:
        write_delete_marker_file(location)
| 831 | |
| 832 | |
def _download_http_url(link, session, temp_dir):
    """
    Download link url into temp_dir using provided session.

    :param link: link to download; its url fragment is not sent.
    :param session: session used to issue the GET request.
    :param temp_dir: directory the downloaded file is written into.
    :returns: ``(file_path, content_type)`` of the downloaded file.
    :raises requests.HTTPError: if the server answers with an error status.
    """
    target_url = link.url.split('#', 1)[0]
    try:
        resp = session.get(
            target_url,
            # We use Accept-Encoding: identity here because requests
            # defaults to accepting compressed responses. This breaks in
            # a variety of ways depending on how the server is configured.
            # - Some servers will notice that the file isn't a compressible
            #   file and will leave the file alone and with an empty
            #   Content-Encoding
            # - Some servers will notice that the file is already
            #   compressed and will leave the file alone and will add a
            #   Content-Encoding: gzip header
            # - Some servers won't notice anything at all and will take
            #   a file that's already been compressed and compress it again
            #   and set the Content-Encoding: gzip header
            # By setting this to request only the identity encoding We're
            # hoping to eliminate the third case. Hopefully there does not
            # exist a server which when given a file will notice it is
            # already compressed and that you're not asking for a
            # compressed file and will then decompress it before sending
            # because if that's the case I don't think it'll ever be
            # possible to make this work.
            headers={"Accept-Encoding": "identity"},
            stream=True,
        )
        resp.raise_for_status()
    except requests.HTTPError as exc:
        logger.critical(
            "HTTP error %s while getting %s", exc.response.status_code, link,
        )
        raise

    content_type = resp.headers.get('content-type', '')
    filename = link.filename  # fallback
    # Have a look at the Content-Disposition header for a better guess
    content_disposition = resp.headers.get('content-disposition')
    if content_disposition:
        _disposition, params = cgi.parse_header(content_disposition)
        suggested = params.get('filename')
        if suggested:
            # The name comes from the server and must not be trusted: keep
            # only its final path component so that values such as
            # "../../x" or "/etc/x" cannot make us write outside temp_dir.
            suggested = os.path.basename(suggested)
            if suggested in ('.', '..'):
                suggested = ''
        # We use ``or`` here because we don't want to use an "empty" value
        # from the filename param.
        filename = suggested or filename
    ext = splitext(filename)[1]
    if not ext:
        # No extension yet: guess one from the content type.
        ext = mimetypes.guess_extension(content_type)
        if ext:
            filename += ext
    if not ext and link.url != resp.url:
        # Still none, but the response url differs from the link url (for
        # instance after a redirect): take the extension from that url.
        ext = os.path.splitext(resp.url)[1]
        if ext:
            filename += ext
    file_path = os.path.join(temp_dir, filename)
    with open(file_path, 'wb') as content_file:
        _download_url(resp, link, content_file)
    return file_path, content_type
| 890 | |
| 891 | |
def _check_download_dir(link, download_dir):
    """ Check download_dir for previously downloaded file with correct hash
        If a correct file is found return its path else None
    """
    download_path = os.path.join(download_dir, link.filename)
    if not os.path.exists(download_path):
        return None

    # The file is there already; when the link carries a hash, make sure
    # the file still matches it.
    logger.info('File was already downloaded %s', download_path)
    if link.hash:
        existing_hash = _get_hash_from_file(download_path, link)
        try:
            _check_hash(existing_hash, link)
        except HashMismatch:
            # A stale or corrupt copy is removed so that the caller
            # downloads it again.
            logger.warning(
                'Previously-downloaded file %s has bad hash, '
                're-downloading.',
                download_path
            )
            os.unlink(download_path)
            return None
    return download_path
