diff venv/lib/python2.7/site-packages/requests_toolbelt/utils/deprecated.py @ 0:d67268158946 draft

planemo upload commit a3f181f5f126803c654b3a66dd4e83a48f7e203b
author bcclaywell
date Mon, 12 Oct 2015 17:43:33 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/venv/lib/python2.7/site-packages/requests_toolbelt/utils/deprecated.py	Mon Oct 12 17:43:33 2015 -0400
@@ -0,0 +1,84 @@
+# -*- coding: utf-8 -*-
+"""A collection of functions deprecated in requests.utils."""
+import re
+
+from requests import utils
+
+
def get_encodings_from_content(content):
    """Return the encodings declared inside the given content.

    Three regular expressions are applied to ``content`` and their matches
    are concatenated in this order:

    1. ``charset=...`` appearing inside a ``<meta`` tag,
    2. ``charset=...`` appearing directly at the start of a ``<meta`` tag's
       ``content=`` attribute,
    3. ``encoding=...`` in an ``<?xml`` declaration located at the very
       beginning of the content.

    The same encoding may therefore be reported more than once, and the
    names are returned exactly as written in the content (no case folding
    and no validation against the known codecs).

    .. code-block:: python

        import requests
        from requests_toolbelt.utils import deprecated

        r = requests.get(url)
        encodings = deprecated.get_encodings_from_content(r.content)

    :param content: bytestring (text is accepted too) to extract encodings
        from.
    :type content: bytes or str
    :returns: declared encoding names in the order described above; an
        empty list when nothing is declared.
    :rtype: list
    """
    # The patterns below are text patterns, and on Python 3 ``re`` refuses
    # to apply a text pattern to a bytes object (TypeError).  latin-1 maps
    # every byte to the code point with the same value, so this decode can
    # never fail and leaves the ASCII markup we search for untouched.  On
    # Python 2 ``bytes`` is ``str``, so such input is used unchanged.
    is_binary = isinstance(content, (bytes, bytearray))
    if is_binary and not isinstance(content, str):
        content = content.decode('latin-1')

    find_charset = re.compile(
        r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I
    ).findall

    find_pragma = re.compile(
        r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I
    ).findall

    # Unlike the two patterns above this one is case-sensitive and anchored
    # to the start of the content.
    find_xml = re.compile(
        r'^<\?xml.*?encoding=["\']*(.+?)["\'>]'
    ).findall

    return find_charset(content) + find_pragma(content) + find_xml(content)
+
+
def get_unicode_from_response(response):
    """Return the requested content back in unicode.

    Decoding is attempted in the following order, and the first attempt
    that succeeds is returned:

    1. the charset announced in the response headers,
    2. every encoding found in the body by
       :func:`requests_toolbelt.utils.deprecated.get_encodings_from_content`
       (names that already failed are not tried again),
    3. the header charset once more, this time replacing undecodable
       bytes instead of failing,
    4. ``response.text``.

    An encoding name that is unknown to Python is treated like any other
    failed attempt instead of aborting the whole lookup.

    .. code-block:: python

        import requests
        from requests_toolbelt.utils import deprecated

        r = requests.get(url)
        text = deprecated.get_unicode_from_response(r)

    :param response: Response object to get unicode content from.
    :type response: requests.models.Response
    :returns: the decoded body of the response.
    """
    content = response.content
    tried_encodings = set()

    # Try charset from content-type
    encoding = utils.get_encoding_from_headers(response.headers)

    if encoding:
        try:
            # ``bytes.decode`` behaves the same on Python 2 and 3, whereas
            # ``str(content, encoding)`` is a TypeError on Python 2.
            return content.decode(encoding)
        except (UnicodeError, LookupError):
            # LookupError: the server announced a codec Python does not
            # know; keep going rather than crash on a bogus header.
            tried_encodings.add(encoding.lower())

    # Next, try whatever the document itself declares.
    for _encoding in get_encodings_from_content(content):
        _encoding = _encoding.lower()
        if _encoding in tried_encodings:
            continue
        try:
            return content.decode(_encoding)
        except (UnicodeError, LookupError):
            tried_encodings.add(_encoding)

    # Fall back: lossy decode with the header charset, if there is one.
    if encoding:
        try:
            return content.decode(encoding, 'replace')
        except (LookupError, TypeError):
            pass
    return response.text