Skip to content

Commit 4e38364

Browse files
committed
Add conditional string encoding based on urllib3 major version
1 parent f8aa36b commit 4e38364

File tree

3 files changed

+40
-18
lines changed

3 files changed

+40
-18
lines changed

src/requests/compat.py

+12
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@
1010
import importlib
1111
import sys
1212

13+
# -------
# urllib3
# -------
from urllib3 import __version__ as urllib3_version

# Detect which major version of urllib3 is being used, so callers can
# branch on urllib3 2.x behavior (e.g. strings encoded as utf-8 instead
# of latin-1).
try:
    # ValueError is included because int() raises it when the major
    # component is non-numeric (e.g. an unusual dev/vendored version
    # string) -- without it, importing this module would crash instead
    # of falling back.
    is_urllib3_2 = int(urllib3_version.split(".")[0]) == 2
except (TypeError, AttributeError, ValueError):
    # If we can't discern a version, prefer old functionality.
    is_urllib3_2 = False
24+
1325
# -------------------
1426
# Character Detection
1527
# -------------------

src/requests/utils.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
getproxies,
3939
getproxies_environment,
4040
integer_types,
41+
is_urllib3_2,
4142
)
4243
from .compat import parse_http_list as _parse_list_header
4344
from .compat import (
@@ -136,7 +137,9 @@ def super_len(o):
136137
total_length = None
137138
current_position = 0
138139

139-
if isinstance(o, str):
140+
if is_urllib3_2 and isinstance(o, str):
141+
# urllib3 2.x treats all strings as utf-8 instead
142+
# of latin-1 (iso-8859-1) like http.client.
140143
o = o.encode("utf-8")
141144

142145
if hasattr(o, "__len__"):

tests/test_requests.py

+24-17
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
builtin_str,
2626
cookielib,
2727
getproxies,
28+
is_urllib3_2,
2829
urlparse,
2930
)
3031
from requests.cookies import cookiejar_from_dict, morsel_to_cookie
@@ -1810,23 +1811,6 @@ def test_autoset_header_values_are_native(self, httpbin):
18101811

18111812
assert p.headers["Content-Length"] == length
18121813

1813-
def test_content_length_for_bytes_data(self, httpbin):
1814-
data = "This is a string containing multi-byte UTF-8 ☃️"
1815-
encoded_data = data.encode("utf-8")
1816-
length = str(len(encoded_data))
1817-
req = requests.Request("POST", httpbin("post"), data=encoded_data)
1818-
p = req.prepare()
1819-
1820-
assert p.headers["Content-Length"] == length
1821-
1822-
def test_content_length_for_string_data_counts_bytes(self, httpbin):
1823-
data = "This is a string containing multi-byte UTF-8 ☃️"
1824-
length = str(len(data.encode("utf-8")))
1825-
req = requests.Request("POST", httpbin("post"), data=data)
1826-
p = req.prepare()
1827-
1828-
assert p.headers["Content-Length"] == length
1829-
18301814
def test_nonhttp_schemes_dont_check_URLs(self):
18311815
test_urls = (
18321816
"data:image/gif;base64,R0lGODlhAQABAHAAACH5BAUAAAAALAAAAAABAAEAAAICRAEAOw==",
@@ -2966,6 +2950,29 @@ def response_handler(sock):
29662950
assert client_cert is not None
29672951

29682952

2953+
def test_content_length_for_bytes_data(httpbin):
    """A prepared request with a bytes body reports its exact byte count
    in the Content-Length header."""
    payload = "This is a string containing multi-byte UTF-8 ☃️".encode("utf-8")
    prepared = requests.Request("POST", httpbin("post"), data=payload).prepare()

    assert prepared.headers["Content-Length"] == str(len(payload))
2961+
2962+
2963+
@pytest.mark.skipif(
    not is_urllib3_2,
    reason="urllib3 2.x encodes all strings to utf-8, urllib3 1.x uses latin-1",
)
def test_content_length_for_string_data_counts_bytes(httpbin):
    """Under urllib3 2.x, a str body is encoded as utf-8, so the
    Content-Length header must count the utf-8 bytes, not the
    characters."""
    text = "This is a string containing multi-byte UTF-8 ☃️"
    expected_length = str(len(text.encode("utf-8")))
    prepared = requests.Request("POST", httpbin("post"), data=text).prepare()

    assert prepared.headers["Content-Length"] == expected_length
2974+
2975+
29692976
def test_json_decode_errors_are_serializable_deserializable():
29702977
json_decode_error = requests.exceptions.JSONDecodeError(
29712978
"Extra data",

0 commit comments

Comments
 (0)