Skip to content

Commit 4e38364

Browse files
committed
Add conditional string encoding based on urllib3 major version
1 parent f8aa36b commit 4e38364

File tree

3 files changed

+40
-18
lines changed

3 files changed

+40
-18
lines changed

src/requests/compat.py

+12
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,18 @@
1010
import importlib
1111
import sys
1212

13+
# -------
# urllib3
# -------
from urllib3 import __version__ as urllib3_version

# Detect which major version of urllib3 is being used, so callers can
# branch on urllib3 2.x behavior (e.g. strings encoded as utf-8 instead
# of latin-1).
try:
    # ValueError is included because int() raises it when the major
    # component is non-numeric (e.g. an unusual dev/vendored version
    # string) -- without it, importing this module would crash instead
    # of falling back.
    is_urllib3_2 = int(urllib3_version.split(".")[0]) == 2
except (TypeError, AttributeError, ValueError):
    # If we can't discern a version, prefer old functionality.
    is_urllib3_2 = False
24+
1325
# -------------------
1426
# Character Detection
1527
# -------------------

src/requests/utils.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
getproxies,
3939
getproxies_environment,
4040
integer_types,
41+
is_urllib3_2,
4142
)
4243
from .compat import parse_http_list as _parse_list_header
4344
from .compat import (
@@ -136,7 +137,9 @@ def super_len(o):
136137
total_length = None
137138
current_position = 0
138139

139-
if isinstance(o, str):
140+
if is_urllib3_2 and isinstance(o, str):
141+
# urllib3 2.x treats all strings as utf-8 instead
142+
# of latin-1 (iso-8859-1) like http.client.
140143
o = o.encode("utf-8")
141144

142145
if hasattr(o, "__len__"):

tests/test_requests.py

+24-17
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
builtin_str,
2626
cookielib,
2727
getproxies,
28+
is_urllib3_2,
2829
urlparse,
2930
)
3031
from requests.cookies import cookiejar_from_dict, morsel_to_cookie
@@ -1810,23 +1811,6 @@ def test_autoset_header_values_are_native(self, httpbin):
18101811

18111812
assert p.headers["Content-Length"] == length
18121813

1813-
def test_content_length_for_bytes_data(self, httpbin):
1814-
data = "This is a string containing multi-byte UTF-8 ☃️"
1815-
encoded_data = data.encode("utf-8")
1816-
length = str(len(encoded_data))
1817-
req = requests.Request("POST", httpbin("post"), data=encoded_data)
1818-
p = req.prepare()
1819-
1820-
assert p.headers["Content-Length"] == length
1821-
1822-
def test_content_length_for_string_data_counts_bytes(self, httpbin):
1823-
data = "This is a string containing multi-byte UTF-8 ☃️"
1824-
length = str(len(data.encode("utf-8")))
1825-
req = requests.Request("POST", httpbin("post"), data=data)
1826-
p = req.prepare()
1827-
1828-
assert p.headers["Content-Length"] == length
1829-
18301814
def test_nonhttp_schemes_dont_check_URLs(self):
18311815
test_urls = (
18321816
"data:image/gif;base64,R0lGODlhAQABAHAAACH5BAUAAAAALAAAAAABAAEAAAICRAEAOw==",
@@ -2966,6 +2950,29 @@ def response_handler(sock):
29662950
assert client_cert is not None
29672951

29682952

2953+
def test_content_length_for_bytes_data(httpbin):
    """A prepared request with a bytes body reports its exact byte count
    in the Content-Length header."""
    payload = "This is a string containing multi-byte UTF-8 ☃️".encode("utf-8")
    prepared = requests.Request("POST", httpbin("post"), data=payload).prepare()

    assert prepared.headers["Content-Length"] == str(len(payload))
2961+
2962+
2963+
@pytest.mark.skipif(
    not is_urllib3_2,
    reason="urllib3 2.x encodes all strings to utf-8, urllib3 1.x uses latin-1",
)
def test_content_length_for_string_data_counts_bytes(httpbin):
    """Under urllib3 2.x, a str body is encoded as utf-8, so the
    Content-Length header must count the utf-8 bytes, not the
    characters."""
    text = "This is a string containing multi-byte UTF-8 ☃️"
    expected_length = str(len(text.encode("utf-8")))
    prepared = requests.Request("POST", httpbin("post"), data=text).prepare()

    assert prepared.headers["Content-Length"] == expected_length
2974+
2975+
29692976
def test_json_decode_errors_are_serializable_deserializable():
29702977
json_decode_error = requests.exceptions.JSONDecodeError(
29712978
"Extra data",

0 commit comments

Comments
 (0)