Skip to content

Commit ba3e933

Browse files
[backport 2.3.x] TST (string dtype): duplicate pandas/tests/indexes/object tests specifically for string dtypes (#60117) (#60131)
TST (string dtype): duplicate pandas/tests/indexes/object tests specifically for string dtypes (#60117) (cherry picked from commit d8905e4)
1 parent 4079314 commit ba3e933

File tree

5 files changed

+149 additions, -91 deletions

pandas/tests/indexes/object/test_astype.py

-18
Original file line number | Diff line number | Diff line change
@@ -3,25 +3,7 @@
33
from pandas import (
44
Index,
55
NaT,
6-
Series,
76
)
8-
import pandas._testing as tm
9-
10-
11-
def test_astype_str_from_bytes():
12-
# https://github.com/pandas-dev/pandas/issues/38607
13-
# GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
14-
# did a .decode() on the bytes object. In 2.0 we go through
15-
# ensure_string_array which does f"{val}"
16-
idx = Index(["あ", b"a"], dtype="object")
17-
result = idx.astype(str)
18-
expected = Index(["あ", "a"], dtype="str")
19-
tm.assert_index_equal(result, expected)
20-
21-
# while we're here, check that Series.astype behaves the same
22-
result = Series(idx).astype(str)
23-
expected = Series(expected, dtype="str")
24-
tm.assert_series_equal(result, expected)
257

268

279
def test_astype_invalid_nas_to_tdt64_raises():

pandas/tests/indexes/object/test_indexing.py

+10 -73
Original file line number | Diff line number | Diff line change
@@ -3,12 +3,8 @@
33
import numpy as np
44
import pytest
55

6-
from pandas._libs.missing import (
7-
NA,
8-
is_matching_na,
9-
)
6+
from pandas._libs.missing import is_matching_na
107

11-
import pandas as pd
128
from pandas import Index
139
import pandas._testing as tm
1410

@@ -22,13 +18,14 @@ class TestGetIndexer:
2218
],
2319
)
2420
def test_get_indexer_strings(self, method, expected):
25-
index = Index(["b", "c"])
21+
expected = np.array(expected, dtype=np.intp)
22+
index = Index(["b", "c"], dtype=object)
2623
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
2724

2825
tm.assert_numpy_array_equal(actual, expected)
2926

30-
def test_get_indexer_strings_raises(self, using_infer_string):
31-
index = Index(["b", "c"])
27+
def test_get_indexer_strings_raises(self):
28+
index = Index(["b", "c"], dtype=object)
3229

3330
msg = "|".join(
3431
[
@@ -67,13 +64,9 @@ def test_get_indexer_with_NA_values(
6764

6865

6966
class TestGetIndexerNonUnique:
70-
def test_get_indexer_non_unique_nas(
71-
self, nulls_fixture, request, using_infer_string
72-
):
67+
def test_get_indexer_non_unique_nas(self, nulls_fixture):
7368
# even though this isn't non-unique, this should still work
74-
if using_infer_string and (nulls_fixture is None or nulls_fixture is NA):
75-
request.applymarker(pytest.mark.xfail(reason="NAs are cast to NaN"))
76-
index = Index(["a", "b", nulls_fixture])
69+
index = Index(["a", "b", nulls_fixture], dtype=object)
7770
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
7871

7972
expected_indexer = np.array([2], dtype=np.intp)
@@ -82,7 +75,7 @@ def test_get_indexer_non_unique_nas(
8275
tm.assert_numpy_array_equal(missing, expected_missing)
8376

8477
# actually non-unique
85-
index = Index(["a", nulls_fixture, "b", nulls_fixture])
78+
index = Index(["a", nulls_fixture, "b", nulls_fixture], dtype=object)
8679
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
8780

8881
expected_indexer = np.array([1, 3], dtype=np.intp)
@@ -91,10 +84,10 @@ def test_get_indexer_non_unique_nas(
9184

9285
# matching-but-not-identical nans
9386
if is_matching_na(nulls_fixture, float("NaN")):
94-
index = Index(["a", float("NaN"), "b", float("NaN")])
87+
index = Index(["a", float("NaN"), "b", float("NaN")], dtype=object)
9588
match_but_not_identical = True
9689
elif is_matching_na(nulls_fixture, Decimal("NaN")):
97-
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")])
90+
index = Index(["a", Decimal("NaN"), "b", Decimal("NaN")], dtype=object)
9891
match_but_not_identical = True
9992
else:
10093
match_but_not_identical = False
@@ -155,59 +148,3 @@ def test_get_indexer_non_unique_np_nats(self, np_nat_fixture, np_nat_fixture2):
155148
expected_indexer = np.array([1, 3], dtype=np.intp)
156149
tm.assert_numpy_array_equal(indexer, expected_indexer)
157150
tm.assert_numpy_array_equal(missing, expected_missing)
158-
159-
160-
class TestSliceLocs:
161-
@pytest.mark.parametrize(
162-
"in_slice,expected",
163-
[
164-
# error: Slice index must be an integer or None
165-
(pd.IndexSlice[::-1], "yxdcb"),
166-
(pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
167-
(pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
168-
(pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
169-
(pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
170-
(pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
171-
(pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
172-
# absent labels
173-
(pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
174-
(pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
175-
(pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
176-
(pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
177-
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
178-
(pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
179-
(pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
180-
(pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
181-
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
182-
],
183-
)
184-
def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
185-
index = Index(list("bcdxy"), dtype=any_string_dtype)
186-
187-
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
188-
result = index[s_start : s_stop : in_slice.step]
189-
expected = Index(list(expected), dtype=any_string_dtype)
190-
tm.assert_index_equal(result, expected)
191-
192-
def test_slice_locs_negative_step_oob(self, any_string_dtype):
193-
index = Index(list("bcdxy"), dtype=any_string_dtype)
194-
195-
result = index[-10:5:1]
196-
tm.assert_index_equal(result, index)
197-
198-
result = index[4:-10:-1]
199-
expected = Index(list("yxdcb"), dtype=any_string_dtype)
200-
tm.assert_index_equal(result, expected)
201-
202-
def test_slice_locs_dup(self):
203-
index = Index(["a", "a", "b", "c", "d", "d"])
204-
assert index.slice_locs("a", "d") == (0, 6)
205-
assert index.slice_locs(end="d") == (0, 6)
206-
assert index.slice_locs("a", "c") == (0, 4)
207-
assert index.slice_locs("b", "d") == (2, 6)
208-
209-
index2 = index[::-1]
210-
assert index2.slice_locs("d", "a") == (0, 6)
211-
assert index2.slice_locs(end="a") == (0, 6)
212-
assert index2.slice_locs("d", "b") == (0, 4)
213-
assert index2.slice_locs("c", "a") == (2, 6)

pandas/tests/indexes/string/__init__.py

Whitespace-only changes.

pandas/tests/indexes/string/test_astype.py

+21
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
from pandas import (
2+
Index,
3+
Series,
4+
)
5+
import pandas._testing as tm
6+
7+
8+
def test_astype_str_from_bytes():
9+
# https://github.com/pandas-dev/pandas/issues/38607
10+
# GH#49658 pre-2.0 Index called .values.astype(str) here, which effectively
11+
# did a .decode() on the bytes object. In 2.0 we go through
12+
# ensure_string_array which does f"{val}"
13+
idx = Index(["あ", b"a"], dtype="object")
14+
result = idx.astype(str)
15+
expected = Index(["あ", "a"], dtype="str")
16+
tm.assert_index_equal(result, expected)
17+
18+
# while we're here, check that Series.astype behaves the same
19+
result = Series(idx).astype(str)
20+
expected = Series(expected, dtype="str")
21+
tm.assert_series_equal(result, expected)

pandas/tests/indexes/string/test_indexing.py

+118
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,118 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import Index
6+
import pandas._testing as tm
7+
8+
9+
class TestGetIndexer:
10+
@pytest.mark.parametrize(
11+
"method,expected",
12+
[
13+
("pad", [-1, 0, 1, 1]),
14+
("backfill", [0, 0, 1, -1]),
15+
],
16+
)
17+
def test_get_indexer_strings(self, any_string_dtype, method, expected):
18+
expected = np.array(expected, dtype=np.intp)
19+
index = Index(["b", "c"], dtype=any_string_dtype)
20+
actual = index.get_indexer(["a", "b", "c", "d"], method=method)
21+
22+
tm.assert_numpy_array_equal(actual, expected)
23+
24+
def test_get_indexer_strings_raises(self, any_string_dtype):
25+
index = Index(["b", "c"], dtype=any_string_dtype)
26+
27+
msg = "|".join(
28+
[
29+
"operation 'sub' not supported for dtype 'str",
30+
r"unsupported operand type\(s\) for -: 'str' and 'str'",
31+
]
32+
)
33+
with pytest.raises(TypeError, match=msg):
34+
index.get_indexer(["a", "b", "c", "d"], method="nearest")
35+
36+
with pytest.raises(TypeError, match=msg):
37+
index.get_indexer(["a", "b", "c", "d"], method="pad", tolerance=2)
38+
39+
with pytest.raises(TypeError, match=msg):
40+
index.get_indexer(
41+
["a", "b", "c", "d"], method="pad", tolerance=[2, 2, 2, 2]
42+
)
43+
44+
45+
class TestGetIndexerNonUnique:
46+
@pytest.mark.xfail(reason="TODO(infer_string)", strict=False)
47+
def test_get_indexer_non_unique_nas(self, any_string_dtype, nulls_fixture):
48+
index = Index(["a", "b", None], dtype=any_string_dtype)
49+
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
50+
51+
expected_indexer = np.array([2], dtype=np.intp)
52+
expected_missing = np.array([], dtype=np.intp)
53+
tm.assert_numpy_array_equal(indexer, expected_indexer)
54+
tm.assert_numpy_array_equal(missing, expected_missing)
55+
56+
# actually non-unique
57+
index = Index(["a", None, "b", None], dtype=any_string_dtype)
58+
indexer, missing = index.get_indexer_non_unique([nulls_fixture])
59+
60+
expected_indexer = np.array([1, 3], dtype=np.intp)
61+
tm.assert_numpy_array_equal(indexer, expected_indexer)
62+
tm.assert_numpy_array_equal(missing, expected_missing)
63+
64+
65+
class TestSliceLocs:
66+
@pytest.mark.parametrize(
67+
"in_slice,expected",
68+
[
69+
# error: Slice index must be an integer or None
70+
(pd.IndexSlice[::-1], "yxdcb"),
71+
(pd.IndexSlice["b":"y":-1], ""), # type: ignore[misc]
72+
(pd.IndexSlice["b"::-1], "b"), # type: ignore[misc]
73+
(pd.IndexSlice[:"b":-1], "yxdcb"), # type: ignore[misc]
74+
(pd.IndexSlice[:"y":-1], "y"), # type: ignore[misc]
75+
(pd.IndexSlice["y"::-1], "yxdcb"), # type: ignore[misc]
76+
(pd.IndexSlice["y"::-4], "yb"), # type: ignore[misc]
77+
# absent labels
78+
(pd.IndexSlice[:"a":-1], "yxdcb"), # type: ignore[misc]
79+
(pd.IndexSlice[:"a":-2], "ydb"), # type: ignore[misc]
80+
(pd.IndexSlice["z"::-1], "yxdcb"), # type: ignore[misc]
81+
(pd.IndexSlice["z"::-3], "yc"), # type: ignore[misc]
82+
(pd.IndexSlice["m"::-1], "dcb"), # type: ignore[misc]
83+
(pd.IndexSlice[:"m":-1], "yx"), # type: ignore[misc]
84+
(pd.IndexSlice["a":"a":-1], ""), # type: ignore[misc]
85+
(pd.IndexSlice["z":"z":-1], ""), # type: ignore[misc]
86+
(pd.IndexSlice["m":"m":-1], ""), # type: ignore[misc]
87+
],
88+
)
89+
def test_slice_locs_negative_step(self, in_slice, expected, any_string_dtype):
90+
index = Index(list("bcdxy"), dtype=any_string_dtype)
91+
92+
s_start, s_stop = index.slice_locs(in_slice.start, in_slice.stop, in_slice.step)
93+
result = index[s_start : s_stop : in_slice.step]
94+
expected = Index(list(expected), dtype=any_string_dtype)
95+
tm.assert_index_equal(result, expected)
96+
97+
def test_slice_locs_negative_step_oob(self, any_string_dtype):
98+
index = Index(list("bcdxy"), dtype=any_string_dtype)
99+
100+
result = index[-10:5:1]
101+
tm.assert_index_equal(result, index)
102+
103+
result = index[4:-10:-1]
104+
expected = Index(list("yxdcb"), dtype=any_string_dtype)
105+
tm.assert_index_equal(result, expected)
106+
107+
def test_slice_locs_dup(self, any_string_dtype):
108+
index = Index(["a", "a", "b", "c", "d", "d"], dtype=any_string_dtype)
109+
assert index.slice_locs("a", "d") == (0, 6)
110+
assert index.slice_locs(end="d") == (0, 6)
111+
assert index.slice_locs("a", "c") == (0, 4)
112+
assert index.slice_locs("b", "d") == (2, 6)
113+
114+
index2 = index[::-1]
115+
assert index2.slice_locs("d", "a") == (0, 6)
116+
assert index2.slice_locs(end="a") == (0, 6)
117+
assert index2.slice_locs("d", "b") == (0, 4)
118+
assert index2.slice_locs("c", "a") == (2, 6)

0 commit comments

Comments
 (0)