Skip to content

Commit bb7e65c

Browse files
jbrockmendel authored and jorisvandenbossche committed
API (string): str.center with pyarrow-backed string dtype (#59624)
1 parent 205e637 commit bb7e65c

File tree

4 files changed

+22
-9
lines changed

4 files changed

+22
-9
lines changed

doc/source/whatsnew/v2.3.0.rst

+2-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,8 @@ Conversion
103103
Strings
104104
^^^^^^^
105105
- Bug in :meth:`Series.str.replace` when ``n < 0`` for :class:`StringDtype` with ``storage="pyarrow"`` (:issue:`59628`)
106-
-
106+
- Bug in the ``center`` method of the :class:`Series` and :class:`Index` ``str`` accessors with a pyarrow-backed dtype, where the result did not match the Python behavior in corner cases with an odd number of fill characters (:issue:`54792`)
107+
107108

108109
Interval
109110
^^^^^^^^

pandas/core/arrays/_arrow_string_mixins.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,17 @@
11
from __future__ import annotations
22

3+
from functools import partial
34
from typing import (
45
TYPE_CHECKING,
56
Literal,
67
)
78

89
import numpy as np
910

10-
from pandas.compat import pa_version_under10p1
11+
from pandas.compat import (
12+
pa_version_under10p1,
13+
pa_version_under17p0,
14+
)
1115

1216
from pandas.core.dtypes.missing import isna
1317

@@ -46,7 +50,19 @@ def _str_pad(
4650
elif side == "right":
4751
pa_pad = pc.utf8_rpad
4852
elif side == "both":
49-
pa_pad = pc.utf8_center
53+
if pa_version_under17p0:
54+
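# GH#59624 fall back to object dtype: the lean_left_on_odd_padding option used in the branch below is only relied on for pyarrow >= 17.0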
55+
from pandas import array
56+
57+
obj_arr = self.astype(object, copy=False) # type: ignore[attr-defined]
58+
obj = array(obj_arr, dtype=object)
59+
result = obj._str_pad(width, side, fillchar) # type: ignore[attr-defined]
60+
return type(self)._from_sequence(result, dtype=self.dtype) # type: ignore[attr-defined]
61+
else:
62+
# GH#54792
63+
# https://github.com/apache/arrow/issues/15053#issuecomment-2317032347
64+
lean_left = (width % 2) == 0
65+
pa_pad = partial(pc.utf8_center, lean_left_on_odd_padding=lean_left)
5066
else:
5167
raise ValueError(
5268
f"Invalid side: {side}. Side must be one of 'left', 'right', 'both'"

pandas/core/arrays/string_arrow.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -286,6 +286,7 @@ def _data(self):
286286
_str_map = BaseStringArray._str_map
287287
_str_startswith = ArrowStringArrayMixin._str_startswith
288288
_str_endswith = ArrowStringArrayMixin._str_endswith
289+
_str_pad = ArrowStringArrayMixin._str_pad
289290

290291
def _str_contains(
291292
self, pat, case: bool = True, flags: int = 0, na=np.nan, regex: bool = True
@@ -546,7 +547,6 @@ class ArrowStringArrayNumpySemantics(ArrowStringArray):
546547
_str_get = ArrowStringArrayMixin._str_get
547548
_str_removesuffix = ArrowStringArrayMixin._str_removesuffix
548549
_str_capitalize = ArrowStringArrayMixin._str_capitalize
549-
_str_pad = ArrowStringArrayMixin._str_pad
550550
_str_title = ArrowStringArrayMixin._str_title
551551
_str_swapcase = ArrowStringArrayMixin._str_swapcase
552552
_str_slice_replace = ArrowStringArrayMixin._str_slice_replace

pandas/tests/strings/test_case_justify.py

+1-5
Original file line numberDiff line numberDiff line change
@@ -291,11 +291,7 @@ def test_center_ljust_rjust_mixed_object():
291291

292292

293293
def test_center_ljust_rjust_fillchar(any_string_dtype):
294-
if any_string_dtype == "string[pyarrow_numpy]":
295-
pytest.skip(
296-
"Arrow logic is different, "
297-
"see https://github.com/pandas-dev/pandas/pull/54533/files#r1299808126",
298-
)
294+
# GH#54533, GH#54792
299295
s = Series(["a", "bb", "cccc", "ddddd", "eeeeee"], dtype=any_string_dtype)
300296

301297
result = s.str.center(5, fillchar="X")

0 commit comments

Comments
 (0)