diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index 01c2ed3821d7a..c5a76a243cb2e 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -54,7 +54,7 @@ notable_bug_fix1 Deprecations ~~~~~~~~~~~~ - Deprecated allowing non-``bool`` values for ``na`` in :meth:`.str.contains`, :meth:`.str.startswith`, and :meth:`.str.endswith` for dtypes that do not already disallow these (:issue:`59615`) -- +- Deprecated the ``"pyarrow_numpy"`` storage option for :class:`StringDtype` (:issue:`60152`) .. --------------------------------------------------------------------------- .. _whatsnew_230.performance: diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py index f20c4c8625475..d7e6fbf4fcbcd 100644 --- a/pandas/core/arrays/string_.py +++ b/pandas/core/arrays/string_.py @@ -7,6 +7,7 @@ Literal, cast, ) +import warnings import numpy as np @@ -27,6 +28,7 @@ ) from pandas.compat.numpy import function as nv from pandas.util._decorators import doc +from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.base import ( ExtensionDtype, @@ -154,7 +156,16 @@ def __init__( storage = "python" if storage == "pyarrow_numpy": - # TODO raise a deprecation warning + warnings.warn( + "The 'pyarrow_numpy' storage option name is deprecated and will be " + 'removed in pandas 3.0. 
Use \'pd.StringDtype(storage="pyarrow", ' + "na_value=np.nan)' to construct the same dtype.\nOr enable the " + "'pd.options.future.infer_string = True' option globally and use " + 'the "str" alias as a shorthand notation to specify a dtype ' + '(instead of "string[pyarrow_numpy]").', + FutureWarning, + stacklevel=find_stack_level(), + ) storage = "pyarrow" na_value = np.nan @@ -254,7 +265,7 @@ def construct_from_string(cls, string) -> Self: elif string == "string[pyarrow]": return cls(storage="pyarrow") elif string == "string[pyarrow_numpy]": - # TODO deprecate + # this is deprecated in the dtype __init__, remove this in pandas 3.0 return cls(storage="pyarrow_numpy") else: raise TypeError(f"Cannot construct a '{cls.__name__}' from '{string}'") diff --git a/pandas/tests/arrays/string_/test_string.py b/pandas/tests/arrays/string_/test_string.py index 33708be497f31..8de5407a187c9 100644 --- a/pandas/tests/arrays/string_/test_string.py +++ b/pandas/tests/arrays/string_/test_string.py @@ -42,6 +42,14 @@ def cls(dtype): return dtype.construct_array_type() +def test_dtype_constructor(): + pytest.importorskip("pyarrow") + + with tm.assert_produces_warning(FutureWarning): + dtype = pd.StringDtype("pyarrow_numpy") + assert dtype == pd.StringDtype("pyarrow", na_value=np.nan) + + def test_dtype_equality(): pytest.importorskip("pyarrow") diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index 509ae653e4793..d80205d2eb399 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -105,8 +105,8 @@ def test_eq_with_str(self, dtype): # only the NA-variant supports parametrized string alias assert dtype == f"string[{dtype.storage}]" elif dtype.storage == "pyarrow": - # TODO(infer_string) deprecate this - assert dtype == "string[pyarrow_numpy]" + with tm.assert_produces_warning(FutureWarning): + assert dtype == "string[pyarrow_numpy]" def test_is_not_string_type(self, dtype): # Different from 
BaseDtypeTests.test_is_not_string_type