diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 38fb0188df5ff..5e82853109015 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -33,6 +33,7 @@ pa_version_under14p1, pa_version_under16p0, pa_version_under17p0, + pa_version_under18p0, ) if TYPE_CHECKING: @@ -191,6 +192,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]: "pa_version_under14p1", "pa_version_under16p0", "pa_version_under17p0", + "pa_version_under18p0", "HAS_PYARROW", "IS64", "ISMUSL", diff --git a/pandas/compat/pyarrow.py b/pandas/compat/pyarrow.py index 7fa197c4a9824..f579b8a45d386 100644 --- a/pandas/compat/pyarrow.py +++ b/pandas/compat/pyarrow.py @@ -17,6 +17,7 @@ pa_version_under15p0 = _palv < Version("15.0.0") pa_version_under16p0 = _palv < Version("16.0.0") pa_version_under17p0 = _palv < Version("17.0.0") + pa_version_under18p0 = _palv < Version("18.0.0") HAS_PYARROW = True except ImportError: pa_version_under10p1 = True @@ -28,4 +29,5 @@ pa_version_under15p0 = True pa_version_under16p0 = True pa_version_under17p0 = True + pa_version_under18p0 = False HAS_PYARROW = False diff --git a/pandas/io/_util.py b/pandas/io/_util.py index 50a97f1059b5c..f3e6dba1391be 100644 --- a/pandas/io/_util.py +++ b/pandas/io/_util.py @@ -4,6 +4,7 @@ import numpy as np +from pandas.compat import pa_version_under18p0 from pandas.compat._optional import import_optional_dependency import pandas as pd @@ -32,7 +33,11 @@ def _arrow_dtype_mapping() -> dict: def arrow_string_types_mapper() -> Callable: pa = import_optional_dependency("pyarrow") - return { + mapping = { pa.string(): pd.StringDtype(na_value=np.nan), pa.large_string(): pd.StringDtype(na_value=np.nan), - }.get + } + if not pa_version_under18p0: + mapping[pa.string_view()] = pd.StringDtype(na_value=np.nan) + + return mapping.get diff --git a/pandas/tests/io/test_feather.py b/pandas/tests/io/test_feather.py index 3b4484e44e155..58a5f78ce3258 100644 --- a/pandas/tests/io/test_feather.py +++ b/pandas/tests/io/test_feather.py @@ -2,6 +2,8 @@ import numpy as np import pytest +from pandas.compat.pyarrow import pa_version_under18p0 + import pandas as pd import pandas._testing as tm @@ -250,3 +252,21 @@ def test_string_inference(self, tmp_path): data={"a": ["x", "y"]}, dtype=pd.StringDtype(na_value=np.nan) ) tm.assert_frame_equal(result, expected) + + @pytest.mark.skipif(pa_version_under18p0, reason="not supported before 18.0") + def test_string_inference_string_view_type(self, tmp_path): + # GH#54798 + import pyarrow as pa + from pyarrow import feather + + path = tmp_path / "string_view.parquet" + table = pa.table({"a": pa.array([None, "b", "c"], pa.string_view())}) + feather.write_feather(table, path) + + with pd.option_context("future.infer_string", True): + result = read_feather(path) + + expected = pd.DataFrame( + data={"a": [None, "b", "c"]}, dtype=pd.StringDtype(na_value=np.nan) + ) + tm.assert_frame_equal(result, expected)