diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst
index 2072dda2965d7..d89a1e012d5f8 100644
--- a/doc/source/whatsnew/v1.5.0.rst
+++ b/doc/source/whatsnew/v1.5.0.rst
@@ -364,6 +364,7 @@ I/O
 - Bug in :func:`read_csv` not respecting a specified converter to index columns in all cases (:issue:`40589`)
 - Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
 - Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
+- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
 
 Period
 ^^^^^^
diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py
index 322da6a1e4ece..072caa6a4b74f 100644
--- a/pandas/core/dtypes/dtypes.py
+++ b/pandas/core/dtypes/dtypes.py
@@ -1253,16 +1253,18 @@ def __from_arrow__(
 
         results = []
         for arr in chunks:
-            left = np.asarray(arr.storage.field("left"), dtype=self.subtype)
-            right = np.asarray(arr.storage.field("right"), dtype=self.subtype)
-            iarr = IntervalArray.from_arrays(left, right, closed=array.type.closed)
+            if isinstance(arr, pyarrow.ExtensionArray):
+                arr = arr.storage
+            left = np.asarray(arr.field("left"), dtype=self.subtype)
+            right = np.asarray(arr.field("right"), dtype=self.subtype)
+            iarr = IntervalArray.from_arrays(left, right, closed=self.closed)
             results.append(iarr)
 
         if not results:
             return IntervalArray.from_arrays(
                 np.array([], dtype=self.subtype),
                 np.array([], dtype=self.subtype),
-                closed=array.type.closed,
+                closed=self.closed,
             )
         return IntervalArray._concat_same_type(results)
 
diff --git a/pandas/tests/arrays/interval/test_interval.py b/pandas/tests/arrays/interval/test_interval.py
index 5ef69106f0278..2b5712e76e8cc 100644
--- a/pandas/tests/arrays/interval/test_interval.py
+++ b/pandas/tests/arrays/interval/test_interval.py
@@ -376,3 +376,23 @@ def test_arrow_table_roundtrip_without_metadata(breaks):
     result = table.to_pandas()
     assert isinstance(result["a"].dtype, pd.IntervalDtype)
     tm.assert_frame_equal(result, df)
+
+
+@pyarrow_skip
+def test_from_arrow_from_raw_struct_array():
+    # in case pyarrow lost the Interval extension type (eg on parquet roundtrip
+    # with datetime64[ns] subtype, see GH-45881), still allow conversion
+    # from arrow to IntervalArray
+    import pyarrow as pa
+
+    arr = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}])
+    dtype = pd.IntervalDtype(np.dtype("int64"), closed="neither")
+
+    result = dtype.__from_arrow__(arr)
+    expected = IntervalArray.from_breaks(
+        np.array([0, 1, 2], dtype="int64"), closed="neither"
+    )
+    tm.assert_extension_array_equal(result, expected)
+
+    result = dtype.__from_arrow__(pa.chunked_array([arr]))
+    tm.assert_extension_array_equal(result, expected)
diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py
index 85f1cbf8c7707..3001922e95a54 100644
--- a/pandas/tests/io/test_parquet.py
+++ b/pandas/tests/io/test_parquet.py
@@ -928,15 +928,18 @@ def test_pyarrow_backed_string_array(self, pa, string_storage):
         with pd.option_context("string_storage", string_storage):
             check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]"))
 
-    @td.skip_if_no("pyarrow")
+    @td.skip_if_no("pyarrow", min_version="2.0.0")
     def test_additional_extension_types(self, pa):
         # test additional ExtensionArrays that are supported through the
         # __arrow_array__ protocol + by defining a custom ExtensionType
         df = pd.DataFrame(
             {
-                # Arrow does not yet support struct in writing to Parquet (ARROW-1644)
-                # "c": pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (3, 4)]),
+                "c": pd.IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)]),
                 "d": pd.period_range("2012-01-01", periods=3, freq="D"),
+                # GH-45881 issue with interval with datetime64[ns] subtype
+                "e": pd.IntervalIndex.from_breaks(
+                    pd.date_range("2012-01-01", periods=4, freq="D")
+                ),
             }
         )
         check_round_trip(df, pa)
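
For context (not part of the patch): a minimal sketch of the roundtrip this fixes, per GH-45881. The file path ``interval.parquet`` and column name ``"a"`` are illustrative. Before this change, reading back failed because the Interval extension type does not survive the Parquet roundtrip with a ``datetime64[ns]`` subtype, so ``__from_arrow__`` received a plain struct array while the old code unconditionally accessed ``arr.storage``.

# Reproduction sketch for GH-45881 (assumes a writable local path;
# names below are illustrative, not from the patch).
import pandas as pd

df = pd.DataFrame(
    {
        "a": pd.IntervalIndex.from_breaks(
            pd.date_range("2012-01-01", periods=4, freq="D")
        )
    }
)
df.to_parquet("interval.parquet", engine="pyarrow")

# Previously raised here: pyarrow hands back a raw struct array (no
# .storage attribute) because the extension type was lost on write.
result = pd.read_parquet("interval.parquet", engine="pyarrow")
print(result["a"].dtype)  # interval[datetime64[ns], right]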