BUG: fix parquet roundtrip for Interval dtype with datetime64[ns] subtype
jorisvandenbossche authored Feb 27, 2022
1 parent 696a8e9 commit b8c2e82
Showing 4 changed files with 33 additions and 7 deletions.
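In short: an interval column with datetime64[ns] subtype could be written to Parquet but not read back, because pyarrow loses the Interval extension type on that roundtrip and returns a plain struct array, on which the old IntervalDtype.__from_arrow__ unconditionally accessed .storage. A minimal sketch of the roundtrip (a hypothetical reproducer written from the tests in this commit, not taken verbatim from GH-45881):

# Hypothetical reproducer for GH-45881; the exact error before the fix is an
# assumption (the failure was inside IntervalDtype.__from_arrow__ on read).
import pandas as pd

df = pd.DataFrame(
    {
        "a": pd.IntervalIndex.from_breaks(
            pd.date_range("2012-01-01", periods=4, freq="D")
        )
    }
)
df.to_parquet("interval.parquet", engine="pyarrow")  # the write side was fine
result = pd.read_parquet("interval.parquet", engine="pyarrow")  # previously failed
print(result.dtypes)  # with the fix: a    interval[datetime64[ns], right]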
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
@@ -364,6 +364,7 @@ I/O
 - Bug in :func:`read_csv` not respecting a specified converter to index columns in all cases (:issue:`40589`)
 - Bug in :func:`read_parquet` when ``engine="pyarrow"`` which caused partial write to disk when column of unsupported datatype was passed (:issue:`44914`)
 - Bug in :func:`DataFrame.to_excel` and :class:`ExcelWriter` would raise when writing an empty DataFrame to a ``.ods`` file (:issue:`45793`)
+- Bug in Parquet roundtrip for Interval dtype with ``datetime64[ns]`` subtype (:issue:`45881`)
 
 Period
 ^^^^^^
10 changes: 6 additions & 4 deletions pandas/core/dtypes/dtypes.py
@@ -1253,16 +1253,18 @@ def __from_arrow__(
 
         results = []
         for arr in chunks:
-            left = np.asarray(arr.storage.field("left"), dtype=self.subtype)
-            right = np.asarray(arr.storage.field("right"), dtype=self.subtype)
-            iarr = IntervalArray.from_arrays(left, right, closed=array.type.closed)
+            if isinstance(arr, pyarrow.ExtensionArray):
+                arr = arr.storage
+            left = np.asarray(arr.field("left"), dtype=self.subtype)
+            right = np.asarray(arr.field("right"), dtype=self.subtype)
+            iarr = IntervalArray.from_arrays(left, right, closed=self.closed)
             results.append(iarr)
 
         if not results:
             return IntervalArray.from_arrays(
                 np.array([], dtype=self.subtype),
                 np.array([], dtype=self.subtype),
-                closed=array.type.closed,
+                closed=self.closed,
             )
         return IntervalArray._concat_same_type(results)
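Why the fix works: the old loop assumed every chunk was a pyarrow.ExtensionArray carrying pandas' ArrowIntervalType annotation, so it reached for arr.storage and array.type.closed. When the extension metadata is lost (as after the Parquet roundtrip above), the chunk is a plain StructArray with no .storage, and its type has no .closed. The new code unwraps .storage only when the chunk really is an extension array, and takes closed from the pandas dtype itself, which is always available. A small illustration of the two input shapes (my sketch, not part of the commit):

import pyarrow as pa

# What pyarrow hands back once the extension type is lost: a plain struct array.
struct = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}])
print(isinstance(struct, pa.ExtensionArray))  # False -> read the struct fields directly
print(struct.field("left"))  # the int64 child array containing [0, 1]
# A chunk that kept its extension annotation would instead be a
# pyarrow.ExtensionArray whose .storage is exactly this kind of struct array.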
20 changes: 20 additions & 0 deletions pandas/tests/arrays/interval/test_interval.py
@@ -376,3 +376,23 @@ def test_arrow_table_roundtrip_without_metadata(breaks):
     result = table.to_pandas()
     assert isinstance(result["a"].dtype, pd.IntervalDtype)
     tm.assert_frame_equal(result, df)
+
+
+@pyarrow_skip
+def test_from_arrow_from_raw_struct_array():
+    # in case pyarrow lost the Interval extension type (eg on parquet roundtrip
+    # with datetime64[ns] subtype, see GH-45881), still allow conversion
+    # from arrow to IntervalArray
+    import pyarrow as pa
+
+    arr = pa.array([{"left": 0, "right": 1}, {"left": 1, "right": 2}])
+    dtype = pd.IntervalDtype(np.dtype("int64"), closed="neither")
+
+    result = dtype.__from_arrow__(arr)
+    expected = IntervalArray.from_breaks(
+        np.array([0, 1, 2], dtype="int64"), closed="neither"
+    )
+    tm.assert_extension_array_equal(result, expected)
+
+    result = dtype.__from_arrow__(pa.chunked_array([arr]))
+    tm.assert_extension_array_equal(result, expected)
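The new test feeds a raw struct array straight into IntervalDtype.__from_arrow__, both as a plain pyarrow Array and wrapped in a chunked array, so the conversion is exercised for both input types without going through an actual Parquet file.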
9 changes: 6 additions & 3 deletions pandas/tests/io/test_parquet.py
@@ -928,15 +928,18 @@ def test_pyarrow_backed_string_array(self, pa, string_storage):
         with pd.option_context("string_storage", string_storage):
             check_round_trip(df, pa, expected=df.astype(f"string[{string_storage}]"))
 
-    @td.skip_if_no("pyarrow")
+    @td.skip_if_no("pyarrow", min_version="2.0.0")
     def test_additional_extension_types(self, pa):
         # test additional ExtensionArrays that are supported through the
         # __arrow_array__ protocol + by defining a custom ExtensionType
         df = pd.DataFrame(
             {
-                # Arrow does not yet support struct in writing to Parquet (ARROW-1644)
-                # "c": pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (3, 4)]),
+                "c": pd.IntervalIndex.from_tuples([(0, 1), (1, 2), (3, 4)]),
                 "d": pd.period_range("2012-01-01", periods=3, freq="D"),
+                # GH-45881 issue with interval with datetime64[ns] subtype
+                "e": pd.IntervalIndex.from_breaks(
+                    pd.date_range("2012-01-01", periods=4, freq="D")
+                ),
             }
         )
         check_round_trip(df, pa)
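Two changes here besides the decorator: the plain integer interval column "c" is enabled (previously only present as a comment, since writing structs to Parquet was once unsupported per ARROW-1644), and a new column "e" with datetime64[ns] subtype covers exactly the GH-45881 case. The reason for the min_version="2.0.0" bump is not stated in the commit; presumably older pyarrow versions cannot round-trip these extension types through Parquet.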
