Skip to content

Commit

Permalink
BUG: Add fix for hashing timestamps with folds (pandas-dev#44282)
Browse files Browse the repository at this point in the history
  • Loading branch information
jonwiggins authored Jan 4, 2022
1 parent 8ae1b4c commit 490b189
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 0 deletions.
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.4.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,7 @@ Datetimelike
- Bug in :class:`DateOffset` addition with :class:`Timestamp` where ``offset.nanoseconds`` would not be included in the result (:issue:`43968`, :issue:`36589`)
- Bug in :meth:`Timestamp.fromtimestamp` not supporting the ``tz`` argument (:issue:`45083`)
- Bug in :class:`DataFrame` construction from dict of :class:`Series` with mismatched index dtypes sometimes raising depending on the ordering of the passed dict (:issue:`44091`)
- Bug in hashing of :class:`Timestamp` objects during some DST transitions causing a segmentation fault (:issue:`33931`, :issue:`40817`)
-

Timedelta
Expand Down
2 changes: 2 additions & 0 deletions pandas/_libs/tslibs/timestamps.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,8 @@ cdef class _Timestamp(ABCTimestamp):
def __hash__(_Timestamp self):
    # Return the hash of this timestamp, delegating to ``datetime.__hash__``
    # except when a non-zero nanosecond component is present.
    if self.nanosecond:
        # Hash the integer ``value`` instead; presumably because the
        # datetime hash has no notion of nanoseconds -- TODO confirm.
        return hash(self.value)

    # When ``fold`` is set, hash a ``fold=0`` copy so that both readings of
    # a repeated wall-clock time produce the same hash (GH#33931, GH#40817).
    hashable = self.replace(fold=0) if self.fold else self
    return datetime.__hash__(hashable)

def __richcmp__(_Timestamp self, object other, int op):
Expand Down
37 changes: 37 additions & 0 deletions pandas/tests/frame/methods/test_reindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import numpy as np
import pytest

from pandas._libs.tslibs.timezones import dateutil_gettz as gettz

import pandas as pd
from pandas import (
Categorical,
Expand Down Expand Up @@ -78,6 +80,41 @@ def test_setitem_reset_index_dtypes(self):
result = df2.reset_index()
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "timezone, year, month, day, hour",
    [["America/Chicago", 2013, 11, 3, 1], ["America/Santiago", 2021, 4, 3, 23]],
)
def test_reindex_timestamp_with_fold(self, timezone, year, month, day, hour):
    # see gh-40817
    # Build the same wall-clock time twice, once per ``fold`` value, use both
    # as index labels, then reindex to labels that are absent from the index.
    shared_kwargs = {
        "year": year,
        "month": month,
        "day": day,
        "hour": hour,
        "minute": 0,
        "tzinfo": gettz(timezone),
    }
    before_fold = pd.Timestamp(fold=0, **shared_kwargs)
    after_fold = pd.Timestamp(fold=1, **shared_kwargs)

    frame = DataFrame({"index": [before_fold, after_fold], "vals": ["a", "b"]})
    result = frame.set_index("index").reindex(["1", "2"])

    # Neither new label exists in the original index, so every value is missing.
    expected = DataFrame({"index": ["1", "2"], "vals": [None, None]})
    expected = expected.set_index("index")
    tm.assert_frame_equal(result, expected)


class TestDataFrameSelectReindex:
# These are specific reindex-based tests; other indexing tests should go in
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/scalar/timestamp/test_timestamp.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,33 @@ def test_hash_equivalent(self):
stamp = Timestamp(datetime(2011, 1, 1))
assert d[stamp] == 5

@pytest.mark.parametrize(
    "timezone, year, month, day, hour",
    [["America/Chicago", 2013, 11, 3, 1], ["America/Santiago", 2021, 4, 3, 23]],
)
def test_hash_timestamp_with_fold(self, timezone, year, month, day, hour):
    # see gh-33931
    # The same wall-clock time built with fold=0 and with fold=1 must
    # produce equal hashes.
    shared_kwargs = {
        "year": year,
        "month": month,
        "day": day,
        "hour": hour,
        "minute": 0,
        "tzinfo": gettz(timezone),
    }
    before_fold = Timestamp(fold=0, **shared_kwargs)
    after_fold = Timestamp(fold=1, **shared_kwargs)

    assert hash(before_fold) == hash(after_fold)

def test_tz_conversion_freq(self, tz_naive_fixture):
# GH25241
with tm.assert_produces_warning(FutureWarning, match="freq"):
Expand Down

0 comments on commit 490b189

Please sign in to comment.