Skip to content

Commit

Permalink
PERF: short-circuit allocations in infer_dtype, ensure_datetime64ns (p…
Browse files (browse the repository at this point in the history)
  • Loading branch information
jbrockmendel authored Dec 22, 2021
1 parent 4735c0f commit 50b4df3
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 24 deletions.
19 changes: 11 additions & 8 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1447,25 +1447,28 @@ def infer_dtype(value: object, skipna: bool = True) -> str:
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
values = construct_1d_object_array_from_listlike(value)

# make contiguous
# for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup
values = values.ravel(order="K")

val = _try_infer_map(values.dtype)
if val is not None:
# Anything other than object-dtype should return here.
return val

if values.dtype != np.object_:
values = values.astype("O")
if values.descr.type_num != NPY_OBJECT:
# i.e. values.dtype != np.object_
# This should not be reached
values = values.astype(object)

# for f-contiguous array 1000 x 1000, passing order="K" gives 5000x speedup
values = values.ravel(order="K")

if skipna:
values = values[~isnaobj(values)]

n = len(values)
n = cnp.PyArray_SIZE(values)
if n == 0:
return "empty"

# try to use a valid value
# Iterate until we find our first valid value. We will use this
# value to decide which of the is_foo_array functions to call.
for i in range(n):
val = values[i]

Expand Down
33 changes: 17 additions & 16 deletions pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -227,12 +227,7 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
dtype = arr.dtype
arr = arr.astype(dtype.newbyteorder("<"))

ivalues = arr.view(np.int64).ravel("K")

result = np.empty_like(arr, dtype=DT64NS_DTYPE)
iresult = result.ravel("K").view(np.int64)

if len(iresult) == 0:
if arr.size == 0:
result = arr.view(DT64NS_DTYPE)
if copy:
result = result.copy()
Expand All @@ -245,17 +240,23 @@ def ensure_datetime64ns(arr: ndarray, copy: bool = True):
raise ValueError("datetime64/timedelta64 must have a unit specified")

if unit == NPY_FR_ns:
# Check this before allocating result for perf, might save some memory
if copy:
arr = arr.copy()
result = arr
else:
for i in range(n):
if ivalues[i] != NPY_NAT:
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
iresult[i] = dtstruct_to_dt64(&dts)
check_dts_bounds(&dts)
else:
iresult[i] = NPY_NAT
return arr.copy()
return arr

ivalues = arr.view(np.int64).ravel("K")

result = np.empty_like(arr, dtype=DT64NS_DTYPE)
iresult = result.ravel("K").view(np.int64)

for i in range(n):
if ivalues[i] != NPY_NAT:
pandas_datetime_to_datetimestruct(ivalues[i], unit, &dts)
iresult[i] = dtstruct_to_dt64(&dts)
check_dts_bounds(&dts)
else:
iresult[i] = NPY_NAT

return result

Expand Down

0 comments on commit 50b4df3

Please sign in to comment.