Skip to content

Commit

Permalink
feat: Add dt.replace (#19708)
Browse files Browse the repository at this point in the history
  • Loading branch information
mcrumiller authored Dec 20, 2024
1 parent d68689f commit 8757ad0
Show file tree
Hide file tree
Showing 14 changed files with 823 additions and 64 deletions.
30 changes: 30 additions & 0 deletions crates/polars-plan/src/dsl/dt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -331,4 +331,34 @@ impl DateLikeNameSpace {
TemporalFunction::TotalNanoseconds,
))
}

/// Replace individual date/time components of a temporal expression.
///
/// Builds a `TemporalFunction::Replace` function expression. The component
/// expressions are passed on in the fixed order `year`, `month`, `day`,
/// `hour`, `minute`, `second`, `microsecond`, `ambiguous`; the evaluation
/// kernel reads its inputs by position, so this order must not change.
#[allow(clippy::too_many_arguments)]
pub fn replace(
    self,
    year: Expr,
    month: Expr,
    day: Expr,
    hour: Expr,
    minute: Expr,
    second: Expr,
    microsecond: Expr,
    ambiguous: Expr,
) -> Expr {
    let function = FunctionExpr::TemporalExpr(TemporalFunction::Replace);
    // Positional inputs that follow the temporal expression itself.
    let components = [
        year,
        month,
        day,
        hour,
        minute,
        second,
        microsecond,
        ambiguous,
    ];
    self.0.map_many_private(function, &components, false, None)
}
}
48 changes: 48 additions & 0 deletions crates/polars-plan/src/dsl/function_expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ use polars_time::base_utc_offset as base_utc_offset_fn;
use polars_time::dst_offset as dst_offset_fn;
#[cfg(feature = "offset_by")]
use polars_time::impl_offset_by;
#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))]
use polars_time::replace::{replace_date, replace_datetime};
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

Expand Down Expand Up @@ -62,6 +64,7 @@ pub enum TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset,
Round,
Replace,
#[cfg(feature = "timezones")]
ReplaceTimeZone(Option<TimeZone>, NonExistent),
Combine(TimeUnit),
Expand Down Expand Up @@ -117,6 +120,7 @@ impl TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset => mapper.with_dtype(DataType::Duration(TimeUnit::Milliseconds)),
Round => mapper.with_same_dtype(),
Replace => mapper.with_same_dtype(),
#[cfg(feature = "timezones")]
ReplaceTimeZone(tz, _non_existent) => mapper.map_datetime_dtype_timezone(tz.as_ref()),
DatetimeFunction {
Expand Down Expand Up @@ -187,6 +191,7 @@ impl Display for TemporalFunction {
#[cfg(feature = "timezones")]
DSTOffset => "dst_offset",
Round => "round",
Replace => "replace",
#[cfg(feature = "timezones")]
ReplaceTimeZone(_, _) => "replace_time_zone",
DatetimeFunction { .. } => return write!(f, "dt.datetime"),
Expand Down Expand Up @@ -555,3 +560,46 @@ pub(super) fn round(s: &[Column]) -> PolarsResult<Column> {
dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"),
})
}

/// Evaluation kernel for `dt.replace`.
///
/// Inputs are read by position from `s`:
///
/// | index | meaning      | cast to  |
/// |-------|--------------|----------|
/// | 0     | temporal col | (as is)  |
/// | 1     | year         | `Int32`  |
/// | 2     | month        | `Int8`   |
/// | 3     | day          | `Int8`   |
/// | 4     | hour         | `Int8`   |
/// | 5     | minute       | `Int8`   |
/// | 6     | second       | `Int8`   |
/// | 7     | microsecond  | `Int32`  |
/// | 8     | ambiguous    | `String` |
///
/// Indices 4..=8 are only read when the temporal column is a `Datetime`;
/// for a `Date` column only year, month and day are used.
///
/// # Errors
///
/// Returns an error if any `strict_cast` fails, if the underlying
/// `replace_datetime` / `replace_date` call fails, or if the dtype of `s[0]`
/// is neither `Date` nor `Datetime`.
pub(super) fn replace(s: &[Column]) -> PolarsResult<Column> {
    let time_series = &s[0];

    // Year, month and day are needed by both the date and datetime branches.
    let s_year = &s[1].strict_cast(&DataType::Int32)?;
    let s_month = &s[2].strict_cast(&DataType::Int8)?;
    let s_day = &s[3].strict_cast(&DataType::Int8)?;
    let year = s_year.i32()?;
    let month = s_month.i8()?;
    let day = s_day.i8()?;

    match time_series.dtype() {
        DataType::Datetime(_, _) => {
            let s_hour = &s[4].strict_cast(&DataType::Int8)?;
            let s_minute = &s[5].strict_cast(&DataType::Int8)?;
            let s_second = &s[6].strict_cast(&DataType::Int8)?;
            let s_microsecond = &s[7].strict_cast(&DataType::Int32)?;
            let hour = s_hour.i8()?;
            let minute = s_minute.i8()?;
            let second = s_second.i8()?;
            // The caller supplies microseconds; `replace_datetime` is handed
            // the value scaled by 1_000 (i.e. as nanoseconds).
            let nanosecond = &(s_microsecond.i32()? * 1_000);
            let s_ambiguous = &s[8].strict_cast(&DataType::String)?;
            let ambiguous = s_ambiguous.str()?;

            let out = replace_datetime(
                time_series.datetime()?,
                year,
                month,
                day,
                hour,
                minute,
                second,
                nanosecond,
                ambiguous,
            );
            out.map(|s| s.into_column())
        },
        DataType::Date => {
            let out = replace_date(time_series.date()?, year, month, day);
            out.map(|s| s.into_column())
        },
        // Name this operation in the error so the user is pointed at `replace`.
        dt => polars_bail!(opq = replace, got = dt, expected = "date/datetime"),
    }
}
85 changes: 23 additions & 62 deletions crates/polars-plan/src/dsl/function_expr/temporal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
#[cfg(feature = "timezones")]
DSTOffset => map!(datetime::dst_offset),
Round => map_as_slice!(datetime::round),
Replace => map_as_slice!(datetime::replace),
#[cfg(feature = "timezones")]
ReplaceTimeZone(tz, non_existent) => {
map_as_slice!(dispatch::replace_time_zone, tz.as_deref(), non_existent)
Expand All @@ -73,14 +74,12 @@ impl From<TemporalFunction> for SpecialEq<Arc<dyn ColumnsUdf>> {
}
}

#[cfg(feature = "dtype-datetime")]
pub(super) fn datetime(
s: &[Column],
time_unit: &TimeUnit,
time_zone: Option<&str>,
) -> PolarsResult<Column> {
use polars_core::export::chrono::NaiveDate;
use polars_core::utils::CustomIterTools;

let col_name = PlSmallStr::from_static("datetime");

if s.iter().any(|s| s.is_empty()) {
Expand Down Expand Up @@ -123,91 +122,53 @@ pub(super) fn datetime(
}
let year = year.i32()?;

let mut month = month.cast(&DataType::UInt32)?;
let mut month = month.cast(&DataType::Int8)?;
if month.len() < max_len {
month = month.new_from_index(0, max_len);
}
let month = month.u32()?;
let month = month.i8()?;

let mut day = day.cast(&DataType::UInt32)?;
let mut day = day.cast(&DataType::Int8)?;
if day.len() < max_len {
day = day.new_from_index(0, max_len);
}
let day = day.u32()?;
let day = day.i8()?;

let mut hour = hour.cast(&DataType::UInt32)?;
let mut hour = hour.cast(&DataType::Int8)?;
if hour.len() < max_len {
hour = hour.new_from_index(0, max_len);
}
let hour = hour.u32()?;
let hour = hour.i8()?;

let mut minute = minute.cast(&DataType::UInt32)?;
let mut minute = minute.cast(&DataType::Int8)?;
if minute.len() < max_len {
minute = minute.new_from_index(0, max_len);
}
let minute = minute.u32()?;
let minute = minute.i8()?;

let mut second = second.cast(&DataType::UInt32)?;
let mut second = second.cast(&DataType::Int8)?;
if second.len() < max_len {
second = second.new_from_index(0, max_len);
}
let second = second.u32()?;
let second = second.i8()?;

let mut microsecond = microsecond.cast(&DataType::UInt32)?;
if microsecond.len() < max_len {
microsecond = microsecond.new_from_index(0, max_len);
let mut nanosecond = microsecond.cast(&DataType::Int32)? * 1_000;
if nanosecond.len() < max_len {
nanosecond = nanosecond.new_from_index(0, max_len);
}
let microsecond = microsecond.u32()?;
let nanosecond = nanosecond.i32()?;

let mut _ambiguous = ambiguous.cast(&DataType::String)?;
if _ambiguous.len() < max_len {
_ambiguous = _ambiguous.new_from_index(0, max_len);
}
let _ambiguous = _ambiguous.str()?;

let ca: Int64Chunked = year
.into_iter()
.zip(month)
.zip(day)
.zip(hour)
.zip(minute)
.zip(second)
.zip(microsecond)
.map(|((((((y, m), d), h), mnt), s), us)| {
if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) =
(y, m, d, h, mnt, s, us)
{
NaiveDate::from_ymd_opt(y, m, d)
.and_then(|nd| nd.and_hms_micro_opt(h, mnt, s, us))
.map(|ndt| match time_unit {
TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(),
TimeUnit::Microseconds => ndt.and_utc().timestamp_micros(),
TimeUnit::Nanoseconds => ndt.and_utc().timestamp_nanos_opt().unwrap(),
})
} else {
None
}
})
.collect_trusted();

let ca = match time_zone {
#[cfg(feature = "timezones")]
Some(_) => {
let mut ca = ca.into_datetime(*time_unit, None);
ca = replace_time_zone(&ca, time_zone, _ambiguous, NonExistent::Raise)?;
ca
},
_ => {
assert!(
time_zone.is_none(),
"cannot make use of the `time_zone` argument without the 'timezones' feature enabled."
);
ca.into_datetime(*time_unit, None)
},
};
let ambiguous = _ambiguous.str()?;

let mut s = ca.into_column();
s.rename(col_name);
Ok(s)
let ca = DatetimeChunked::new_from_parts(
year, month, day, hour, minute, second, nanosecond, ambiguous, time_unit, time_zone,
col_name,
);
ca.map(|s| s.into_column())
}

pub(super) fn combine(s: &[Column], tu: TimeUnit) -> PolarsResult<Column> {
Expand Down
27 changes: 27 additions & 0 deletions crates/polars-python/src/expr/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,33 @@ impl PyExpr {
self.inner.clone().dt().round(every.inner).into()
}

// Wrapper around `dt().replace(..)`: unwraps the inner expression of every
// argument and forwards them in the same order (year, month, day, hour,
// minute, second, microsecond, ambiguous), then converts the result back
// into `Self`.
fn dt_replace(
    &self,
    year: Self,
    month: Self,
    day: Self,
    hour: Self,
    minute: Self,
    second: Self,
    microsecond: Self,
    ambiguous: Self,
) -> Self {
    // `self` is only borrowed, so the inner expression is cloned before it
    // is consumed by the namespace accessor.
    let namespace = self.inner.clone().dt();
    let replaced = namespace.replace(
        year.inner,
        month.inner,
        day.inner,
        hour.inner,
        minute.inner,
        second.inner,
        microsecond.inner,
        ambiguous.inner,
    );
    replaced.into()
}

fn dt_combine(&self, time: Self, time_unit: Wrap<TimeUnit>) -> Self {
self.inner
.clone()
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-python/src/lazyframe/visitor/expr_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,7 @@ pub enum PyTemporalFunction {
BaseUtcOffset,
DSTOffset,
Round,
Replace,
ReplaceTimeZone,
Combine,
DatetimeFunction,
Expand Down Expand Up @@ -1024,6 +1025,7 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult<PyObject> {
#[cfg(feature = "timezones")]
TemporalFunction::DSTOffset => (PyTemporalFunction::DSTOffset,).into_py_any(py),
TemporalFunction::Round => (PyTemporalFunction::Round,).into_py_any(py),
TemporalFunction::Replace => (PyTemporalFunction::Replace).into_py_any(py),
#[cfg(feature = "timezones")]
TemporalFunction::ReplaceTimeZone(time_zone, non_existent) => (
PyTemporalFunction::ReplaceTimeZone,
Expand Down
28 changes: 27 additions & 1 deletion crates/polars-time/src/chunkedarray/date.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use arrow::temporal_conversions::{MILLISECONDS, SECONDS_IN_DAY};
use arrow::temporal_conversions::{EPOCH_DAYS_FROM_CE, MILLISECONDS, SECONDS_IN_DAY};
use polars_core::export::chrono::{Datelike, NaiveDate};
use polars_core::utils::CustomIterTools;

use super::*;

Expand Down Expand Up @@ -74,6 +76,30 @@ pub trait DateMethods: AsDate {
}

fn parse_from_str_slice(name: PlSmallStr, v: &[&str], fmt: &str) -> DateChunked;

/// Build a [`DateChunked`] named `name` from per-row year, month and day
/// values.
///
/// The three inputs are walked in lockstep. A row is null in the output when
/// any of its components is null, or when `NaiveDate::from_ymd_opt` rejects
/// the combination. Valid dates are stored as
/// `num_days_from_ce() - EPOCH_DAYS_FROM_CE`.
fn new_from_parts(
    year: &Int32Chunked,
    month: &Int8Chunked,
    day: &Int8Chunked,
    name: PlSmallStr,
) -> PolarsResult<DateChunked> {
    let mut days: Int32Chunked = year
        .into_iter()
        .zip(month)
        .zip(day)
        .map(|((y, m), d)| match (y, m, d) {
            // `as u32` on a negative `i8` produces a very large value, which
            // `from_ymd_opt` is expected to reject, giving a null row.
            (Some(y), Some(m), Some(d)) => NaiveDate::from_ymd_opt(y, m as u32, d as u32)
                .map(|date| date.num_days_from_ce() - EPOCH_DAYS_FROM_CE),
            _ => None,
        })
        .collect_trusted();
    days.rename(name);
    Ok(days.into_date())
}
}

impl DateMethods for DateChunked {
Expand Down
Loading

0 comments on commit 8757ad0

Please sign in to comment.