Skip to content

Commit

Permalink
feat: Add new Int128Type (#20232)
Browse files (browse the repository at this point in the history)
Co-authored-by: ritchie <[email protected]>
  • Loading branch information
coastalwhite and ritchie46 authored Dec 12, 2024
1 parent 0869598 commit f599e88
Show file tree
Hide file tree
Showing 104 changed files with 1,022 additions and 660 deletions.
1 change: 1 addition & 0 deletions crates/polars-arrow/src/array/primitive/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ pub fn get_write_value<'a, T: NativeType, F: Write>(
Int16 => Box::new(|f, index| write!(f, "{}", array.value(index))),
Int32 => Box::new(|f, index| write!(f, "{}", array.value(index))),
Int64 => Box::new(|f, index| write!(f, "{}", array.value(index))),
Int128 => Box::new(|f, index| write!(f, "{}", array.value(index))),
UInt8 => Box::new(|f, index| write!(f, "{}", array.value(index))),
UInt16 => Box::new(|f, index| write!(f, "{}", array.value(index))),
UInt32 => Box::new(|f, index| write!(f, "{}", array.value(index))),
Expand Down
7 changes: 6 additions & 1 deletion crates/polars-arrow/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ pub enum ArrowDataType {
Int32,
/// An [`i64`]
Int64,
/// An [`i128`]
Int128,
/// An [`u8`]
UInt8,
/// An [`u16`]
Expand Down Expand Up @@ -259,6 +261,7 @@ impl ArrowDataType {
Float16 => PhysicalType::Primitive(PrimitiveType::Float16),
Float32 => PhysicalType::Primitive(PrimitiveType::Float32),
Float64 => PhysicalType::Primitive(PrimitiveType::Float64),
Int128 => PhysicalType::Primitive(PrimitiveType::Int128),
Interval(IntervalUnit::DayTime) => PhysicalType::Primitive(PrimitiveType::DaysMs),
Interval(IntervalUnit::MonthDayNano) => {
PhysicalType::Primitive(PrimitiveType::MonthDayNano)
Expand Down Expand Up @@ -374,6 +377,7 @@ impl ArrowDataType {
| D::Int16
| D::Int32
| D::Int64
| D::Int128
| D::UInt8
| D::UInt16
| D::UInt32
Expand Down Expand Up @@ -410,6 +414,7 @@ impl ArrowDataType {
| D::UInt16
| D::UInt32
| D::UInt64
| D::Int128
| D::Float16
| D::Float32
| D::Float64
Expand Down Expand Up @@ -469,7 +474,7 @@ impl From<PrimitiveType> for ArrowDataType {
PrimitiveType::UInt16 => ArrowDataType::UInt16,
PrimitiveType::UInt32 => ArrowDataType::UInt32,
PrimitiveType::UInt64 => ArrowDataType::UInt64,
PrimitiveType::Int128 => ArrowDataType::Decimal(32, 32),
PrimitiveType::Int128 => ArrowDataType::Int128,
PrimitiveType::Int256 => ArrowDataType::Decimal256(32, 32),
PrimitiveType::Float16 => ArrowDataType::Float16,
PrimitiveType::Float32 => ArrowDataType::Float32,
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-arrow/src/ffi/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,8 @@ fn to_format(dtype: &ArrowDataType) -> String {
ArrowDataType::UInt32 => "I".to_string(),
ArrowDataType::Int64 => "l".to_string(),
ArrowDataType::UInt64 => "L".to_string(),
// Doesn't exist in arrow, '_pl' prefixed is Polars specific
ArrowDataType::Int128 => "_pli128".to_string(),
ArrowDataType::Float16 => "e".to_string(),
ArrowDataType::Float32 => "f".to_string(),
ArrowDataType::Float64 => "g".to_string(),
Expand Down
5 changes: 5 additions & 0 deletions crates/polars-arrow/src/io/ipc/write/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -187,6 +187,10 @@ fn serialize_type(dtype: &ArrowDataType) -> arrow_format::ipc::Type {
bit_width: 64,
is_signed: true,
})),
Int128 => ipc::Type::Int(Box::new(ipc::Int {
bit_width: 128,
is_signed: true,
})),
Float16 => ipc::Type::FloatingPoint(Box::new(ipc::FloatingPoint {
precision: ipc::Precision::Half,
})),
Expand Down Expand Up @@ -281,6 +285,7 @@ fn serialize_children(
| UInt16
| UInt32
| UInt64
| Int128
| Float16
| Float32
| Float64
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-compute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,4 +35,5 @@ nightly = []
simd = ["arrow/simd"]
approx_unique = []
dtype-array = []
dtype-decimal = ["arrow/dtype-decimal"]
dtype-decimal = ["arrow/dtype-decimal", "dtype-i128"]
dtype-i128 = []
5 changes: 5 additions & 0 deletions crates/polars-compute/src/bitwise/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ impl_bitwise_kernel! {
(f64, f64::to_bits, f64::from_bits),
}

#[cfg(feature = "dtype-i128")]
impl_bitwise_kernel! {
(i128, identity, identity),
}

impl BitwiseKernel for BooleanArray {
type Scalar = bool;

Expand Down
4 changes: 4 additions & 0 deletions crates/polars-compute/src/cast/binary_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,15 @@ impl_parse!(i8);
impl_parse!(i16);
impl_parse!(i32);
impl_parse!(i64);

impl_parse!(u8);
impl_parse!(u16);
impl_parse!(u32);
impl_parse!(u64);

#[cfg(feature = "dtype-i128")]
impl_parse!(i128);

impl Parse for f32 {
fn parse(val: &[u8]) -> Option<Self>
where
Expand Down
13 changes: 9 additions & 4 deletions crates/polars-compute/src/cast/binview_to.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,10 +97,15 @@ pub fn binview_to_decimal(
scale: usize,
) -> PrimitiveArray<i128> {
let precision = precision.map(|p| p as u8);
array
.iter()
.map(|val| val.and_then(|val| deserialize_decimal(val, precision, scale as u8)))
.collect()
PrimitiveArray::<i128>::from_trusted_len_iter(
array
.iter()
.map(|val| val.and_then(|val| deserialize_decimal(val, precision, scale as u8))),
)
.to(ArrowDataType::Decimal(
precision.unwrap_or(38).into(),
scale,
))
}

pub(super) fn utf8view_to_naive_timestamp_dyn(
Expand Down
54 changes: 54 additions & 0 deletions crates/polars-compute/src/cast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,8 @@ pub fn cast(
Int16 => binview_to_primitive_dyn::<i16>(&arr.to_binview(), to_type, options),
Int32 => binview_to_primitive_dyn::<i32>(&arr.to_binview(), to_type, options),
Int64 => binview_to_primitive_dyn::<i64>(&arr.to_binview(), to_type, options),
#[cfg(feature = "dtype-i128")]
Int128 => binview_to_primitive_dyn::<i128>(&arr.to_binview(), to_type, options),
Float32 => binview_to_primitive_dyn::<f32>(&arr.to_binview(), to_type, options),
Float64 => binview_to_primitive_dyn::<f64>(&arr.to_binview(), to_type, options),
Timestamp(time_unit, None) => {
Expand All @@ -415,6 +417,10 @@ pub fn cast(
)
.map(|arr| arr.boxed()),
Date32 => utf8view_to_date32_dyn(array),
#[cfg(feature = "dtype-decimal")]
Decimal(precision, scale) => {
Ok(binview_to_decimal(&arr.to_binview(), Some(*precision), *scale).to_boxed())
},
_ => polars_bail!(InvalidOperation:
"casting from {from_type:?} to {to_type:?} not supported",
),
Expand All @@ -430,6 +436,8 @@ pub fn cast(
Int16 => primitive_to_boolean_dyn::<i16>(array, to_type.clone()),
Int32 => primitive_to_boolean_dyn::<i32>(array, to_type.clone()),
Int64 => primitive_to_boolean_dyn::<i64>(array, to_type.clone()),
#[cfg(feature = "dtype-i128")]
Int128 => primitive_to_boolean_dyn::<i128>(array, to_type.clone()),
Float32 => primitive_to_boolean_dyn::<f32>(array, to_type.clone()),
Float64 => primitive_to_boolean_dyn::<f64>(array, to_type.clone()),
Decimal(_, _) => primitive_to_boolean_dyn::<i128>(array, to_type.clone()),
Expand All @@ -446,6 +454,8 @@ pub fn cast(
Int16 => boolean_to_primitive_dyn::<i16>(array),
Int32 => boolean_to_primitive_dyn::<i32>(array),
Int64 => boolean_to_primitive_dyn::<i64>(array),
#[cfg(feature = "dtype-i128")]
Int128 => boolean_to_primitive_dyn::<i128>(array),
Float32 => boolean_to_primitive_dyn::<f32>(array),
Float64 => boolean_to_primitive_dyn::<f64>(array),
Utf8View => boolean_to_utf8view_dyn(array),
Expand Down Expand Up @@ -516,6 +526,8 @@ pub fn cast(
Int16 => binary_to_primitive_dyn::<i64, i16>(array, to_type, options),
Int32 => binary_to_primitive_dyn::<i64, i32>(array, to_type, options),
Int64 => binary_to_primitive_dyn::<i64, i64>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
Int128 => binary_to_primitive_dyn::<i64, i128>(array, to_type, options),
Float32 => binary_to_primitive_dyn::<i64, f32>(array, to_type, options),
Float64 => binary_to_primitive_dyn::<i64, f64>(array, to_type, options),
Binary => {
Expand Down Expand Up @@ -553,6 +565,8 @@ pub fn cast(
(UInt8, Int16) => primitive_to_primitive_dyn::<u8, i16>(array, to_type, options),
(UInt8, Int32) => primitive_to_primitive_dyn::<u8, i32>(array, to_type, options),
(UInt8, Int64) => primitive_to_primitive_dyn::<u8, i64>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(UInt8, Int128) => primitive_to_primitive_dyn::<u8, i128>(array, to_type, options),
(UInt8, Float32) => primitive_to_primitive_dyn::<u8, f32>(array, to_type, as_options),
(UInt8, Float64) => primitive_to_primitive_dyn::<u8, f64>(array, to_type, as_options),
(UInt8, Decimal(p, s)) => integer_to_decimal_dyn::<u8>(array, *p, *s),
Expand All @@ -564,6 +578,8 @@ pub fn cast(
(UInt16, Int16) => primitive_to_primitive_dyn::<u16, i16>(array, to_type, options),
(UInt16, Int32) => primitive_to_primitive_dyn::<u16, i32>(array, to_type, options),
(UInt16, Int64) => primitive_to_primitive_dyn::<u16, i64>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(UInt16, Int128) => primitive_to_primitive_dyn::<u16, i128>(array, to_type, options),
(UInt16, Float32) => primitive_to_primitive_dyn::<u16, f32>(array, to_type, as_options),
(UInt16, Float64) => primitive_to_primitive_dyn::<u16, f64>(array, to_type, as_options),
(UInt16, Decimal(p, s)) => integer_to_decimal_dyn::<u16>(array, *p, *s),
Expand All @@ -575,6 +591,8 @@ pub fn cast(
(UInt32, Int16) => primitive_to_primitive_dyn::<u32, i16>(array, to_type, options),
(UInt32, Int32) => primitive_to_primitive_dyn::<u32, i32>(array, to_type, options),
(UInt32, Int64) => primitive_to_primitive_dyn::<u32, i64>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(UInt32, Int128) => primitive_to_primitive_dyn::<u32, i128>(array, to_type, options),
(UInt32, Float32) => primitive_to_primitive_dyn::<u32, f32>(array, to_type, as_options),
(UInt32, Float64) => primitive_to_primitive_dyn::<u32, f64>(array, to_type, as_options),
(UInt32, Decimal(p, s)) => integer_to_decimal_dyn::<u32>(array, *p, *s),
Expand All @@ -586,6 +604,8 @@ pub fn cast(
(UInt64, Int16) => primitive_to_primitive_dyn::<u64, i16>(array, to_type, options),
(UInt64, Int32) => primitive_to_primitive_dyn::<u64, i32>(array, to_type, options),
(UInt64, Int64) => primitive_to_primitive_dyn::<u64, i64>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(UInt64, Int128) => primitive_to_primitive_dyn::<u64, i128>(array, to_type, options),
(UInt64, Float32) => primitive_to_primitive_dyn::<u64, f32>(array, to_type, as_options),
(UInt64, Float64) => primitive_to_primitive_dyn::<u64, f64>(array, to_type, as_options),
(UInt64, Decimal(p, s)) => integer_to_decimal_dyn::<u64>(array, *p, *s),
Expand All @@ -597,6 +617,8 @@ pub fn cast(
(Int8, Int16) => primitive_to_primitive_dyn::<i8, i16>(array, to_type, as_options),
(Int8, Int32) => primitive_to_primitive_dyn::<i8, i32>(array, to_type, as_options),
(Int8, Int64) => primitive_to_primitive_dyn::<i8, i64>(array, to_type, as_options),
#[cfg(feature = "dtype-i128")]
(Int8, Int128) => primitive_to_primitive_dyn::<i8, i128>(array, to_type, as_options),
(Int8, Float32) => primitive_to_primitive_dyn::<i8, f32>(array, to_type, as_options),
(Int8, Float64) => primitive_to_primitive_dyn::<i8, f64>(array, to_type, as_options),
(Int8, Decimal(p, s)) => integer_to_decimal_dyn::<i8>(array, *p, *s),
Expand All @@ -608,6 +630,8 @@ pub fn cast(
(Int16, Int8) => primitive_to_primitive_dyn::<i16, i8>(array, to_type, options),
(Int16, Int32) => primitive_to_primitive_dyn::<i16, i32>(array, to_type, as_options),
(Int16, Int64) => primitive_to_primitive_dyn::<i16, i64>(array, to_type, as_options),
#[cfg(feature = "dtype-i128")]
(Int16, Int128) => primitive_to_primitive_dyn::<i16, i128>(array, to_type, as_options),
(Int16, Float32) => primitive_to_primitive_dyn::<i16, f32>(array, to_type, as_options),
(Int16, Float64) => primitive_to_primitive_dyn::<i16, f64>(array, to_type, as_options),
(Int16, Decimal(p, s)) => integer_to_decimal_dyn::<i16>(array, *p, *s),
Expand All @@ -619,6 +643,8 @@ pub fn cast(
(Int32, Int8) => primitive_to_primitive_dyn::<i32, i8>(array, to_type, options),
(Int32, Int16) => primitive_to_primitive_dyn::<i32, i16>(array, to_type, options),
(Int32, Int64) => primitive_to_primitive_dyn::<i32, i64>(array, to_type, as_options),
#[cfg(feature = "dtype-i128")]
(Int32, Int128) => primitive_to_primitive_dyn::<i32, i128>(array, to_type, as_options),
(Int32, Float32) => primitive_to_primitive_dyn::<i32, f32>(array, to_type, as_options),
(Int32, Float64) => primitive_to_primitive_dyn::<i32, f64>(array, to_type, as_options),
(Int32, Decimal(p, s)) => integer_to_decimal_dyn::<i32>(array, *p, *s),
Expand All @@ -630,10 +656,35 @@ pub fn cast(
(Int64, Int8) => primitive_to_primitive_dyn::<i64, i8>(array, to_type, options),
(Int64, Int16) => primitive_to_primitive_dyn::<i64, i16>(array, to_type, options),
(Int64, Int32) => primitive_to_primitive_dyn::<i64, i32>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int64, Int128) => primitive_to_primitive_dyn::<i64, i128>(array, to_type, options),
(Int64, Float32) => primitive_to_primitive_dyn::<i64, f32>(array, to_type, options),
(Int64, Float64) => primitive_to_primitive_dyn::<i64, f64>(array, to_type, as_options),
(Int64, Decimal(p, s)) => integer_to_decimal_dyn::<i64>(array, *p, *s),

#[cfg(feature = "dtype-i128")]
(Int128, UInt8) => primitive_to_primitive_dyn::<i128, u8>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, UInt16) => primitive_to_primitive_dyn::<i128, u16>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, UInt32) => primitive_to_primitive_dyn::<i128, u32>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, UInt64) => primitive_to_primitive_dyn::<i128, u64>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, Int8) => primitive_to_primitive_dyn::<i128, i8>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, Int16) => primitive_to_primitive_dyn::<i128, i16>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, Int32) => primitive_to_primitive_dyn::<i128, i32>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, Int64) => primitive_to_primitive_dyn::<i128, i64>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, Float32) => primitive_to_primitive_dyn::<i128, f32>(array, to_type, options),
#[cfg(feature = "dtype-i128")]
(Int128, Float64) => primitive_to_primitive_dyn::<i128, f64>(array, to_type, as_options),
#[cfg(feature = "dtype-i128")]
(Int128, Decimal(p, s)) => integer_to_decimal_dyn::<i128>(array, *p, *s),

(Float16, Float32) => {
let from = array.as_any().downcast_ref().unwrap();
Ok(f16_to_f32(from).boxed())
Expand All @@ -647,6 +698,7 @@ pub fn cast(
(Float32, Int16) => primitive_to_primitive_dyn::<f32, i16>(array, to_type, options),
(Float32, Int32) => primitive_to_primitive_dyn::<f32, i32>(array, to_type, options),
(Float32, Int64) => primitive_to_primitive_dyn::<f32, i64>(array, to_type, options),
(Float32, Int128) => primitive_to_primitive_dyn::<f32, i128>(array, to_type, options),
(Float32, Float64) => primitive_to_primitive_dyn::<f32, f64>(array, to_type, as_options),
(Float32, Decimal(p, s)) => float_to_decimal_dyn::<f32>(array, *p, *s),

Expand All @@ -658,6 +710,7 @@ pub fn cast(
(Float64, Int16) => primitive_to_primitive_dyn::<f64, i16>(array, to_type, options),
(Float64, Int32) => primitive_to_primitive_dyn::<f64, i32>(array, to_type, options),
(Float64, Int64) => primitive_to_primitive_dyn::<f64, i64>(array, to_type, options),
(Float64, Int128) => primitive_to_primitive_dyn::<f64, i128>(array, to_type, options),
(Float64, Float32) => primitive_to_primitive_dyn::<f64, f32>(array, to_type, options),
(Float64, Decimal(p, s)) => float_to_decimal_dyn::<f64>(array, *p, *s),

Expand All @@ -669,6 +722,7 @@ pub fn cast(
(Decimal(_, _), Int16) => decimal_to_integer_dyn::<i16>(array),
(Decimal(_, _), Int32) => decimal_to_integer_dyn::<i32>(array),
(Decimal(_, _), Int64) => decimal_to_integer_dyn::<i64>(array),
(Decimal(_, _), Int128) => decimal_to_integer_dyn::<i128>(array),
(Decimal(_, _), Float32) => decimal_to_float_dyn::<f32>(array),
(Decimal(_, _), Float64) => decimal_to_float_dyn::<f64>(array),
(Decimal(_, _), Decimal(to_p, to_s)) => decimal_to_decimal_dyn(array, *to_p, *to_s),
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ dtype-time = ["temporal"]
dtype-array = ["arrow/dtype-array", "polars-compute/dtype-array"]
dtype-i8 = []
dtype-i16 = []
dtype-decimal = ["arrow/dtype-decimal", "polars-compute/cast", "polars-compute/dtype-decimal"]
dtype-i128 = ["polars-compute/dtype-i128"]
dtype-decimal = ["arrow/dtype-decimal", "polars-compute/cast", "polars-compute/dtype-decimal", "dtype-i128"]
dtype-u8 = []
dtype-u16 = []
dtype-categorical = []
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@ where
values_capacity: usize,
inner_type: DataType,
) -> Self {
assert!(
inner_type.is_numeric() || inner_type.is_temporal(),
"inner type must be primitive"
debug_assert!(
inner_type.to_physical().is_numeric(),
"inner type must be primitive, got {}",
inner_type
);
let values = MutablePrimitiveArray::<T::Native>::with_capacity(values_capacity);
let builder = LargePrimitiveBuilder::<T::Native>::new_with_capacity(values, capacity);
Expand Down
24 changes: 23 additions & 1 deletion crates/polars-core/src/chunked_array/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,26 @@ fn cast_impl_inner(
dtype: &DataType,
options: CastOptions,
) -> PolarsResult<Series> {
let chunks = cast_chunks(chunks, &dtype.to_physical(), options)?;
let chunks = match dtype {
#[cfg(feature = "dtype-decimal")]
DataType::Decimal(_, _) => {
let mut chunks = cast_chunks(chunks, dtype, options)?;
// @NOTE: We cannot cast here as that will lower the scale.
for chunk in chunks.iter_mut() {
*chunk = std::mem::take(
chunk
.as_any_mut()
.downcast_mut::<PrimitiveArray<i128>>()
.unwrap(),
)
.to(ArrowDataType::Int128)
.to_boxed();
}
chunks
},
_ => cast_chunks(chunks, &dtype.to_physical(), options)?,
};

let out = Series::try_from((name, chunks))?;
use DataType::*;
let out = match dtype {
Expand All @@ -91,6 +110,8 @@ fn cast_impl_inner(
Duration(tu) => out.into_duration(*tu),
#[cfg(feature = "dtype-time")]
Time => out.into_time(),
#[cfg(feature = "dtype-decimal")]
Decimal(precision, scale) => out.into_decimal(*precision, scale.unwrap_or(0))?,
_ => out,
};

Expand Down Expand Up @@ -303,6 +324,7 @@ impl ChunkCast for StringChunked {
*precision,
*scale,
)
.to(ArrowDataType::Int128)
});
Ok(Int128Chunked::from_chunk_iter(self.name().clone(), chunks)
.into_decimal_unchecked(*precision, *scale)
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/list/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ impl<I: Iterator<Item = Option<ArrayBox>>> Iterator for AmortizedListIter<'_, I>
vec![array_ref],
&self.inner_dtype.to_physical(),
)
.cast_unchecked(&self.inner_dtype)
.from_physical_unchecked(&self.inner_dtype)
.unwrap();
let inner = Rc::make_mut(&mut self.series_container);
*inner = s;
Expand Down
Loading

0 comments on commit f599e88

Please sign in to comment.