Skip to content

Commit

Permalink
account for pola-rs#20364
Browse files Browse the repository at this point in the history
  • Loading branch information
coastalwhite committed Dec 19, 2024
1 parent e5c507e commit ce01d03
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 16 deletions.
3 changes: 3 additions & 0 deletions crates/polars-core/src/chunked_array/ops/row_encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,11 +114,14 @@ pub fn get_row_encoding_dictionary(dtype: &DataType) -> Option<RowEncodingContex
RevMapping::Global(map, _, _) => {
let num_known_categories = map.keys().max().copied().map_or(0, |m| m + 1);

+ dbg!(map);
+
// @TODO: This should probably be cached.
let lexical_sort_idxs =
matches!(ordering, CategoricalOrdering::Lexical).then(|| {
let read_map = crate::STRING_CACHE.read_map();
let payloads = read_map.get_current_payloads();
+ dbg!(&payloads);
assert!(payloads.len() >= num_known_categories as usize);

let mut idxs = (0..num_known_categories).collect::<Vec<u32>>();
Expand Down
33 changes: 18 additions & 15 deletions py-polars/polars/testing/parametric/strategies/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

from polars._utils.deprecation import issue_deprecation_warning
from polars.dataframe import DataFrame
- from polars.datatypes import DataType, DataTypeClass, Null, List, Array, Struct
+ from polars.datatypes import DataType, DataTypeClass, Null, List, Array, Struct, Boolean
from polars.series import Series
from polars import select, when
from polars.string_cache import StringCache
Expand Down Expand Up @@ -204,14 +204,25 @@ def series(
if isinstance(name, st.SearchStrategy):
name = draw(name)

+ do_mask_out = (
+ allow_masked_out
+ and allow_null
+ and (
+ isinstance(dtype, List)
+ or isinstance(dtype, Array)
+ or isinstance(dtype, Struct)
+ )
+ and draw(st.booleans())
+ )
+
if size == 0:
values = []
else:
# Create series using dtype-specific strategy to generate values
if strategy is None:
strategy = data(
dtype, # type: ignore[arg-type]
- allow_null=allow_null,
+ allow_null=allow_null and not do_mask_out,
**kwargs,
)

Expand All @@ -227,26 +238,18 @@ def series(
s = Series(name=name, values=values, dtype=dtype)

# Apply masking out of values
- can_mask = isinstance(dtype, List) or isinstance(dtype, Array) or isinstance(dtype, Struct)
- if allow_masked_out and can_mask and s.has_nulls() and draw(st.booleans()):
- masked_out_strategy = data(
- dtype, # type: ignore[arg-type]
- allow_null=False,
- **kwargs,
- )
- masked_out = draw(
+ if do_mask_out:
+ values = draw(
st.lists(
- masked_out_strategy,
+ st.booleans(),
min_size=size,
max_size=size,
- unique_by=(flexhash if unique else None),
)
)
- masked_out_s = Series(name=name, values=masked_out, dtype=dtype)

nulls_mask = s.is_null()
- s = select(when(nulls_mask).then(masked_out_s).otherwise(s).alias(s.name)).to_series()
- s = select(when(~nulls_mask).then(s)).to_series()
+ mask = Series(name=None, values=values, dtype=Boolean)
+ s = select(when(mask).then(s).alias(s.name)).to_series()

# Apply chunking
if allow_chunks and size > 1 and draw(st.booleans()):
Expand Down
5 changes: 4 additions & 1 deletion py-polars/tests/unit/operations/test_sort.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ def test_series_sort_idempotent(s: pl.Series) -> None:
pl.Object, # Unsortable type
pl.Null, # Bug, see: https://github.com/pola-rs/polars/issues/17007
pl.Decimal, # Bug, see: https://github.com/pola-rs/polars/issues/17009
- ]
+ pl.Categorical(
+ ordering="lexical"
+ ), # Bug, see: https://github.com/pola-rs/polars/issues/20364
+ ],
)
)
def test_df_sort_idempotent(df: pl.DataFrame) -> None:
Expand Down

0 comments on commit ce01d03

Please sign in to comment.