Skip to content

Commit 0f6f68c

Browse files
authored
fix: Null handling in full-null group_by_dynamic mean/sum (#23435)
1 parent b977cfc commit 0f6f68c

File tree

3 files changed

+56
-2
lines changed

3 files changed

+56
-2
lines changed

crates/polars-compute/src/rolling/nulls/mean.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,12 @@ impl<
32 32

33 33
unsafe fn update(&mut self, start: usize, end: usize) -> Option<T> {
34 34
let sum = self.sum.update(start, end);
35-
sum.map(|sum| sum / NumCast::from(end - start - self.sum.null_count).unwrap())
35+
let len = end - start;
36+
if self.sum.null_count == len {
37+
None
38+
} else {
39+
sum.map(|sum| sum / NumCast::from(end - start - self.sum.null_count).unwrap())
40+
}
36 41
}
37 42
fn is_valid(&self, min_periods: usize) -> bool {
38 43
self.sum.is_valid(min_periods)

crates/polars-compute/src/rolling/nulls/sum.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -155,7 +155,7 @@ where
155 155
}
156 156
}
157 157
self.last_end = end;
158-
self.sum.and_then(NumCast::from)
158+
self.sum.and_then(NumCast::from).or(Some(T::zeroed()))
159 159
}
160 160

161 161
fn is_valid(&self, min_periods: usize) -> bool {

py-polars/tests/unit/operations/test_group_by_dynamic.py

Lines changed: 49 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1082,3 +1082,52 @@ def test_group_by_dynamic_single_row_22585() -> None:
1082 1082
{"group": ["x"], "date": [date(2025, 1, 1)], "len": [1]}
1083 1083
).with_columns(pl.col("len").cast(pl.UInt32))
1084 1084
assert_frame_equal(expected, out)
1085+
1086+
1087+
def test_group_by_dynamic_zero_sum_23433() -> None:
1088+
df = pl.DataFrame(
1089+
{
1090+
"g": [0, 0, 1, 1, 2, 2, 2, 3, 3],
1091+
"x": [None, None, None, None, None, None, None, 1, 2],
1092+
}
1093+
)
1094+
out = df.group_by_dynamic("g", every="1i", period="2i").agg(pl.col.x.sum())
1095+
expected = pl.DataFrame({"g": [0, 1, 2, 3], "x": [0, 0, 3, 3]})
1096+
assert_frame_equal(out, expected)
1097+
1098+
1099+
def test_group_by_dynamic_null_mean_22724() -> None:
1100+
time = pl.datetime_range(
1101+
start=datetime(2025, 1, 1, 0, 0, 00),
1102+
end=datetime(2025, 1, 1, 0, 0, 10),
1103+
interval="1s",
1104+
eager=True,
1105+
)
1106+
1107+
b = pl.DataFrame(
1108+
{
1109+
"time": time,
1110+
"value": [None, None, None, 0, None, None, None, None, -1, None, None],
1111+
"empty": [None] * len(time),
1112+
}
1113+
).cast({"value": pl.Float32, "empty": pl.Float32})
1114+
gb = b.group_by_dynamic("time", every="2s", period="3s", offset="-3s")
1115+
out = gb.agg([pl.col("value").cast(pl.Float32).mean()])
1116+
1117+
expected = pl.DataFrame(
1118+
{
1119+
"time": pl.Series(
1120+
[
1121+
datetime(2024, 12, 31, 23, 59, 59),
1122+
datetime(2025, 1, 1, 0, 0, 1),
1123+
datetime(2025, 1, 1, 0, 0, 3),
1124+
datetime(2025, 1, 1, 0, 0, 5),
1125+
datetime(2025, 1, 1, 0, 0, 7),
1126+
datetime(2025, 1, 1, 0, 0, 9),
1127+
],
1128+
dtype=pl.Datetime(time_unit="us", time_zone=None),
1129+
),
1130+
"value": pl.Series([None, 0.0, 0.0, None, -1.0, None], dtype=pl.Float32),
1131+
}
1132+
)
1133+
assert_frame_equal(out, expected)

0 commit comments

Comments
 (0)