Skip to content

Commit f544238

Browse files
authored
fix: Method dt.month_end was unnecessarily raising when the month-start timestamp was ambiguous (#24647)
1 parent c65a422 commit f544238

File tree

3 files changed

+169
-104
lines changed

3 files changed

+169
-104
lines changed

crates/polars-time/src/month_end.rs

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ use polars_core::utils::arrow::temporal_conversions::{
77
};
88

99
use crate::month_start::roll_backward;
10+
#[cfg(feature = "timezones")]
11+
use crate::utils::{try_localize_datetime, unlocalize_datetime};
1012
use crate::windows::duration::Duration;
1113

1214
// roll forward to the last day of the month
@@ -17,9 +19,35 @@ fn roll_forward(
1719
datetime_to_timestamp: fn(NaiveDateTime) -> i64,
1820
offset_fn: fn(&Duration, i64, Option<&Tz>) -> PolarsResult<i64>,
1921
) -> PolarsResult<i64> {
20-
let t = roll_backward(t, time_zone, timestamp_to_datetime, datetime_to_timestamp)?;
21-
let t = offset_fn(&Duration::parse("1mo"), t, time_zone)?;
22-
offset_fn(&Duration::parse("-1d"), t, time_zone)
22+
// Do the month arithmetic on the naive (wall-clock) timestamp: the month start is
23+
// only an intermediate value, so it doesn't matter if it is ambiguous or
24+
// non-existent in the time zone. Only the final result is localized again.
25+
let naive_t = match time_zone {
26+
#[cfg(feature = "timezones")]
27+
Some(tz) => datetime_to_timestamp(unlocalize_datetime(timestamp_to_datetime(t), tz)),
28+
_ => t,
29+
};
30+
let naive_month_start_t =
31+
roll_backward(naive_t, None, timestamp_to_datetime, datetime_to_timestamp)?;
32+
let naive_result = offset_fn(
33+
&Duration::parse("-1d"),
34+
offset_fn(&Duration::parse("1mo"), naive_month_start_t, None)?,
35+
None,
36+
)?;
37+
let result = match time_zone {
38+
#[cfg(feature = "timezones")]
39+
Some(tz) => datetime_to_timestamp(
40+
try_localize_datetime(
41+
timestamp_to_datetime(naive_result),
42+
tz,
43+
Ambiguous::Raise,
44+
NonExistent::Raise,
45+
)?
46+
.expect("we didn't use Ambiguous::Null or NonExistent::Null"),
47+
),
48+
_ => naive_result,
49+
};
50+
Ok(result)
2351
}
2452

2553
pub trait PolarsMonthEnd {

py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py

Lines changed: 0 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -260,107 +260,6 @@ def test_dt_datetime_date_time_invalid() -> None:
260260
pl.Series([date(2020, 1, 1)]).dt.time()
261261

262262

263-
@pytest.mark.parametrize(
264-
("dt", "expected"),
265-
[
266-
(datetime(2022, 3, 15, 3), datetime(2022, 3, 1, 3)),
267-
(datetime(2022, 3, 15, 3, 2, 1, 123000), datetime(2022, 3, 1, 3, 2, 1, 123000)),
268-
(datetime(2022, 3, 15), datetime(2022, 3, 1)),
269-
(datetime(2022, 3, 1), datetime(2022, 3, 1)),
270-
],
271-
)
272-
@pytest.mark.parametrize(
273-
("tzinfo", "time_zone"),
274-
[
275-
(None, None),
276-
(ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu"),
277-
],
278-
)
279-
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
280-
def test_month_start_datetime(
281-
dt: datetime,
282-
expected: datetime,
283-
time_unit: TimeUnit,
284-
tzinfo: ZoneInfo | None,
285-
time_zone: str | None,
286-
) -> None:
287-
ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit)
288-
result = ser.dt.month_start().item()
289-
assert result == expected.replace(tzinfo=tzinfo)
290-
291-
292-
@pytest.mark.parametrize(
293-
("dt", "expected"),
294-
[
295-
(date(2022, 3, 15), date(2022, 3, 1)),
296-
(date(2022, 3, 31), date(2022, 3, 1)),
297-
],
298-
)
299-
def test_month_start_date(dt: date, expected: date) -> None:
300-
ser = pl.Series([dt])
301-
result = ser.dt.month_start().item()
302-
assert result == expected
303-
304-
305-
@pytest.mark.parametrize(
306-
("dt", "expected"),
307-
[
308-
(datetime(2022, 3, 15, 3), datetime(2022, 3, 31, 3)),
309-
(
310-
datetime(2022, 3, 15, 3, 2, 1, 123000),
311-
datetime(2022, 3, 31, 3, 2, 1, 123000),
312-
),
313-
(datetime(2022, 3, 15), datetime(2022, 3, 31)),
314-
(datetime(2022, 3, 31), datetime(2022, 3, 31)),
315-
],
316-
)
317-
@pytest.mark.parametrize(
318-
("tzinfo", "time_zone"),
319-
[
320-
(None, None),
321-
(ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu"),
322-
],
323-
)
324-
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
325-
def test_month_end_datetime(
326-
dt: datetime,
327-
expected: datetime,
328-
time_unit: TimeUnit,
329-
tzinfo: ZoneInfo | None,
330-
time_zone: str | None,
331-
) -> None:
332-
ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit)
333-
result = ser.dt.month_end().item()
334-
assert result == expected.replace(tzinfo=tzinfo)
335-
336-
337-
@pytest.mark.parametrize(
338-
("dt", "expected"),
339-
[
340-
(date(2022, 3, 15), date(2022, 3, 31)),
341-
(date(2022, 3, 31), date(2022, 3, 31)),
342-
],
343-
)
344-
def test_month_end_date(dt: date, expected: date) -> None:
345-
ser = pl.Series([dt])
346-
result = ser.dt.month_end().item()
347-
assert result == expected
348-
349-
350-
def test_month_start_end_invalid() -> None:
351-
ser = pl.Series([time(1, 2, 3)])
352-
with pytest.raises(
353-
InvalidOperationError,
354-
match=r"`month_start` operation not supported for dtype `time` \(expected: date/datetime\)",
355-
):
356-
ser.dt.month_start()
357-
with pytest.raises(
358-
InvalidOperationError,
359-
match=r"`month_end` operation not supported for dtype `time` \(expected: date/datetime\)",
360-
):
361-
ser.dt.month_end()
362-
363-
364263
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
365264
def test_base_utc_offset(time_unit: TimeUnit) -> None:
366265
ser = pl.datetime_range(
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
from __future__ import annotations
2+
3+
from datetime import date, datetime, time
4+
from typing import TYPE_CHECKING
5+
from zoneinfo import ZoneInfo
6+
7+
import pytest
8+
9+
import polars as pl
10+
from polars.exceptions import ComputeError, InvalidOperationError
11+
from polars.testing import assert_frame_equal
12+
13+
if TYPE_CHECKING:
14+
from polars._typing import TimeUnit
15+
16+
17+
@pytest.mark.parametrize(
18+
("dt", "expected"),
19+
[
20+
(datetime(2022, 3, 15, 3), datetime(2022, 3, 1, 3)),
21+
(datetime(2022, 3, 15, 3, 2, 1, 123000), datetime(2022, 3, 1, 3, 2, 1, 123000)),
22+
(datetime(2022, 3, 15), datetime(2022, 3, 1)),
23+
(datetime(2022, 3, 1), datetime(2022, 3, 1)),
24+
],
25+
)
26+
@pytest.mark.parametrize(
27+
("tzinfo", "time_zone"),
28+
[
29+
(None, None),
30+
(ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu"),
31+
],
32+
)
33+
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
34+
def test_month_start_datetime(
35+
dt: datetime,
36+
expected: datetime,
37+
time_unit: TimeUnit,
38+
tzinfo: ZoneInfo | None,
39+
time_zone: str | None,
40+
) -> None:
41+
ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit)
42+
result = ser.dt.month_start().item()
43+
assert result == expected.replace(tzinfo=tzinfo)
44+
45+
46+
@pytest.mark.parametrize(
47+
("dt", "expected"),
48+
[
49+
(date(2022, 3, 15), date(2022, 3, 1)),
50+
(date(2022, 3, 31), date(2022, 3, 1)),
51+
],
52+
)
53+
def test_month_start_date(dt: date, expected: date) -> None:
54+
ser = pl.Series([dt])
55+
result = ser.dt.month_start().item()
56+
assert result == expected
57+
58+
59+
@pytest.mark.parametrize(
60+
("dt", "expected"),
61+
[
62+
(datetime(2022, 3, 15, 3), datetime(2022, 3, 31, 3)),
63+
(
64+
datetime(2022, 3, 15, 3, 2, 1, 123000),
65+
datetime(2022, 3, 31, 3, 2, 1, 123000),
66+
),
67+
(datetime(2022, 3, 15), datetime(2022, 3, 31)),
68+
(datetime(2022, 3, 31), datetime(2022, 3, 31)),
69+
],
70+
)
71+
@pytest.mark.parametrize(
72+
("tzinfo", "time_zone"),
73+
[
74+
(None, None),
75+
(ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu"),
76+
],
77+
)
78+
@pytest.mark.parametrize("time_unit", ["ms", "us", "ns"])
79+
def test_month_end_datetime(
80+
dt: datetime,
81+
expected: datetime,
82+
time_unit: TimeUnit,
83+
tzinfo: ZoneInfo | None,
84+
time_zone: str | None,
85+
) -> None:
86+
ser = pl.Series([dt]).dt.replace_time_zone(time_zone).dt.cast_time_unit(time_unit)
87+
result = ser.dt.month_end().item()
88+
assert result == expected.replace(tzinfo=tzinfo)
89+
90+
91+
@pytest.mark.parametrize(
92+
("dt", "expected"),
93+
[
94+
(date(2022, 3, 15), date(2022, 3, 31)),
95+
(date(2022, 3, 31), date(2022, 3, 31)),
96+
],
97+
)
98+
def test_month_end_date(dt: date, expected: date) -> None:
99+
ser = pl.Series([dt])
100+
result = ser.dt.month_end().item()
101+
assert result == expected
102+
103+
104+
def test_month_start_end_invalid() -> None:
105+
ser = pl.Series([time(1, 2, 3)])
106+
with pytest.raises(
107+
InvalidOperationError,
108+
match=r"`month_start` operation not supported for dtype `time` \(expected: date/datetime\)",
109+
):
110+
ser.dt.month_start()
111+
with pytest.raises(
112+
InvalidOperationError,
113+
match=r"`month_end` operation not supported for dtype `time` \(expected: date/datetime\)",
114+
):
115+
ser.dt.month_end()
116+
117+
118+
def test_month_end_ambiguous_start_24646() -> None:
119+
dt = datetime(1987, 3, 3, 2, 45, 00, tzinfo=ZoneInfo("Pacific/Chatham"))
120+
with pytest.raises(ComputeError, match="is ambiguous"):
121+
pl.DataFrame({"a": [dt]}).select(pl.col("a").dt.month_start())
122+
result = pl.DataFrame({"a": [dt]}).select(pl.col("a").dt.month_end())
123+
expected = pl.DataFrame({"a": [datetime(1987, 3, 31, 2, 45)]}).with_columns(
124+
pl.col("a").dt.replace_time_zone("Pacific/Chatham")
125+
)
126+
assert_frame_equal(result, expected)
127+
128+
129+
def test_month_end_non_existent_start_24646() -> None:
130+
# The month start, 1990-04-01 00:30, is non-existent in America/Havana
131+
dt = datetime(1990, 4, 2, 0, 30, tzinfo=ZoneInfo("America/Havana"))
132+
with pytest.raises(ComputeError, match="is non-existent"):
133+
pl.DataFrame({"a": [dt]}).select(pl.col("a").dt.month_start())
134+
result = pl.DataFrame({"a": [dt]}).select(pl.col("a").dt.month_end())
135+
expected = pl.DataFrame(
136+
{"a": [datetime(1990, 4, 30, 0, 30, tzinfo=ZoneInfo("America/Havana"))]}
137+
)
138+
assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)