diff --git a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py
index eb407db44..7d9ac9ad4 100644
--- a/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py
+++ b/airbyte_cdk/sources/declarative/datetime/min_max_datetime.py
@@ -8,6 +8,7 @@
 
 from airbyte_cdk.sources.declarative.datetime.datetime_parser import DatetimeParser
 from airbyte_cdk.sources.declarative.interpolation.interpolated_string import InterpolatedString
+from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse
 
 
 @dataclass
@@ -65,15 +66,24 @@ def get_datetime(
         if not datetime_format:
             datetime_format = "%Y-%m-%dT%H:%M:%S.%f%z"
 
-        time = self._parser.parse(
-            str(
-                self.datetime.eval(  # type: ignore[union-attr] # str has no attribute "eval"
-                    config,
-                    **additional_parameters,
-                )
-            ),
-            datetime_format,
-        )  # type: ignore # datetime is always cast to an interpolated string
+        datetime_str = str(
+            self.datetime.eval(  # type: ignore[union-attr] # str has no attribute "eval"
+                config,
+                **additional_parameters,
+            )
+        )
+
+        try:
+            time = self._parser.parse(datetime_str, datetime_format)
+        except ValueError as parse_error:
+            # The configured format did not match; fall back to the robust parser before giving up.
+            parsed_dt = ab_datetime_try_parse(datetime_str)
+            if parsed_dt is not None:
+                time = parsed_dt
+            else:
+                raise ValueError(
+                    f"Unable to parse datetime '{datetime_str}' with format '{datetime_format}' or robust parsing"
+                ) from parse_error
 
         if self.min_datetime:
             min_time = str(self.min_datetime.eval(config, **additional_parameters))  # type: ignore # min_datetime is always cast to an interpolated string
diff --git a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
index e7f8d0793..f7bb0a6c5 100644
--- a/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
+++ b/airbyte_cdk/sources/declarative/declarative_component_schema.yaml
@@ -943,8 +943,11 @@ definitions:
         type: array
         items:
           type: string
+        airbyte_hidden: true
         description: |
-          The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it. If not provided, the Outgoing Datetime Format will be used.
+          The possible formats for the cursor field, in order of preference. The first format that matches the cursor field value will be used to parse it.
+          If none of the specified formats match, the system will attempt to parse the value using robust datetime parsing that handles most ISO8601/RFC3339 compliant formats.
+          If not provided, the Outgoing Datetime Format will be used as the first attempt.
           Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
             * **%s**: Epoch unix timestamp - `1686218963`
             * **%s_as_float**: Epoch unix timestamp in seconds as float with microsecond precision - `1686218963.123456`
@@ -2803,6 +2806,7 @@ definitions:
           - "{{ now_utc().strftime('%Y-%m-%dT%H:%M:%SZ') }}"
       datetime_format:
         title: Datetime Format
+        airbyte_hidden: true
         description: |
           Format of the datetime value. Defaults to "%Y-%m-%dT%H:%M:%S.%f%z" if left empty. Use placeholders starting with "%" to describe the format the API is using. The following placeholders are available:
             * **%s**: Epoch unix timestamp - `1686218963`
diff --git a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
index 4eadf68e1..031e9121f 100644
--- a/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
+++ b/airbyte_cdk/sources/declarative/incremental/datetime_based_cursor.py
@@ -21,6 +21,7 @@
 )
 from airbyte_cdk.sources.message import MessageRepository
 from airbyte_cdk.sources.types import Config, Record, StreamSlice, StreamState
+from airbyte_cdk.utils.datetime_helpers import ab_datetime_try_parse
 from airbyte_cdk.utils.mapping_helpers import _validate_component_request_option_paths
 
 
@@ -313,6 +314,14 @@ def parse_date(self, date: str) -> datetime.datetime:
                 return self._parser.parse(date, datetime_format)
             except ValueError:
                 pass
+
+        # None of the configured formats matched, so fall back to the robust
+        # parser, which handles most common ISO8601/RFC3339 compliant
+        # datetime representations.
+        parsed_dt = ab_datetime_try_parse(date)
+        if parsed_dt is not None:
+            return parsed_dt
+
         raise ValueError(f"No format in {self.cursor_datetime_formats} matching {date}")
 
     @classmethod
diff --git a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py
index b4f990ee7..b45b8f40f 100644
--- a/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py
+++ b/unit_tests/sources/declarative/incremental/test_datetime_based_cursor.py
@@ -997,6 +997,30 @@ def test_parse_date_legacy_merge_datetime_format_in_cursor_datetime_format(
             ["%Y-%m-%dT%H:%M:%S.%f%z", "%s"],
             datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
         ),
+        (
+            "test_robust_fallback_z_suffix",
+            "2021-01-01T00:00:00Z",
+            ["%Y-%m-%d"],
+            datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
+        ),
+        (
+            "test_robust_fallback_iso_with_colon_tz",
+            "2021-01-01T00:00:00+00:00",
+            ["%Y-%m-%d"],
+            datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
+        ),
+        (
+            "test_robust_fallback_date_only",
+            "2021-01-01",
+            ["%s"],
+            datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
+        ),
+        (
+            "test_robust_fallback_unix_timestamp_string",
+            "1609459200",
+            ["%Y-%m-%d"],
+            datetime.datetime(2021, 1, 1, 0, 0, tzinfo=datetime.timezone.utc),
+        ),
     ],
 )
 def test_parse_date(test_name, input_date, date_formats, expected_output_date):
@@ -1021,7 +1045,27 @@ def test_given_unknown_format_when_parse_date_then_raise_error():
         parameters={},
     )
     with pytest.raises(ValueError):
-        slicer.parse_date("2021-01-01T00:00:00.000000+0000")
+        slicer.parse_date("not-a-valid-datetime-string")
+
+
+@pytest.mark.parametrize(
+    "input_date, incompatible_format",
+    [
+        ("2021-01-01T00:00:00Z", "%Y-%m-%d"),
+        ("2021-01-01T00:00:00+00:00", "%Y-%m-%d"),
+        ("1609459200", "%Y-%m-%d"),
+    ],
+)
+def test_minmax_datetime_robust_fallback(input_date, incompatible_format):
+    from airbyte_cdk.sources.declarative.datetime.min_max_datetime import MinMaxDatetime
+
+    min_max_dt = MinMaxDatetime(
+        datetime=input_date, datetime_format=incompatible_format, parameters={}
+    )
+
+    assert min_max_dt.get_datetime({}) == datetime.datetime(
+        2021, 1, 1, tzinfo=datetime.timezone.utc
+    )
 
 
 @pytest.mark.parametrize(