-
-
Couldn't load subscription status.
- Fork 361
Open
Labels
bug: Something isn't working
Description
Validating a column of time-zone-aware datetimes fails when the column contains values in more than one time zone.
- I have checked that this issue has not already been reported.
- I have confirmed this bug exists on the latest version of pandera.
Note: Please read this guide detailing how to provide the necessary information for us to reproduce your bug.
Code Sample, a copy-pastable example
import pandera as pa
from pandera.typing import Series
import pandas as pd
class Model(pa.SchemaModel):
timestamp: Series[pd.DatetimeTZDtype] = pa.Field(
dtype_kwargs={"unit": "ns", "tz": "America/Chicago"}
)
class Config:
coerce = True
strict = False
df = pd.DataFrame(
[
[pd.to_datetime("2023-03-01 13:00:00").tz_localize("America/Chicago")],
[pd.to_datetime("2023-03-01 13:00:00").tz_localize("America/New_York")],
],
columns=["timestamp"],
)
Model.validate(df)
SchemaErrors Traceback (most recent call last)
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/container.py:81, in DataFrameSchemaBackend.validate(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace)
80 try:
---> 81 check_obj = parser(check_obj, *args)
82 except SchemaError as exc:
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/container.py:532, in DataFrameSchemaBackend.coerce_dtype(self, check_obj, schema)
529 if error_handler.collected_errors:
530 # raise SchemaErrors if this method is called without an
531 # error_handler
--> 532 raise SchemaErrors(
533 schema=schema,
534 schema_errors=error_handler.collected_errors,
535 data=check_obj,
536 )
538 return check_obj
SchemaErrors: Schema Model: A total of 1 schema errors were found.
Error Counts
------------
- SchemaErrorReason.SCHEMA_COMPONENT_CHECK: 1
Schema Error Summary
--------------------
Empty DataFrame
Columns: [failure_cases, n_failure_cases]
Index: []
Usage Tip
---------
Directly inspect all errors by catching the exception:
``
try:
schema.validate(dataframe, lazy=True)
except SchemaErrors as err:
err.failure_cases # dataframe of schema errors
err.data # invalid dataframe
``
The above exception was the direct cause of the following exception:
SchemaError Traceback (most recent call last)
Cell In[1], line 23
13 strict = False
16 df = pd.DataFrame(
17 [
18 [pd.to_datetime("2023-03-01 13:00:00").tz_localize("America/Chicago")],
(...)
21 columns=["timestamp"],
22 )
---> 23 Model.validate(df)
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/api/pandas/model.py:306, in DataFrameModel.validate(cls, check_obj, head, tail, sample, random_state, lazy, inplace)
291 @classmethod
292 @docstring_substitution(validate_doc=DataFrameSchema.validate.__doc__)
293 def validate(
(...)
301 inplace: bool = False,
302 ) -> DataFrameBase[TDataFrameModel]:
303 """%(validate_doc)s"""
304 return cast(
305 DataFrameBase[TDataFrameModel],
--> 306 cls.to_schema().validate(
307 check_obj, head, tail, sample, random_state, lazy, inplace
308 ),
309 )
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/api/pandas/container.py:366, in DataFrameSchema.validate(self, check_obj, head, tail, sample, random_state, lazy, inplace)
354 check_obj = check_obj.map_partitions( # type: ignore [operator]
355 self._validate,
356 head=head,
(...)
362 meta=check_obj,
363 )
364 return check_obj.pandera.add_schema(self)
--> 366 return self._validate(
367 check_obj=check_obj,
368 head=head,
369 tail=tail,
370 sample=sample,
371 random_state=random_state,
372 lazy=lazy,
373 inplace=inplace,
374 )
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/api/pandas/container.py:395, in DataFrameSchema._validate(self, check_obj, head, tail, sample, random_state, lazy, inplace)
386 if self._is_inferred:
387 warnings.warn(
388 f"This {type(self)} is an inferred schema that hasn't been "
389 "modified. It's recommended that you refine the schema "
(...)
392 UserWarning,
393 )
--> 395 return self.get_backend(check_obj).validate(
396 check_obj,
397 schema=self,
398 head=head,
399 tail=tail,
400 sample=sample,
401 random_state=random_state,
402 lazy=lazy,
403 inplace=inplace,
404 )
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/container.py:85, in DataFrameSchemaBackend.validate(self, check_obj, schema, head, tail, sample, random_state, lazy, inplace)
83 error_handler.collect_error(exc.reason_code, exc)
84 except SchemaErrors as exc:
---> 85 error_handler.collect_errors(exc)
87 # We may have modified columns, for example by
88 # add_missing_columns, so regenerate column info
89 column_info = self.collect_column_info(check_obj, schema)
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/error_handlers.py:63, in SchemaErrorHandler.collect_errors(self, schema_errors, original_exc)
56 """Collect schema errors from a SchemaErrors exception.
57
58 :param reason_code: string representing reason for error.
59 :param schema_error: ``SchemaError`` object.
60 :param original_exc: original exception associated with the SchemaError.
61 """
62 for schema_error in schema_errors.schema_errors:
---> 63 self.collect_error(
64 schema_error.reason_code,
65 schema_error,
66 original_exc or schema_errors,
67 )
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/error_handlers.py:38, in SchemaErrorHandler.collect_error(self, reason_code, schema_error, original_exc)
31 """Collect schema error, raising exception if lazy is False.
32
33 :param reason_code: string representing reason for error.
34 :param schema_error: ``SchemaError`` object.
35 :param original_exc: original exception associated with the SchemaError.
36 """
37 if not self._lazy:
---> 38 raise schema_error from original_exc
40 # delete data of validated object from SchemaError object to prevent
41 # storing copies of the validated DataFrame/Series for every
42 # SchemaError collected.
43 del schema_error.data
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/container.py:576, in DataFrameSchemaBackend._coerce_dtype_helper.<locals>._try_coercion(coerce_fn, obj)
574 def _try_coercion(coerce_fn, obj):
575 try:
--> 576 return coerce_fn(obj)
577 except SchemaError as exc:
578 error_handler.collect_error(
579 SchemaErrorReason.DATATYPE_COERCION,
580 exc,
581 )
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/api/pandas/array.py:146, in ArraySchema.coerce_dtype(self, check_obj)
136 def coerce_dtype(
137 self,
138 check_obj: Union[pd.Series, pd.Index],
139 ) -> Union[pd.Series, pd.Index]:
140 """Coerce type of a pd.Series by type specified in dtype.
141
142 :param pd.Series series: One-dimensional ndarray with axis labels
143 (including time series).
144 :returns: ``Series`` with coerced data type
145 """
--> 146 return self.get_backend(check_obj).coerce_dtype(check_obj, schema=self)
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/components.py:194, in ColumnBackend.coerce_dtype(self, check_obj, schema)
190 # pylint: disable=super-with-arguments
191 # pylint: disable=fixme
192 # TODO: use singledispatchmethod here
193 if is_field(check_obj) or is_index(check_obj):
--> 194 return super(ColumnBackend, self).coerce_dtype(
195 check_obj,
196 schema=schema,
197 )
198 return check_obj.apply(
199 lambda x: super(ColumnBackend, self).coerce_dtype(
200 x,
(...)
203 axis="columns",
204 )
File ~/miniconda3/envs/dev/lib/python3.9/site-packages/pandera/backends/pandas/array.py:177, in ArraySchemaBackend.coerce_dtype(self, check_obj, schema)
175 return schema.dtype.try_coerce(check_obj)
176 except ParserError as exc:
--> 177 raise SchemaError(
178 schema=schema,
179 data=check_obj,
180 message=(
181 f"Error while coercing '{schema.name}' to type "
182 f"{schema.dtype}: {exc}:\n{exc.failure_cases}"
183 ),
184 failure_cases=exc.failure_cases,
185 check=f"coerce_dtype('{schema.dtype}')",
186 ) from exc
SchemaError: Error while coercing 'timestamp' to type datetime64[ns, America/Chicago]: Could not coerce <class 'pandas.core.series.Series'> data_container into type datetime64[ns, America/Chicago]:
Empty DataFrame
Columns: [index, failure_case]
Index: []
Expected behavior
All of the timestamps should be converted to the target time zone.
Desktop (please complete the following information):
- OS: ubuntu
- Browser: Chrome
- Version: pandera 0.16.1
Screenshots
If applicable, add screenshots to help explain your problem.
Additional context
Add any other context about the problem here.
antonioalegria, ddp111 and rgmz
Metadata
Metadata
Assignees
Labels
bug: Something isn't working