-
-
Notifications
You must be signed in to change notification settings - Fork 364
feat: add max_reported_failures config to limit SchemaError output
#2095
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
24af2bb
e5f2042
c46110d
77df471
9c51515
5bdb62c
45f8951
9f244ea
2ae8911
c5cf6f6
dc78cd1
1ce613a
471be6f
275e679
22f2819
38cbc6d
8b9ae99
8673d46
e92a4c9
888933d
1b42f18
58664e3
90dd5d0
5dd160c
53adf4f
be32ee3
f6d46e4
4a8504d
4c111a5
f1eb2e5
f3a272b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| """Common error formatting utilities for all backends.""" | ||
|
|
||
| from typing import Callable, Optional, Tuple, TypeVar, Union | ||
|
Check failure on line 3 in pandera/backends/error_formatters.py
|
||
|
|
||
T = TypeVar("T")


def format_failure_cases_with_truncation(
    failure_cases: T,
    total_failures: int,
    max_reported_failures: int,
    format_all_cases: Callable[[T], str],
    format_limited_cases: Callable[[T, int], Tuple[str, int]],
) -> str:
    """
    Format failure cases with truncation based on max_reported_failures.

    This function provides a unified way to handle failure case formatting
    across different backends (pandas, polars, etc.) while allowing each
    backend to maintain its specific formatting requirements. The rendering
    itself is delegated to the two callbacks; this function only decides
    which callback to invoke and appends a summary of the omitted cases.

    :param failure_cases: The failure cases to format (backend-specific
        type). Passed through to the callbacks untouched.
    :param total_failures: Total number of failures, used in the summary
        text.
    :param max_reported_failures: Maximum failures to report. Any negative
        value means unlimited (-1 is the documented sentinel), 0 means
        summary only, and a positive value caps the number of cases shown.
    :param format_all_cases: Function to format all cases without truncation.
    :param format_limited_cases: Function to format a limited number of
        cases. Called as ``format_limited_cases(failure_cases, limit)`` and
        must return a tuple of (formatted_string, actual_count_shown).
    :return: Formatted failure cases string with truncation message if needed.
    """
    # Treat every negative limit as "unlimited". Forwarding e.g. -2 to the
    # limited formatter would be interpreted by slice-based backends as
    # "everything except the last two rows", silently dropping cases.
    if max_reported_failures < 0:
        return format_all_cases(failure_cases)

    if max_reported_failures == 0:
        return f"... {total_failures} failure cases"

    formatted_str, shown_count = format_limited_cases(
        failure_cases, max_reported_failures
    )

    # The callback reports how many cases it actually rendered, which may be
    # fewer than the limit when there are not that many failures.
    omitted_count = total_failures - shown_count
    if omitted_count > 0:
        return (
            f"{formatted_str} ... and {omitted_count} more failure cases "
            f"({total_failures} total)"
        )

    return formatted_str
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| """Make schema error messages human-friendly for Ibis.""" | ||
|
|
||
| import re | ||
| from typing import Any, Optional | ||
|
|
||
| import pandas as pd | ||
|
|
||
| from pandera.backends.error_formatters import ( | ||
| format_failure_cases_with_truncation, | ||
| ) | ||
| from pandera.backends.pandas.error_formatters import ( | ||
| format_generic_error_message, | ||
| reshape_failure_cases, | ||
| ) | ||
| from pandera.config import get_config_context | ||
|
|
||
|
|
||
def format_vectorized_error_message(
    parent_schema,
    check,
    check_index: int,
    reshaped_failure_cases: Any,
    max_reported_failures: Optional[int] = None,
) -> str:
    """Build the error message reported when a check fails.

    :param parent_schema: schema instance being validated; its class name
        and ``name`` attribute are embedded in the message.
    :param check: check that generated the error. Its string form is parsed
        for a ``<Check name: description>`` pattern and the description part
        is used; if the pattern is absent the whole string is used.
    :param check_index: index of the validator that failed.
    :param reshaped_failure_cases: object exposing a ``failure_case``
        attribute holding the failing values (used here through ``apply``,
        ``iloc`` and ``len``).
    :param max_reported_failures: Maximum number of failures to report
        in the error message. If None, the value from the active config
        context is used.
    :return: the formatted error message.
    """
    if max_reported_failures is None:
        max_reported_failures = get_config_context().max_reported_failures

    # Keep only the description part of "<Check name: description>".
    check_repr = str(check)
    found = re.search(r"<Check\s+([^:>]+):\s*([^>]+)>", check_repr)
    check_str = found.group(2) if found else check_repr

    failure_cases = reshaped_failure_cases.failure_case

    def _join_all(series):
        return ", ".join(series.apply(str))

    def _join_first(series, limit):
        head = series.iloc[:limit]
        return ", ".join(head.apply(str)), len(head)

    failure_cases_string = format_failure_cases_with_truncation(
        failure_cases,
        len(failure_cases),
        max_reported_failures,
        _join_all,
        _join_first,
    )

    schema_kind = parent_schema.__class__.__name__
    return (
        f"{schema_kind} '{parent_schema.name}' failed "
        f"element-wise validator number {check_index}: "
        f"{check_str} failure cases: {failure_cases_string}"
    )
|
|
||
|
|
||
| # Public API: the Ibis-specific formatter defined above, plus the helpers re-exported unchanged from the pandas backend | ||
| __all__ = [ | ||
| "format_generic_error_message", | ||
| "format_vectorized_error_message", | ||
| "reshape_failure_cases", | ||
| ] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,0 +1,41 @@ | ||
| """Make schema error messages human-friendly for Polars.""" | ||
|
|
||
| from typing import Any, Optional | ||
|
|
||
| from pandera.backends.error_formatters import ( | ||
| format_failure_cases_with_truncation, | ||
| ) | ||
| from pandera.config import get_config_context | ||
|
|
||
|
|
||
def format_failure_cases_message(
    failure_cases: Any,
    max_reported_failures: Optional[int] = None,
) -> str:
    """Format failure cases for Polars error messages.

    :param failure_cases: Polars DataFrame containing failure cases (used
        here through ``height``, ``head`` and ``rows(named=True)``).
    :param max_reported_failures: Maximum number of failures to report.
        If None, uses the value from the active config context.
    :return: Formatted failure cases string
    """
    if max_reported_failures is None:
        max_reported_failures = get_config_context().max_reported_failures

    # ``rows(named=True)`` yields a list of dicts. Render it to text here so
    # that every path through the truncation helper returns a ``str``, as
    # the annotation promises, instead of a list on the non-truncated paths.
    def format_all(cases) -> str:
        return str(cases.rows(named=True))

    def format_limited(cases, limit):
        limited = cases.head(limit)
        return str(limited.rows(named=True)), limited.height

    return format_failure_cases_with_truncation(
        failure_cases,
        failure_cases.height,
        max_reported_failures,
        format_all,
        format_limited,
    )
Uh oh!
There was an error while loading. Please reload this page.