Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
24af2bb
feat(config): add max_failure_cases configuration option
stakahashy Jul 28, 2025
e5f2042
feat(pandas): implement max_failure_cases limit in error formatters
stakahashy Jul 28, 2025
c46110d
feat(polars): implement max_failure_cases limit in error handling
stakahashy Jul 28, 2025
77df471
test: add comprehensive tests for max_failure_cases feature
stakahashy Jul 28, 2025
9c51515
fix: add missing newlines at end of test files
stakahashy Jul 28, 2025
5bdb62c
Upgrade `pyupgrade` hook and target Python version
deepyaman Jul 27, 2025
45f8951
Use `builtins.type` to not clash with `type` field
deepyaman Jul 28, 2025
9f244ea
Remove unused imports, using ruff with manual help
deepyaman Jul 28, 2025
2ae8911
Fix passing an empty column list to check duplicates (#2092)
rush4ratio Jul 30, 2025
c5cf6f6
Replace `Literal` imports from `typing_extensions` (#2100)
deepyaman Jul 30, 2025
dc78cd1
Add `.git-blame-ignore-revs` to avoid bulk changes (#2101)
deepyaman Jul 31, 2025
1ce613a
limit polars version even on mac (#2105)
amerberg Aug 4, 2025
471be6f
refactor: rename PANDERA_MAX_FAILURE_CASES to PANDERA_MAX_REPORTED_FA…
stakahashy Aug 23, 2025
275e679
refactor: simplify max_reported_failures logic to reduce nesting
stakahashy Aug 23, 2025
22f2819
refactor: further simplify max_reported_failures logic with early ret…
stakahashy Aug 23, 2025
38cbc6d
revert docstring change on config_context
stakahashy Aug 23, 2025
8b9ae99
test: refactor tests to follow single responsibility principle
stakahashy Aug 23, 2025
8673d46
test: remove environment variable tests from unit tests
stakahashy Aug 23, 2025
e92a4c9
refactor: unify error formatting logic across backends
stakahashy Aug 23, 2025
888933d
feat: extend unified error formatting to ibis and pyspark backends
stakahashy Aug 23, 2025
1b42f18
refactor: remove unnecessary format_failure_cases_message from pyspark
stakahashy Aug 23, 2025
58664e3
style: apply black and isort formatting to error formatters
stakahashy Aug 23, 2025
90dd5d0
test: consolidate redundant test cases for max_reported_failures
stakahashy Aug 23, 2025
5dd160c
style: rename polars test file to follow directory naming convention
stakahashy Aug 23, 2025
53adf4f
test: add ibis max_reported_failures tests (currently skipped)
stakahashy Aug 23, 2025
be32ee3
fix: declare import at top
stakahashy Aug 24, 2025
f6d46e4
Fix remaining test parameter names from max_failure_cases to max_repo…
stakahashy Aug 24, 2025
4a8504d
fix: update config tests to include max_reported_failures field
stakahashy Sep 7, 2025
4c111a5
Resolve noxfile.py conflicts with upstream/main polars parametrization
stakahashy Sep 7, 2025
f1eb2e5
Merge branch 'main' into limit_log_length_on_schema_error
stakahashy Sep 7, 2025
f3a272b
Merge branch 'main' into limit_log_length_on_schema_error
cosmicBboy Nov 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions pandera/backends/error_formatters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""Common error formatting utilities for all backends."""

from typing import Callable, Optional, Tuple, TypeVar, Union

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=0.20.0 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 `typing.Tuple` is deprecated, use `tuple` instead

Check failure on line 3 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=1.33.1 )

Ruff (UP035)

pandera/backends/error_formatters.py:3:1: UP035 Import from `collections.abc` instead: `Callable`

T = TypeVar("T")


def format_failure_cases_with_truncation(
failure_cases: T,
total_failures: int,
max_reported_failures: int,
format_all_cases: Callable[[T], str],
format_limited_cases: Callable[[T, int], Tuple[str, int]],

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=0.20.0 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=0.20.0 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=0.20.0 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=1.33.1 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=1.33.1 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=1.33.1 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=1.33.1 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=0.20.0 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=0.20.0 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation

Check failure on line 13 in pandera/backends/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=1.33.1 )

Ruff (UP006)

pandera/backends/error_formatters.py:13:46: UP006 Use `tuple` instead of `Tuple` for type annotation
) -> str:
"""
Format failure cases with truncation based on max_reported_failures.
This function provides a unified way to handle failure case formatting
across different backends (pandas, polars, etc.) while allowing each
backend to maintain its specific formatting requirements.
:param failure_cases: The failure cases to format (backend-specific type)
:param total_failures: Total number of failures
:param max_reported_failures: Maximum failures to report
(-1 for unlimited, 0 for summary only)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Just as I'm passing by, I feel like None for no limit is cleaner, and summary (if necessary) could be a different argument? Having magic numbers like -1 and 0 is confusing to users (even if documented, it's not intuitive).

:param format_all_cases: Function to format all cases without truncation
:param format_limited_cases: Function to format limited number of cases,
returns tuple of (formatted_string, actual_count_shown)
:return: Formatted failure cases string with truncation message if needed
"""
if max_reported_failures == -1:
return format_all_cases(failure_cases)

if max_reported_failures == 0:
return f"... {total_failures} failure cases"

formatted_str, shown_count = format_limited_cases(
failure_cases, max_reported_failures
)

if shown_count < total_failures:
omitted_count = total_failures - shown_count
return f"{formatted_str} ... and {omitted_count} more failure cases ({total_failures} total)"

return formatted_str
4 changes: 3 additions & 1 deletion pandera/backends/ibis/base.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,30 @@
"""Ibis parsing, validation, and error-reporting backends."""

import warnings
from collections import defaultdict

import ibis
import ibis.selectors as s

from pandera.api.ibis.error_handler import ErrorHandler
from pandera.api.ibis.types import CheckResult
from pandera.backends.base import BaseSchemaBackend, CoreCheckResult
from pandera.backends.ibis.constants import POSITIONAL_JOIN_BACKENDS
from pandera.backends.pandas.error_formatters import (
consolidate_failure_cases,
reshape_failure_cases,
)
from pandera.backends.ibis.error_formatters import (
format_generic_error_message,
format_vectorized_error_message,
reshape_failure_cases,
)
from pandera.constants import CHECK_OUTPUT_KEY, CHECK_OUTPUT_SUFFIX
from pandera.errors import (
FailureCaseMetadata,
SchemaError,
SchemaErrorReason,
SchemaWarning,
)

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=0.20.0 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=0.20.0 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=0.20.0 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=1.33.1 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=1.33.1 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=1.33.1 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=1.33.1 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=0.20.0 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=0.20.0 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted

Check failure on line 27 in pandera/backends/ibis/base.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=1.33.1 )

Ruff (I001)

pandera/backends/ibis/base.py:3:1: I001 Import block is un-sorted or un-formatted


class IbisSchemaBackend(BaseSchemaBackend):
Expand Down
80 changes: 80 additions & 0 deletions pandera/backends/ibis/error_formatters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""Make schema error messages human-friendly for Ibis."""

import re
from typing import Any, Optional

import pandas as pd

from pandera.backends.error_formatters import (
format_failure_cases_with_truncation,
)
from pandera.backends.pandas.error_formatters import (
format_generic_error_message,
reshape_failure_cases,
)
from pandera.config import get_config_context


def format_vectorized_error_message(
    parent_schema,
    check,
    check_index: int,
    reshaped_failure_cases: Any,
    max_reported_failures: int | None = None,
) -> str:
    """Construct an error message when a validator fails.

    :param parent_schema: class of schema being validated.
    :param check: check that generated error.
    :param check_index: The validator that failed.
    :param reshaped_failure_cases: The failure cases encountered by the
        element-wise or vectorized validator. Must expose a ``failure_case``
        attribute supporting ``len()``, ``.apply`` and ``.iloc``.
    :param max_reported_failures: Maximum number of failures to report
        in the error message. If None, use config value.
    :return: Human-readable description of the failed check and its
        (possibly truncated) failure cases.
    """
    if max_reported_failures is None:
        max_reported_failures = get_config_context().max_reported_failures

    # When the check's string form looks like "<Check name: description>",
    # show only the description; otherwise fall back to the full string.
    match = re.search(r"<Check\s+([^:>]+):\s*([^>]+)>", str(check))
    check_str = match.group(2) if match else str(check)

    failure_cases = reshaped_failure_cases.failure_case

    def format_all(cases):
        return ", ".join(cases.apply(str))

    def format_limited(cases, limit):
        limited = cases.iloc[:limit]
        # Return the number actually rendered so the shared truncation
        # helper can compute how many cases were omitted.
        return ", ".join(limited.apply(str)), len(limited)

    failure_cases_string = format_failure_cases_with_truncation(
        failure_cases,
        len(failure_cases),
        max_reported_failures,
        format_all,
        format_limited,
    )

    return (
        f"{parent_schema.__class__.__name__} '{parent_schema.name}' failed "
        f"element-wise validator number {check_index}: "
        f"{check_str} failure cases: {failure_cases_string}"
    )


# Public API of this module: the Ibis-specific
# ``format_vectorized_error_message`` defined in this file, plus
# ``format_generic_error_message`` and ``reshape_failure_cases``, which are
# imported from the pandas error formatters and re-exported without changes.
__all__ = [
"format_generic_error_message",
"format_vectorized_error_message",
"reshape_failure_cases",
]
67 changes: 60 additions & 7 deletions pandera/backends/pandas/error_formatters.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
"""Make schema error messages human-friendly."""

import re
from typing import Any, Union
from typing import Any, Optional, Union

import pandas as pd

from pandera.backends.error_formatters import (
format_failure_cases_with_truncation,
)
from pandera.config import get_config_context
from pandera.errors import SchemaError


Expand All @@ -26,11 +30,41 @@
)


def _format_failure_cases_string(
    failure_cases,
    total_failures: int,
    max_reported_failures: int,
    is_pyspark: bool = False,
) -> str:
    """Render failure cases as a comma-separated, possibly truncated string.

    :param failure_cases: collection of failure cases. With ``is_pyspark``
        set it must support ``.astype(str)`` and plain ``[:n]`` slicing;
        otherwise it must support ``.apply(str)`` and ``.iloc[:n]``.
    :param total_failures: total number of failure cases.
    :param max_reported_failures: limit forwarded to the shared truncation
        helper.
    :param is_pyspark: selects the array-style access path described above.
    :return: the formatted failure-case string.
    """

    def _joined(values):
        # Both access paths stringify element-wise, then join with ", ".
        as_text = values.astype(str) if is_pyspark else values.apply(str)
        return ", ".join(as_text)

    def _render_everything(values):
        return _joined(values)

    def _render_head(values, count):
        head = values[:count] if is_pyspark else values.iloc[:count]
        # Report how many cases were actually kept alongside the text.
        return _joined(head), len(head)

    return format_failure_cases_with_truncation(
        failure_cases,
        total_failures,
        max_reported_failures,
        _render_everything,
        _render_head,
    )


def format_vectorized_error_message(
parent_schema,
check,
check_index: int,
reshaped_failure_cases: Any,
max_reported_failures: Optional[int] = None,

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations

Check failure on line 67 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:67:28: UP045 Use `X | None` for type annotations
) -> str:
"""Construct an error message when a validator fails.

Expand All @@ -39,8 +73,13 @@
:param check_index: The validator that failed.
:param reshaped_failure_cases: The failure cases encountered by the
element-wise or vectorized validator.
:param max_reported_failures: Maximum number of failures to report
in the error message. If None, use config value.

"""
if max_reported_failures is None:
config = get_config_context()
max_reported_failures = config.max_reported_failures

pattern = r"<Check\s+([^:>]+):\s*([^>]+)>"
matches = re.findall(pattern, str(check))
Expand All @@ -52,14 +91,19 @@
else:
check_str = str(check)

if type(reshaped_failure_cases.failure_case).__module__.startswith(
"pyspark.pandas"
):
is_pyspark = type(
reshaped_failure_cases.failure_case
).__module__.startswith("pyspark.pandas")

if is_pyspark:
failure_cases = reshaped_failure_cases.failure_case.to_numpy()
failure_cases_string = ", ".join(failure_cases.astype(str))
else:
failure_cases = reshaped_failure_cases.failure_case
failure_cases_string = ", ".join(failure_cases.apply(str))

total_failures = len(failure_cases)
failure_cases_string = _format_failure_cases_string(
failure_cases, total_failures, max_reported_failures, is_pyspark
)

return (
f"{parent_schema.__class__.__name__} '{parent_schema.name}' failed "
Expand Down Expand Up @@ -149,14 +193,23 @@
return df.index.to_frame().drop_duplicates()


def consolidate_failure_cases(schema_errors: list[SchemaError]):
def consolidate_failure_cases(
schema_errors: list[SchemaError],
max_reported_failures: Optional[int] = None,

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.11 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.12 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.10 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.13 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=0.20.0 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations

Check failure on line 198 in pandera/backends/pandas/error_formatters.py

View workflow job for this annotation

GitHub Actions / Linters (python=3.14.0 polars=1.33.1 )

Ruff (UP045)

pandera/backends/pandas/error_formatters.py:198:28: UP045 Use `X | None` for type annotations
):
"""Consolidate schema error dicts to produce data for error message."""
from pandera.api.pandas.types import is_table

assert schema_errors, (
"schema_errors input cannot be empty. Check how the backend "
"validation logic is handling/raising SchemaError(s)."
)

# Get max_reported_failures from config if not provided
if max_reported_failures is None:
config = get_config_context()
max_reported_failures = config.max_reported_failures

check_failure_cases = []
scalar_check_failure_cases = []

Expand Down
6 changes: 5 additions & 1 deletion pandera/backends/polars/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
get_lazyframe_schema,
)
from pandera.backends.base import BaseSchemaBackend, CoreCheckResult
from pandera.backends.polars.error_formatters import (
format_failure_cases_message,
)
from pandera.constants import CHECK_OUTPUT_KEY
from pandera.errors import (
FailureCaseMetadata,
Expand Down Expand Up @@ -100,7 +103,8 @@ def run_check(
_failure_cases = _failure_cases.drop(CHECK_OUTPUT_KEY)

failure_cases = _failure_cases.collect()
failure_cases_msg = failure_cases.head().rows(named=True)
failure_cases_msg = format_failure_cases_message(failure_cases)

message = (
f"{schema.__class__.__name__} '{schema.name}' failed "
f"validator number {check_index}: "
Expand Down
41 changes: 41 additions & 0 deletions pandera/backends/polars/error_formatters.py
Original file line number Diff line number Diff line change
@@ -1,0 +1,41 @@
"""Make schema error messages human-friendly for Polars."""

from typing import Any, Optional

from pandera.backends.error_formatters import (
format_failure_cases_with_truncation,
)
from pandera.config import get_config_context


def format_failure_cases_message(
    failure_cases: Any,
    max_reported_failures: int | None = None,
) -> str:
    """Format failure cases for Polars error messages.

    :param failure_cases: Polars DataFrame containing failure cases. Only
        its ``height``, ``head`` and ``rows`` members are used here.
    :param max_reported_failures: Maximum number of failures to report.
        If None, uses the ``max_reported_failures`` value of the active
        pandera config context.
    :return: Formatted failure cases string, as produced by
        ``format_failure_cases_with_truncation``.
    """
    if max_reported_failures is None:
        # Fall back to the globally configured limit only when the caller
        # did not pass an explicit one.
        max_reported_failures = get_config_context().max_reported_failures

    def format_all(cases):
        # Every failure case, one ``{column: value}`` dict per row.
        return cases.rows(named=True)

    def format_limited(cases, limit):
        # The first ``limit`` failure cases, plus the number of rows that
        # were actually kept so the caller can report how many are shown.
        limited = cases.head(limit)
        return limited.rows(named=True), limited.height

    # The decision of when to truncate lives in the shared backend helper;
    # this function only supplies the Polars-specific row extraction.
    return format_failure_cases_with_truncation(
        failure_cases,
        failure_cases.height,
        max_reported_failures,
        format_all,
        format_limited,
    )
16 changes: 16 additions & 0 deletions pandera/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ class PanderaConfig:
export PANDERA_VALIDATION_DEPTH=DATA_ONLY
export PANDERA_CACHE_DATAFRAME=True
export PANDERA_KEEP_CACHED_DATAFRAME=True
export PANDERA_MAX_REPORTED_FAILURES=100
"""

validation_enabled: bool = True
Expand All @@ -42,6 +43,9 @@ class PanderaConfig:
validation_depth: ValidationDepth | None = None
cache_dataframe: bool = False
keep_cached_dataframe: bool = False
max_reported_failures: int = (
100 # Default to showing first 100 reported failures
)


def _config_from_env_vars():
Expand All @@ -61,11 +65,20 @@ def _config_from_env_vars():
"PANDERA_KEEP_CACHED_DATAFRAME", "False"
) in {"True", "1"}

max_reported_failures_str = os.environ.get(
"PANDERA_MAX_REPORTED_FAILURES", "100"
)
try:
max_reported_failures = int(max_reported_failures_str)
except ValueError:
max_reported_failures = 100 # Default to 100 on invalid input

return PanderaConfig(
validation_enabled=validation_enabled,
validation_depth=validation_depth,
cache_dataframe=cache_dataframe,
keep_cached_dataframe=keep_cached_dataframe,
max_reported_failures=max_reported_failures,
)


Expand All @@ -80,6 +93,7 @@ def config_context(
validation_depth: ValidationDepth | None = None,
cache_dataframe: bool | None = None,
keep_cached_dataframe: bool | None = None,
max_reported_failures: int | None = None,
):
"""Temporarily set pandera config options to custom settings."""
_outer_config_ctx = get_config_context(validation_depth_default=None)
Expand All @@ -93,6 +107,8 @@ def config_context(
_CONTEXT_CONFIG.cache_dataframe = cache_dataframe
if keep_cached_dataframe is not None:
_CONTEXT_CONFIG.keep_cached_dataframe = keep_cached_dataframe
if max_reported_failures is not None:
_CONTEXT_CONFIG.max_reported_failures = max_reported_failures

yield
finally:
Expand Down
Loading
Loading