From e32efb5d037acf05ca8bc118765398d78f0f26da Mon Sep 17 00:00:00 2001 From: BmnQuentin Date: Fri, 5 Dec 2025 17:07:41 +0100 Subject: [PATCH 1/6] refactor: move IntegerField to pydantic --- frictionless/fields/field_descriptor.py | 131 ++++++++++++++++++------ frictionless/fields/integer.py | 74 +------------ frictionless/schema/field.py | 30 +++++- 3 files changed, 129 insertions(+), 106 deletions(-) diff --git a/frictionless/fields/field_descriptor.py b/frictionless/fields/field_descriptor.py index 1512e72825..0a88036474 100644 --- a/frictionless/fields/field_descriptor.py +++ b/frictionless/fields/field_descriptor.py @@ -5,7 +5,7 @@ import datetime from typing import Any, ClassVar, Dict, List, Literal, Optional, Union -import pydantic +from pydantic import Field as PydanticField, AliasChoices, model_validator, BaseModel from typing_extensions import Self from .. import settings @@ -21,7 +21,7 @@ """Python equivalents of types supported by the Table schema specification""" -class BaseFieldDescriptor(pydantic.BaseModel): +class BaseFieldDescriptor(BaseModel): """Data model of a (unspecialised) field descriptor""" name: str @@ -39,19 +39,19 @@ class BaseFieldDescriptor(pydantic.BaseModel): A description for this field e.g. “The recipient of the funds” """ - missing_values: Optional[List[str]] = pydantic.Field( + missing_values: Optional[List[str]] = PydanticField( default=None, alias="missingValues" ) """ A list of field values to consider as null values """ - example: Optional[str] = None + example: Optional[Any] = None """ An example of a value for the field. """ - @pydantic.model_validator(mode="before") + @model_validator(mode="before") @classmethod def compat(cls, data: Dict[str, Any]) -> Dict[str, Any]: # Backward compatibility for field.format @@ -63,6 +63,24 @@ def compat(cls, data: Dict[str, Any]) -> Dict[str, Any]: return data + @model_validator(mode="after") + def validate_example(self) -> Self: + """Validate that the example value can be converted using read_value() if available. + + Subclasses can override this method for custom validation logic. + For example, BooleanFieldDescriptor checks that the example is in true_values/false_values. + """ + if self.example is not None: + if hasattr(self, "read_value"): + read_value_method = getattr(self, "read_value") + result = read_value_method(self.example) + if result is None: + raise ValueError( + f'example value "{self.example}" for field "{self.name}" is not valid' + ) + + return self + class BooleanFieldDescriptor(BaseFieldDescriptor): """The field contains boolean (true/false) data.""" @@ -72,19 +90,19 @@ class BooleanFieldDescriptor(BaseFieldDescriptor): format: Optional[Literal["default"]] = None constraints: Optional[BaseConstraints[bool]] = None - true_values: Optional[List[str]] = pydantic.Field( + true_values: Optional[List[str]] = PydanticField( default=settings.DEFAULT_TRUE_VALUES, alias="trueValues", - validation_alias=pydantic.AliasChoices("trueValues", "true_values"), + validation_alias=AliasChoices("trueValues", "true_values"), ) """ Values to be interpreted as “true” for boolean fields """ - false_values: Optional[List[str]] = pydantic.Field( + false_values: Optional[List[str]] = PydanticField( default=settings.DEFAULT_FALSE_VALUES, alias="falseValues", - validation_alias=pydantic.AliasChoices("falseValues", "false_values"), + validation_alias=AliasChoices("falseValues", "false_values"), ) """ Values to be interpreted as “false” for boolean fields @@ -122,18 +140,6 @@ def write_value(self, cell: Optional[bool]) -> Optional[str]: return self.true_values[0] if cell else self.false_values[0] return None - @pydantic.model_validator(mode="after") - def validate_example(self) -> Self: - # If example is provided, check it's in true_values or false_values - if self.example is not None: - allowed_values = (self.true_values or []) + (self.false_values or []) - if self.example not in allowed_values: - raise ValueError( - f'example value "{self.example}" for field "{self.name}" is not valid' - ) - - return self - class ArrayFieldDescriptor(BaseFieldDescriptor): """The field contains a valid JSON array.""" @@ -143,7 +149,7 @@ class ArrayFieldDescriptor(BaseFieldDescriptor): constraints: Optional[JSONConstraints] = None # TODO type is not accurate : array item are unnamed, not described etc - array_item: Optional[FieldDescriptor] = pydantic.Field( + array_item: Optional[FieldDescriptor] = PydanticField( default=None, alias="arrayItem" ) @@ -202,7 +208,7 @@ class GeoPointFieldDescriptor(BaseFieldDescriptor): constraints: Optional[BaseConstraints[str]] = None -class CategoryDict(pydantic.BaseModel): +class CategoryDict(BaseModel): value: str label: Optional[str] = None @@ -225,7 +231,7 @@ class IntegerFieldDescriptor(BaseFieldDescriptor): Property to restrict the field to a finite set of possible values """ - categories_ordered: Optional[bool] = pydantic.Field( + categories_ordered: Optional[bool] = PydanticField( default=None, alias="categoriesOrdered" ) """ @@ -233,16 +239,77 @@ class IntegerFieldDescriptor(BaseFieldDescriptor): appearance of the values in the categories property as their natural order. """ - group_char: Optional[str] = pydantic.Field(default=None, alias="groupChar") + group_char: Optional[str] = PydanticField(default=None, alias="groupChar") """ String whose value is used to group digits for integer/number fields """ - bare_number: Optional[bool] = pydantic.Field(default=None, alias="bareNumber") + bare_number: bool = PydanticField( + default=settings.DEFAULT_BARE_NUMBER, alias="bareNumber" + ) """ If false leading and trailing non numbers will be removed for integer/number fields """ + def read_value(self, cell: Any) -> Optional[int]: + """read_value converts the physical (possibly typed) representation to + a logical integer representation. + + See "Data representation" in the glossary for more details. + https://datapackage.org/standard/glossary/#data-representation + + If the physical representation is already typed as an integer, the + value is returned unchanged. + + If the physical representation is a string, then the string is parsed + as an integer. If `bare_number` is False, non-digit characters are + removed first. `None` is returned if the string cannot be parsed. + + If the physical representation is a float or Decimal that represents + a whole number, it is converted to an integer. + + Any other typed input will return `None`. + """ + import re + from decimal import Decimal + + if isinstance(cell, bool): + return None + + elif isinstance(cell, int): + return cell + + elif isinstance(cell, str): + cell = cell.strip() + + # Process the cell (remove non-digit characters if bare_number is False) + if not self.bare_number: + pattern = re.compile(r"((^[^-\d]*)|(\D*$))") + cell = pattern.sub("", cell) + + # Cast the cell + try: + return int(cell) + except Exception: + return None + + elif isinstance(cell, float) and cell.is_integer(): + return int(cell) + elif isinstance(cell, Decimal) and cell % 1 == 0: + return int(cell) + + return None + + def write_value(self, cell: Optional[int]) -> Optional[str]: + """write_value converts the logical integer representation to + a physical (string) representation. + + Returns the integer as a string. + """ + if cell is None: + return None + return str(cell) + IItemType = Literal[ "boolean", @@ -262,7 +329,7 @@ class ListFieldDescriptor(BaseFieldDescriptor): type: Literal["list"] = "list" format: Optional[Literal["default"]] = None - constraints: CollectionConstraints = pydantic.Field( + constraints: CollectionConstraints = PydanticField( default_factory=CollectionConstraints ) @@ -271,7 +338,7 @@ class ListFieldDescriptor(BaseFieldDescriptor): Specifies the character sequence which separates lexically represented list items. """ - item_type: Optional[IItemType] = pydantic.Field(default=None, alias="itemType") + item_type: Optional[IItemType] = PydanticField(default=None, alias="itemType") """ Specifies the list item type in terms of existent Table Schema types. """ @@ -284,17 +351,17 @@ class NumberFieldDescriptor(BaseFieldDescriptor): format: Optional[Literal["default"]] = None constraints: Optional[ValueConstraints[float]] = None - decimal_char: Optional[str] = pydantic.Field(default=None, alias="decimalChar") + decimal_char: Optional[str] = PydanticField(default=None, alias="decimalChar") """ String whose value is used to represent a decimal point for number fields """ - group_char: Optional[str] = pydantic.Field(default=None, alias="groupChar") + group_char: Optional[str] = PydanticField(default=None, alias="groupChar") """ String whose value is used to group digits for integer/number fields """ - bare_number: Optional[bool] = pydantic.Field(default=None, alias="bareNumber") + bare_number: Optional[bool] = PydanticField(default=None, alias="bareNumber") """ If false leading and trailing non numbers will be removed for integer/number fields """ @@ -324,7 +391,7 @@ class StringFieldDescriptor(BaseFieldDescriptor): type: Literal["string"] = "string" format: Optional[IStringFormat] = None - constraints: StringConstraints = pydantic.Field(default_factory=StringConstraints) + constraints: StringConstraints = PydanticField(default_factory=StringConstraints) categories: Optional[ICategories] = None """ diff --git a/frictionless/fields/integer.py b/frictionless/fields/integer.py index 28586607fc..c1fef976ef 100644 --- a/frictionless/fields/integer.py +++ b/frictionless/fields/integer.py @@ -1,17 +1,10 @@ -from __future__ import annotations +from ..schema.field import Field -import re -from decimal import Decimal -from typing import Any -import attrs - -from .. import settings -from ..schema import Field - - -@attrs.define(kw_only=True, repr=False) class IntegerField(Field): + ### TEMP Only required for Metadata compatibility + ### This is required because "metadata_import" makes a distinction based + ### on the "type" property (`is_typed_class`) type = "integer" builtin = True supported_constraints = [ @@ -20,62 +13,3 @@ class IntegerField(Field): "maximum", "enum", ] - - bare_number: bool = settings.DEFAULT_BARE_NUMBER - """ - It specifies that the value is a bare number. If true, the pattern to - remove non digit character does not get applied and vice versa. - The default value is True. - """ - - # Read - - def create_value_reader(self): - # Create pattern - pattern = None - if not self.bare_number: - pattern = re.compile(r"((^[^-\d]*)|(\D*$))") - - # Create reader - def value_reader(cell: Any): - if isinstance(cell, str): - cell = cell.strip() - - # Process the cell - if pattern: - cell = pattern.sub("", cell) - - # Cast the cell - try: - return int(cell) - except Exception: - return None - - elif cell is True or cell is False: - return None - elif isinstance(cell, int): - return cell - elif isinstance(cell, float) and cell.is_integer(): - return int(cell) - elif isinstance(cell, Decimal) and cell % 1 == 0: - return int(cell) - return None - - return value_reader - - # Write - - def create_value_writer(self): - # Create writer - def value_writer(cell: Any): - return str(cell) - - return value_writer - - # Metadata - - metadata_profile_patch = { - "properties": { - "bareNumber": {"type": "boolean"}, - } - } diff --git a/frictionless/schema/field.py b/frictionless/schema/field.py index b55ef46000..20006ec392 100644 --- a/frictionless/schema/field.py +++ b/frictionless/schema/field.py @@ -11,7 +11,11 @@ from .. import errors, settings from ..exception import FrictionlessException -from ..fields.field_descriptor import BooleanFieldDescriptor, FieldDescriptor +from ..fields.field_descriptor import ( + BooleanFieldDescriptor, + FieldDescriptor, + IntegerFieldDescriptor, +) from ..metadata import Metadata from ..system import system @@ -159,6 +163,8 @@ def create_value_reader(self) -> types.IValueReader: def value_reader(cell: Any): if self._descriptor and isinstance(self._descriptor, BooleanFieldDescriptor): return self._descriptor.read_value(cell) + if self._descriptor and isinstance(self._descriptor, IntegerFieldDescriptor): + return self._descriptor.read_value(cell) return cell return value_reader @@ -199,6 +205,8 @@ def create_value_writer(self) -> types.IValueWriter: def value_writer(cell: Any): if self._descriptor and isinstance(self._descriptor, BooleanFieldDescriptor): return self._descriptor.write_value(cell) + if self._descriptor and isinstance(self._descriptor, IntegerFieldDescriptor): + return self._descriptor.write_value(cell) return str(cell) return value_writer @@ -271,15 +279,29 @@ def metadata_import( except pydantic.ValidationError as ve: error = errors.SchemaError(note=str(ve)) raise FrictionlessException(error) + elif field.type == "integer": + try: + field._descriptor = IntegerFieldDescriptor.model_validate(descriptor_copy) + except pydantic.ValidationError as ve: + error = errors.SchemaError(note=str(ve)) + raise FrictionlessException(error) return field def to_descriptor(self, *, validate: bool = False) -> IDescriptor: - if self._descriptor and isinstance(self._descriptor, BooleanFieldDescriptor): - descr = self._descriptor.model_dump(exclude_none=True, exclude_unset=True) + if self._descriptor and isinstance( + self._descriptor, (BooleanFieldDescriptor, IntegerFieldDescriptor) + ): + base_descr = super().to_descriptor(validate=validate) + # Set by_alias=True to get camelCase keys used by Frictionless (bareNumber) instead of snake_case (bare_number) + # Exclude 'name' from descriptor_descr because it may be "shared" (coming from detector.py) + descriptor_descr = self._descriptor.model_dump( + exclude_none=True, exclude_unset=True, by_alias=True, exclude={"name"} + ) ## Temporarily, Field properties have priority over ## Field._descriptor properties - descr = {**descr, **super().to_descriptor(validate=validate)} + ## Merge descriptor_descr into base_descr to preserve base order + descr = {**base_descr, **descriptor_descr} return descr else: return super().to_descriptor(validate=validate) From 049c30ae72c01ea4d85b9b62421edac999b3675e Mon Sep 17 00:00:00 2001 From: BmnQuentin Date: Fri, 5 Dec 2025 17:20:05 +0100 Subject: [PATCH 2/6] fix comment --- frictionless/fields/field_descriptor.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/frictionless/fields/field_descriptor.py b/frictionless/fields/field_descriptor.py index 0a88036474..0e605ddaaa 100644 --- a/frictionless/fields/field_descriptor.py +++ b/frictionless/fields/field_descriptor.py @@ -65,11 +65,7 @@ def compat(cls, data: Dict[str, Any]) -> Dict[str, Any]: @model_validator(mode="after") def validate_example(self) -> Self: - """Validate that the example value can be converted using read_value() if available. - - Subclasses can override this method for custom validation logic. - For example, BooleanFieldDescriptor checks that the example is in true_values/false_values. - """ + """Validate that the example value can be converted using read_value() if available""" if self.example is not None: if hasattr(self, "read_value"): read_value_method = getattr(self, "read_value") From a3fd739337db186032227a9cc412236881a91a92 Mon Sep 17 00:00:00 2001 From: BmnQuentin Date: Wed, 10 Dec 2025 10:28:01 +0100 Subject: [PATCH 3/6] fix regex position --- frictionless/fields/field_descriptor.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/frictionless/fields/field_descriptor.py b/frictionless/fields/field_descriptor.py index 0e605ddaaa..81b8ccc8f6 100644 --- a/frictionless/fields/field_descriptor.py +++ b/frictionless/fields/field_descriptor.py @@ -3,11 +3,13 @@ from __future__ import annotations import datetime -from typing import Any, ClassVar, Dict, List, Literal, Optional, Union +from typing import Any, ClassVar, Dict, List, Literal, Optional, Union, Pattern +import re from pydantic import Field as PydanticField, AliasChoices, model_validator, BaseModel from typing_extensions import Self + from .. import settings from .field_constraints import ( BaseConstraints, @@ -247,6 +249,8 @@ class IntegerFieldDescriptor(BaseFieldDescriptor): If false leading and trailing non numbers will be removed for integer/number fields """ + pattern: ClassVar[Pattern[str]] = re.compile(r"((^[^-\d]*)|(\D*$))") + def read_value(self, cell: Any) -> Optional[int]: """read_value converts the physical (possibly typed) representation to a logical integer representation. @@ -266,7 +270,6 @@ def read_value(self, cell: Any) -> Optional[int]: Any other typed input will return `None`. """ - import re from decimal import Decimal if isinstance(cell, bool): @@ -280,8 +283,7 @@ def read_value(self, cell: Any) -> Optional[int]: # Process the cell (remove non-digit characters if bare_number is False) if not self.bare_number: - pattern = re.compile(r"((^[^-\d]*)|(\D*$))") - cell = pattern.sub("", cell) + cell = self.pattern.sub("", cell) # Cast the cell try: From 321d816ee079f00dcc904c042e423b31d55e4a6f Mon Sep 17 00:00:00 2001 From: BmnQuentin Date: Wed, 10 Dec 2025 11:56:26 +0100 Subject: [PATCH 4/6] fix date --- frictionless/fields/date.py | 52 ------------------------- frictionless/fields/field_descriptor.py | 40 ++++++++++++++++++- frictionless/schema/field.py | 13 ++++++- 3 files changed, 51 insertions(+), 54 deletions(-) diff --git a/frictionless/fields/date.py b/frictionless/fields/date.py index 809f037ec1..d1011ef6c5 100644 --- a/frictionless/fields/date.py +++ b/frictionless/fields/date.py @@ -1,12 +1,7 @@ from __future__ import annotations -from datetime import date, datetime -from typing import Any - import attrs -from .. import settings -from ..platform import platform from ..schema import Field @@ -20,50 +15,3 @@ class DateField(Field): "maximum", "enum", ] - - # Read - - # TODO: use different value_readers based on format (see string) - def create_value_reader(self): - # Create reader - def value_reader(cell: Any): - if isinstance(cell, datetime): - value_time = cell.time() - if ( - value_time.hour == 0 - and value_time.minute == 0 - and value_time.second == 0 - ): - return datetime(cell.year, cell.month, cell.day).date() - else: - return None - if isinstance(cell, date): - return cell - if not isinstance(cell, str): - return None - try: - if self.format == "default": - cell = datetime.strptime(cell, settings.DEFAULT_DATE_PATTERN).date() - elif self.format == "any": - cell = platform.dateutil_parser.parse(cell).date() - else: - cell = datetime.strptime(cell, self.format).date() - except Exception: - return None - return cell - - return value_reader - - # Write - - def create_value_writer(self): - # Create format - format = self.format - if format == settings.DEFAULT_FIELD_FORMAT: - format = settings.DEFAULT_DATE_PATTERN - - # Create writer - def value_writer(cell: Any): - return cell.strftime(format) - - return value_writer diff --git a/frictionless/fields/field_descriptor.py b/frictionless/fields/field_descriptor.py index 81b8ccc8f6..1e1748250e 100644 --- a/frictionless/fields/field_descriptor.py +++ b/frictionless/fields/field_descriptor.py @@ -161,12 +161,50 @@ class AnyFieldDescriptor(BaseFieldDescriptor): class DateFieldDescriptor(BaseFieldDescriptor): - """he field contains a date without a time.""" + """The field contains a date without a time.""" type: Literal["date"] = "date" format: Optional[str] = None constraints: Optional[ValueConstraints[str]] = None + def read_value(self, cell: Any) -> Optional[datetime.date]: + from datetime import date, datetime + from ..platform import platform + + if isinstance(cell, datetime): + value_time = cell.time() + if ( + value_time.hour == 0 + and value_time.minute == 0 + and value_time.second == 0 + ): + return datetime(cell.year, cell.month, cell.day).date() + else: + return None + if isinstance(cell, date): + return cell + if not isinstance(cell, str): + return None + try: + format_value = self.format or "default" + if format_value == "default": + cell = datetime.strptime(cell, settings.DEFAULT_DATE_PATTERN).date() + elif format_value == "any": + cell = platform.dateutil_parser.parse(cell).date() + else: + cell = datetime.strptime(cell, format_value).date() + except Exception: + return None + return cell + + def write_value(self, cell: Optional[datetime.date]) -> Optional[str]: + if cell is None: + return None + format_value = self.format or "default" + if format_value == settings.DEFAULT_FIELD_FORMAT: + format_value = settings.DEFAULT_DATE_PATTERN + return cell.strftime(format_value) + class DatetimeFieldDescriptor(BaseFieldDescriptor): """The field contains a date with a time.""" diff --git a/frictionless/schema/field.py b/frictionless/schema/field.py index 20006ec392..cf96f3418f 100644 --- a/frictionless/schema/field.py +++ b/frictionless/schema/field.py @@ -13,6 +13,7 @@ from ..exception import FrictionlessException from ..fields.field_descriptor import ( BooleanFieldDescriptor, + DateFieldDescriptor, FieldDescriptor, IntegerFieldDescriptor, ) @@ -165,6 +166,8 @@ def value_reader(cell: Any): return self._descriptor.read_value(cell) if self._descriptor and isinstance(self._descriptor, IntegerFieldDescriptor): return self._descriptor.read_value(cell) + if self._descriptor and isinstance(self._descriptor, DateFieldDescriptor): + return self._descriptor.read_value(cell) return cell return value_reader @@ -207,6 +210,8 @@ def value_writer(cell: Any): return self._descriptor.write_value(cell) if self._descriptor and isinstance(self._descriptor, IntegerFieldDescriptor): return self._descriptor.write_value(cell) + if self._descriptor and isinstance(self._descriptor, DateFieldDescriptor): + return self._descriptor.write_value(cell) return str(cell) return value_writer @@ -285,12 +290,18 @@ def metadata_import( except pydantic.ValidationError as ve: error = errors.SchemaError(note=str(ve)) raise FrictionlessException(error) + elif field.type == "date": + try: + field._descriptor = DateFieldDescriptor.model_validate(descriptor_copy) + except pydantic.ValidationError as ve: + error = errors.SchemaError(note=str(ve)) + raise FrictionlessException(error) return field def to_descriptor(self, *, validate: bool = False) -> IDescriptor: if self._descriptor and isinstance( - self._descriptor, (BooleanFieldDescriptor, IntegerFieldDescriptor) + self._descriptor, (BooleanFieldDescriptor, IntegerFieldDescriptor, DateFieldDescriptor) ): base_descr = super().to_descriptor(validate=validate) # Set by_alias=True to get camelCase keys used by Frictionless (bareNumber) instead of snake_case (bare_number) From 2f5f70647d9cdb8a1e21ebee4cccc2b2dc43c75c Mon Sep 17 00:00:00 2001 From: BmnQuentin Date: Wed, 10 Dec 2025 12:50:10 +0100 Subject: [PATCH 5/6] first modularization test --- frictionless/fields/base_field_descriptor.py | 65 +++++ frictionless/fields/boolean.py | 3 +- frictionless/fields/boolean_descriptor.py | 50 ++++ frictionless/fields/date.py | 3 +- frictionless/fields/date_descriptor.py | 53 ++++ frictionless/fields/field_descriptor.py | 284 +------------------ frictionless/fields/integer.py | 2 +- frictionless/fields/integer_descriptor.py | 87 ++++++ frictionless/schema/field.py | 12 +- 9 files changed, 276 insertions(+), 283 deletions(-) create mode 100644 frictionless/fields/base_field_descriptor.py create mode 100644 frictionless/fields/boolean_descriptor.py create mode 100644 frictionless/fields/date_descriptor.py create mode 100644 frictionless/fields/integer_descriptor.py diff --git a/frictionless/fields/base_field_descriptor.py b/frictionless/fields/base_field_descriptor.py new file mode 100644 index 0000000000..d89bbac610 --- /dev/null +++ b/frictionless/fields/base_field_descriptor.py @@ -0,0 +1,65 @@ +"""base_field_descriptor.py provides the base Pydantic model for all field descriptors""" + +from __future__ import annotations + +from pydantic import BaseModel, Field as PydanticField, model_validator +from typing import Any, Dict, List, Optional +from typing_extensions import Self + + +class BaseFieldDescriptor(BaseModel): + """Data model of a (unspecialised) field descriptor""" + + name: str + """ + The field descriptor MUST contain a name property. + """ + + title: Optional[str] = None + """ + A human readable label or title for the field + """ + + description: Optional[str] = None + """ + A description for this field e.g. "The recipient of the funds" + """ + + missing_values: Optional[List[str]] = PydanticField( + default=None, alias="missingValues" + ) + """ + A list of field values to consider as null values + """ + + example: Optional[Any] = None + """ + An example of a value for the field. + """ + + @model_validator(mode="before") + @classmethod + def compat(cls, data: Dict[str, Any]) -> Dict[str, Any]: + # Backward compatibility for field.format + + format_ = data.get("format") + if format_: + if format_.startswith("fmt:"): + data["format"] = format_[4:] + + return data + + @model_validator(mode="after") + def validate_example(self) -> Self: + """Validate that the example value can be converted using read_value() if available""" + if self.example is not None: + if hasattr(self, "read_value"): + read_value_method = getattr(self, "read_value") + result = read_value_method(self.example) + if result is None: + raise ValueError( + f'example value "{self.example}" for field "{self.name}" is not valid' + ) + + return self + diff --git a/frictionless/fields/boolean.py b/frictionless/fields/boolean.py index 04db7e159a..365f8a6eef 100644 --- a/frictionless/fields/boolean.py +++ b/frictionless/fields/boolean.py @@ -1,8 +1,9 @@ +from __future__ import annotations from ..schema.field import Field - class BooleanField(Field): ### TEMP Only required for Metadata compatibility ### This is required because "metadata_import" makes a distinction based ### on the "type" property (`is_typed_class`) type = "boolean" + \ No newline at end of file diff --git a/frictionless/fields/boolean_descriptor.py b/frictionless/fields/boolean_descriptor.py new file mode 100644 index 0000000000..031540bebe --- /dev/null +++ b/frictionless/fields/boolean_descriptor.py @@ -0,0 +1,50 @@ +from typing import Any, ClassVar, List, Literal, Optional + +from pydantic import Field as PydanticField, AliasChoices + +from .. import settings +from .base_field_descriptor import BaseFieldDescriptor +from .field_constraints import BaseConstraints + +class BooleanFieldDescriptor(BaseFieldDescriptor): + """The field contains boolean (true/false) data.""" + + type: ClassVar[Literal["boolean"]] = "boolean" + + format: Optional[Literal["default"]] = None + constraints: Optional[BaseConstraints[bool]] = None + + true_values: Optional[List[str]] = PydanticField( + default=settings.DEFAULT_TRUE_VALUES, + alias="trueValues", + validation_alias=AliasChoices("trueValues", "true_values"), + ) + """ + Values to be interpreted as "true" for boolean fields + """ + + false_values: Optional[List[str]] = PydanticField( + default=settings.DEFAULT_FALSE_VALUES, + alias="falseValues", + validation_alias=AliasChoices("falseValues", "false_values"), + ) + """ + Values to be interpreted as "false" for boolean fields + """ + + def read_value(self, cell: Any) -> Optional[bool]: + if isinstance(cell, bool): + return cell + + if isinstance(cell, str): + if self.true_values and cell in self.true_values: + return True + if self.false_values and cell in self.false_values: + return False + + return None + + def write_value(self, cell: Optional[bool]) -> Optional[str]: + if self.true_values and self.false_values: + return self.true_values[0] if cell else self.false_values[0] + return None diff --git a/frictionless/fields/date.py b/frictionless/fields/date.py index d1011ef6c5..b13521ca1b 100644 --- a/frictionless/fields/date.py +++ b/frictionless/fields/date.py @@ -1,10 +1,8 @@ from __future__ import annotations import attrs - from ..schema import Field - @attrs.define(kw_only=True, repr=False) class DateField(Field): type = "date" @@ -15,3 +13,4 @@ class DateField(Field): "maximum", "enum", ] + diff --git a/frictionless/fields/date_descriptor.py b/frictionless/fields/date_descriptor.py new file mode 100644 index 0000000000..b332c55508 --- /dev/null +++ b/frictionless/fields/date_descriptor.py @@ -0,0 +1,53 @@ +import datetime +from typing import Any, Literal, Optional + + +from .. import settings +from .base_field_descriptor import BaseFieldDescriptor +from .field_constraints import ValueConstraints + + +class DateFieldDescriptor(BaseFieldDescriptor): + """The field contains a date without a time.""" + + type: Literal["date"] = "date" + format: Optional[str] = None + constraints: Optional[ValueConstraints[str]] = None + + def read_value(self, cell: Any) -> Optional[datetime.date]: + from datetime import date, datetime + from ..platform import platform + + if isinstance(cell, datetime): + value_time = cell.time() + if ( + value_time.hour == 0 + and value_time.minute == 0 + and value_time.second == 0 + ): + return datetime(cell.year, cell.month, cell.day).date() + else: + return None + if isinstance(cell, date): + return cell + if not isinstance(cell, str): + return None + try: + format_value = self.format or "default" + if format_value == "default": + cell = datetime.strptime(cell, settings.DEFAULT_DATE_PATTERN).date() + elif format_value == "any": + cell = platform.dateutil_parser.parse(cell).date() + else: + cell = datetime.strptime(cell, format_value).date() + except Exception: + return None + return cell + + def write_value(self, cell: Optional[datetime.date]) -> Optional[str]: + if cell is None: + return None + format_value = self.format or "default" + if format_value == settings.DEFAULT_FIELD_FORMAT: + format_value = settings.DEFAULT_DATE_PATTERN + return cell.strftime(format_value) diff --git a/frictionless/fields/field_descriptor.py b/frictionless/fields/field_descriptor.py index 1e1748250e..f0640608c5 100644 --- a/frictionless/fields/field_descriptor.py +++ b/frictionless/fields/field_descriptor.py @@ -1,16 +1,11 @@ -"""field_descriptor.py provides pydantic Models for Field descriptors""" - from __future__ import annotations import datetime -from typing import Any, ClassVar, Dict, List, Literal, Optional, Union, Pattern -import re - -from pydantic import Field as PydanticField, AliasChoices, model_validator, BaseModel -from typing_extensions import Self +from typing import List, Literal, Optional, Union +from pydantic import Field as PydanticField, BaseModel -from .. import settings +from .base_field_descriptor import BaseFieldDescriptor from .field_constraints import ( BaseConstraints, CollectionConstraints, @@ -19,125 +14,6 @@ ValueConstraints, ) -TableSchemaTypes = Union[bool, str, float, int] -"""Python equivalents of types supported by the Table schema specification""" - - -class BaseFieldDescriptor(BaseModel): - """Data model of a (unspecialised) field descriptor""" - - name: str - """ - The field descriptor MUST contain a name property. - """ - - title: Optional[str] = None - """ - A human readable label or title for the field - """ - - description: Optional[str] = None - """ - A description for this field e.g. “The recipient of the funds” - """ - - missing_values: Optional[List[str]] = PydanticField( - default=None, alias="missingValues" - ) - """ - A list of field values to consider as null values - """ - - example: Optional[Any] = None - """ - An example of a value for the field. - """ - - @model_validator(mode="before") - @classmethod - def compat(cls, data: Dict[str, Any]) -> Dict[str, Any]: - # Backward compatibility for field.format - - format_ = data.get("format") - if format_: - if format_.startswith("fmt:"): - data["format"] = format_[4:] - - return data - - @model_validator(mode="after") - def validate_example(self) -> Self: - """Validate that the example value can be converted using read_value() if available""" - if self.example is not None: - if hasattr(self, "read_value"): - read_value_method = getattr(self, "read_value") - result = read_value_method(self.example) - if result is None: - raise ValueError( - f'example value "{self.example}" for field "{self.name}" is not valid' - ) - - return self - - -class BooleanFieldDescriptor(BaseFieldDescriptor): - """The field contains boolean (true/false) data.""" - - type: ClassVar[Literal["boolean"]] = "boolean" - - format: Optional[Literal["default"]] = None - constraints: Optional[BaseConstraints[bool]] = None - - true_values: Optional[List[str]] = PydanticField( - default=settings.DEFAULT_TRUE_VALUES, - alias="trueValues", - validation_alias=AliasChoices("trueValues", "true_values"), - ) - """ - Values to be interpreted as “true” for boolean fields - """ - - false_values: Optional[List[str]] = PydanticField( - default=settings.DEFAULT_FALSE_VALUES, - alias="falseValues", - validation_alias=AliasChoices("falseValues", "false_values"), - ) - """ - Values to be interpreted as “false” for boolean fields - """ - - def read_value(self, cell: TableSchemaTypes) -> Optional[bool]: - """read_value converts the physical (possibly typed) representation to - a logical boolean representation. - - See "Data representation" in the glossary for more details. - https://datapackage.org/standard/glossary/#data-representation - - If the physical representation is already typed as a boolean, the - value is returned unchanged. - - If the physical representation is a string, then the string is parsed - as a boolean depending on true_values and false_values options. `None` - is returned if the string cannot be parsed. - - Any other typed input will return `None`. - """ - if isinstance(cell, bool): - return cell - - if isinstance(cell, str): - if self.true_values and cell in self.true_values: - return True - if self.false_values and cell in self.false_values: - return False - - return None - - def write_value(self, cell: Optional[bool]) -> Optional[str]: - if self.true_values and self.false_values: - return self.true_values[0] if cell else self.false_values[0] - return None - class ArrayFieldDescriptor(BaseFieldDescriptor): """The field contains a valid JSON array.""" @@ -146,8 +22,9 @@ class ArrayFieldDescriptor(BaseFieldDescriptor): format: Optional[Literal["default"]] = None constraints: Optional[JSONConstraints] = None - # TODO type is not accurate : array item are unnamed, not described etc - array_item: Optional[FieldDescriptor] = PydanticField( + # TODO type is not accurate : array item are unnamed, not described etc + # Using string annotation to avoid circular import + array_item: Optional["FieldDescriptor"] = PydanticField( default=None, alias="arrayItem" ) @@ -160,52 +37,6 @@ class AnyFieldDescriptor(BaseFieldDescriptor): constraints: Optional[BaseConstraints[str]] = None -class DateFieldDescriptor(BaseFieldDescriptor): - """The field contains a date without a time.""" - - type: Literal["date"] = "date" - format: Optional[str] = None - constraints: Optional[ValueConstraints[str]] = None - - def read_value(self, cell: Any) -> Optional[datetime.date]: - from datetime import date, datetime - from ..platform import platform - - if isinstance(cell, datetime): - value_time = cell.time() - if ( - value_time.hour == 0 - and value_time.minute == 0 - and value_time.second == 0 - ): - return datetime(cell.year, cell.month, cell.day).date() - else: - return None - if isinstance(cell, date): - return cell - if not isinstance(cell, str): - return None - try: - format_value = self.format or "default" - if format_value == "default": - cell = datetime.strptime(cell, settings.DEFAULT_DATE_PATTERN).date() - elif format_value == "any": - cell = platform.dateutil_parser.parse(cell).date() - else: - cell = datetime.strptime(cell, format_value).date() - except Exception: - return None - return cell - - def write_value(self, cell: Optional[datetime.date]) -> Optional[str]: - if cell is None: - return None - format_value = self.format or "default" - if format_value == settings.DEFAULT_FIELD_FORMAT: - format_value = settings.DEFAULT_DATE_PATTERN - return cell.strftime(format_value) - - class DatetimeFieldDescriptor(BaseFieldDescriptor): """The field contains a date with a time.""" @@ -245,6 +76,7 @@ class GeoPointFieldDescriptor(BaseFieldDescriptor): class CategoryDict(BaseModel): + """Category dictionary for field categories.""" value: str label: Optional[str] = None @@ -253,98 +85,7 @@ class CategoryDict(BaseModel): List[str], List[CategoryDict], ] - - -class IntegerFieldDescriptor(BaseFieldDescriptor): - """The field contains integers - that is whole numbers.""" - - type: Literal["integer"] = "integer" - format: Optional[Literal["default"]] = None - constraints: Optional[ValueConstraints[int]] = None - - categories: Optional[ICategories] = None - """ - Property to restrict the field to a finite set of possible values - """ - - categories_ordered: Optional[bool] = PydanticField( - default=None, alias="categoriesOrdered" - ) - """ - When categoriesOrdered is true, implementations SHOULD regard the order of - appearance of the values in the categories property as their natural order. - """ - - group_char: Optional[str] = PydanticField(default=None, alias="groupChar") - """ - String whose value is used to group digits for integer/number fields - """ - - bare_number: bool = PydanticField( - default=settings.DEFAULT_BARE_NUMBER, alias="bareNumber" - ) - """ - If false leading and trailing non numbers will be removed for integer/number fields - """ - - pattern: ClassVar[Pattern[str]] = re.compile(r"((^[^-\d]*)|(\D*$))") - - def read_value(self, cell: Any) -> Optional[int]: - """read_value converts the physical (possibly typed) representation to - a logical integer representation. - - See "Data representation" in the glossary for more details. - https://datapackage.org/standard/glossary/#data-representation - - If the physical representation is already typed as an integer, the - value is returned unchanged. - - If the physical representation is a string, then the string is parsed - as an integer. If `bare_number` is False, non-digit characters are - removed first. `None` is returned if the string cannot be parsed. - - If the physical representation is a float or Decimal that represents - a whole number, it is converted to an integer. - - Any other typed input will return `None`. - """ - from decimal import Decimal - - if isinstance(cell, bool): - return None - - elif isinstance(cell, int): - return cell - - elif isinstance(cell, str): - cell = cell.strip() - - # Process the cell (remove non-digit characters if bare_number is False) - if not self.bare_number: - cell = self.pattern.sub("", cell) - - # Cast the cell - try: - return int(cell) - except Exception: - return None - - elif isinstance(cell, float) and cell.is_integer(): - return int(cell) - elif isinstance(cell, Decimal) and cell % 1 == 0: - return int(cell) - - return None - - def write_value(self, cell: Optional[int]) -> Optional[str]: - """write_value converts the logical integer representation to - a physical (string) representation. - - Returns the integer as a string. - """ - if cell is None: - return None - return str(cell) +"""Categories type used by IntegerFieldDescriptor and StringFieldDescriptor""" IItemType = Literal[ @@ -464,17 +205,16 @@ class YearmonthFieldDescriptor(BaseFieldDescriptor): format: Optional[Literal["default"]] = None constraints: Optional[ValueConstraints[str]] = None - FieldDescriptor = Union[ AnyFieldDescriptor, - ArrayFieldDescriptor, - BooleanFieldDescriptor, - DateFieldDescriptor, + ArrayFieldDescriptor, # wip + # BooleanFieldDescriptor, # v + # DateFieldDescriptor, # v DatetimeFieldDescriptor, DurationFieldDescriptor, GeoJSONFieldDescriptor, GeoPointFieldDescriptor, - IntegerFieldDescriptor, + # IntegerFieldDescriptor, # v ListFieldDescriptor, NumberFieldDescriptor, ObjectFieldDescriptor, diff --git a/frictionless/fields/integer.py b/frictionless/fields/integer.py index c1fef976ef..40951afbaa 100644 --- a/frictionless/fields/integer.py +++ b/frictionless/fields/integer.py @@ -1,6 +1,6 @@ +from __future__ import annotations from ..schema.field import Field - class IntegerField(Field): ### TEMP Only required for Metadata compatibility ### This is required because "metadata_import" makes a distinction based diff --git a/frictionless/fields/integer_descriptor.py b/frictionless/fields/integer_descriptor.py new file mode 100644 index 0000000000..675e336bcb --- /dev/null +++ b/frictionless/fields/integer_descriptor.py @@ -0,0 +1,87 @@ + +import re +from decimal import Decimal +from typing import Any, ClassVar, Literal, Optional, Pattern, Union, List + +from pydantic import Field as PydanticField, BaseModel + +from .. import settings +from .base_field_descriptor import BaseFieldDescriptor +from .field_constraints import ValueConstraints + + +class CategoryDict(BaseModel): + """Category dictionary for field categories.""" + value: str + label: Optional[str] = None + +ICategories = Union[ + List[str], + List[CategoryDict], +] + +class IntegerFieldDescriptor(BaseFieldDescriptor): + """The field contains integers - that is whole numbers.""" + + type: Literal["integer"] = "integer" + format: Optional[Literal["default"]] = None + constraints: Optional[ValueConstraints[int]] = None + + categories: Optional[ICategories] = None + """ + Property to restrict the field to a finite set of possible values + """ + + categories_ordered: Optional[bool] = PydanticField( + default=None, alias="categoriesOrdered" + ) + """ + When categoriesOrdered is true, implementations SHOULD regard the order of + appearance of the values in the categories property as their natural order. + """ + + group_char: Optional[str] = PydanticField(default=None, alias="groupChar") + """ + String whose value is used to group digits for integer/number fields + """ + + bare_number: bool = PydanticField( + default=settings.DEFAULT_BARE_NUMBER, alias="bareNumber" + ) + """ + If false leading and trailing non numbers will be removed for integer/number fields + """ + + pattern: ClassVar[Pattern[str]] = re.compile(r"((^[^-\d]*)|(\D*$))") + + def read_value(self, cell: Any) -> Optional[int]: + if isinstance(cell, bool): + return None + + elif isinstance(cell, int): + return cell + + elif isinstance(cell, str): + cell = cell.strip() + + # Process the cell (remove non-digit characters if bare_number is False) + if not self.bare_number: + cell = self.pattern.sub("", cell) + + # Cast the cell + try: + return int(cell) + except Exception: + return None + + elif isinstance(cell, float) and cell.is_integer(): + return int(cell) + elif isinstance(cell, Decimal) and cell % 1 == 0: + return int(cell) + + return None + + def write_value(self, cell: Optional[int]) -> Optional[str]: + if cell is None: + return None + return str(cell) diff --git a/frictionless/schema/field.py b/frictionless/schema/field.py index cf96f3418f..b6aa6eef69 100644 --- a/frictionless/schema/field.py +++ b/frictionless/schema/field.py @@ -11,12 +11,10 @@ from .. import errors, settings from ..exception import FrictionlessException -from ..fields.field_descriptor import ( - BooleanFieldDescriptor, - DateFieldDescriptor, - FieldDescriptor, - IntegerFieldDescriptor, -) +from ..fields.boolean_descriptor import BooleanFieldDescriptor +from ..fields.date_descriptor import DateFieldDescriptor +from ..fields.integer_descriptor import IntegerFieldDescriptor +from ..fields.field_descriptor import FieldDescriptor from ..metadata import Metadata from ..system import system @@ -30,7 +28,7 @@ class Field(Metadata): """Field representation""" - _descriptor: Optional[FieldDescriptor] = None + _descriptor: Optional[BooleanFieldDescriptor | IntegerFieldDescriptor | DateFieldDescriptor | FieldDescriptor] = None name: str """ From 2a435c80a47ba6218a9034cf8e716f3b5f428dec Mon Sep 17 00:00:00 2001 From: BmnQuentin Date: Wed, 10 Dec 2025 15:52:32 +0100 Subject: [PATCH 6/6] global logic for field and field_descriptor --- frictionless/fields/array_descriptor.py | 0 frictionless/fields/field_descriptor.py | 10 +++++++--- frictionless/schema/field.py | 10 +++++----- 3 files changed, 12 insertions(+), 8 deletions(-) create mode 100644 frictionless/fields/array_descriptor.py diff --git a/frictionless/fields/array_descriptor.py b/frictionless/fields/array_descriptor.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/frictionless/fields/field_descriptor.py b/frictionless/fields/field_descriptor.py index f0640608c5..32b9d95e9c 100644 --- a/frictionless/fields/field_descriptor.py +++ b/frictionless/fields/field_descriptor.py @@ -14,6 +14,10 @@ ValueConstraints, ) +from .boolean_descriptor import BooleanFieldDescriptor +from .date_descriptor import DateFieldDescriptor +from .integer_descriptor import IntegerFieldDescriptor + class ArrayFieldDescriptor(BaseFieldDescriptor): """The field contains a valid JSON array.""" @@ -208,13 +212,13 @@ class YearmonthFieldDescriptor(BaseFieldDescriptor): FieldDescriptor = Union[ AnyFieldDescriptor, ArrayFieldDescriptor, # wip - # BooleanFieldDescriptor, # v - # DateFieldDescriptor, # v + BooleanFieldDescriptor, # v + DateFieldDescriptor, # v DatetimeFieldDescriptor, DurationFieldDescriptor, GeoJSONFieldDescriptor, GeoPointFieldDescriptor, - # IntegerFieldDescriptor, # v + IntegerFieldDescriptor, # v ListFieldDescriptor, NumberFieldDescriptor, ObjectFieldDescriptor, diff --git a/frictionless/schema/field.py b/frictionless/schema/field.py index b6aa6eef69..4114bedef5 100644 --- a/frictionless/schema/field.py +++ b/frictionless/schema/field.py @@ -11,10 +11,10 @@ from .. import errors, settings from ..exception import FrictionlessException -from ..fields.boolean_descriptor import BooleanFieldDescriptor -from ..fields.date_descriptor import DateFieldDescriptor -from ..fields.integer_descriptor import IntegerFieldDescriptor -from ..fields.field_descriptor import FieldDescriptor +# from ..fields.boolean_descriptor import BooleanFieldDescriptor +# from ..fields.date_descriptor import DateFieldDescriptor +# from ..fields.integer_descriptor import IntegerFieldDescriptor +from ..fields.field_descriptor import BooleanFieldDescriptor, DateFieldDescriptor, IntegerFieldDescriptor, FieldDescriptor from ..metadata import Metadata from ..system import system @@ -28,7 +28,7 @@ class Field(Metadata): """Field representation""" - _descriptor: Optional[BooleanFieldDescriptor | IntegerFieldDescriptor | DateFieldDescriptor | FieldDescriptor] = None + _descriptor: Optional[ FieldDescriptor] = None name: str """