From 3814247e81fd277bc6e17e2c55baa0e1c21d7782 Mon Sep 17 00:00:00 2001
From: Stefan VanBuren <svanburen@buf.build>
Date: Wed, 27 Aug 2025 13:56:55 -0400
Subject: [PATCH] Minor tweaks / cleanup

* Use re2 instead of re for the email regex in extra_func.py (drops the now-unused `import re`)
* Simplify the ruff lint config in pyproject.toml (drop the FBT003 ignore and the F401 `lint.unfixable` entry)
* Simplify `isinstance` calls with tuples
* Use comprehensions when reasonable
* Use a plain `bool` for `strict` in `cel_is_ip_prefix`
---
 protovalidate/internal/extra_func.py    |  9 ++---
 protovalidate/internal/rules.py         |  4 +-
 protovalidate/internal/string_format.py | 50 +++++--------------------
 protovalidate/validator.py              |  5 +--
 pyproject.toml                          |  6 ---
 5 files changed, 16 insertions(+), 58 deletions(-)

diff --git a/protovalidate/internal/extra_func.py b/protovalidate/internal/extra_func.py
index 40bb677e..7245dc61 100644
--- a/protovalidate/internal/extra_func.py
+++ b/protovalidate/internal/extra_func.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 
 import math
-import re
 import typing
 from urllib import parse as urlparse
 
@@ -25,7 +24,7 @@
 from protovalidate.internal.rules import MessageType, field_to_cel
 
 # See https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
-_email_regex = re.compile(
+_email_regex = re2.compile(
     r"^[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*$"
 )
 
@@ -107,9 +106,9 @@ def cel_is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result:
         msg = "invalid argument, expected string or bytes"
         raise celpy.CELEvalError(msg)
     version = 0
-    strict = celtypes.BoolType(False)
+    strict = False
     if len(args) == 1 and isinstance(args[0], celtypes.BoolType):
-        strict = args[0]
+        strict = bool(args[0])
     elif len(args) == 1 and isinstance(args[0], celtypes.IntType):
         version = args[0]
     elif len(args) == 1 and (not isinstance(args[0], celtypes.BoolType) or not isinstance(args[0], celtypes.IntType)):
@@ -117,7 +116,7 @@ def cel_is_ip_prefix(val: celtypes.Value, *args) -> celpy.Result:
         raise celpy.CELEvalError(msg)
     elif len(args) == 2 and isinstance(args[0], celtypes.IntType) and isinstance(args[1], celtypes.BoolType):
         version = args[0]
-        strict = args[1]
+        strict = bool(args[1])
     elif len(args) == 2 and (not isinstance(args[0], celtypes.IntType) or not isinstance(args[1], celtypes.BoolType)):
         msg = "invalid argument, expected int and bool"
         raise celpy.CELEvalError(msg)
diff --git a/protovalidate/internal/rules.py b/protovalidate/internal/rules.py
index 8ce6975f..6eb08bbf 100644
--- a/protovalidate/internal/rules.py
+++ b/protovalidate/internal/rules.py
@@ -413,14 +413,14 @@ def validate(self, ctx: RuleContext, msg: message.Message):
             ctx.add(
                 Violation(
                     rule_id="message.oneof",
-                    message=f"only one of {', '.join([field.name for field in self._fields])} can be set",
+                    message=f"only one of {', '.join(field.name for field in self._fields)} can be set",
                 )
             )
         if self._required and num_set_fields == 0:
             ctx.add(
                 Violation(
                     rule_id="message.oneof",
-                    message=f"one of {', '.join([field.name for field in self._fields])} must be set",
+                    message=f"one of {', '.join(field.name for field in self._fields)} must be set",
                 )
             )
 
diff --git a/protovalidate/internal/string_format.py b/protovalidate/internal/string_format.py
index 0755d7bf..87e0f709 100644
--- a/protovalidate/internal/string_format.py
+++ b/protovalidate/internal/string_format.py
@@ -122,11 +122,7 @@ def __format_exponential(self, arg: celtypes.Value, precision: int) -> str:
         raise celpy.CELEvalError(msg)
 
     def __format_int(self, arg: celtypes.Value) -> str:
-        if (
-            isinstance(arg, celtypes.IntType)
-            or isinstance(arg, celtypes.UintType)
-            or isinstance(arg, celtypes.DoubleType)
-        ):
+        if isinstance(arg, (celtypes.IntType, celtypes.UintType, celtypes.DoubleType)):
             result = self.__validate_number(arg)
             if result is not None:
                 return result
@@ -138,9 +134,7 @@ def __format_int(self, arg: celtypes.Value) -> str:
         raise celpy.CELEvalError(msg)
 
     def __format_hex(self, arg: celtypes.Value) -> str:
-        if isinstance(arg, celtypes.IntType):
-            return f"{arg:x}"
-        if isinstance(arg, celtypes.UintType):
+        if isinstance(arg, (celtypes.IntType, celtypes.UintType)):
             return f"{arg:x}"
         if isinstance(arg, celtypes.BytesType):
             return arg.hex()
@@ -153,9 +147,7 @@ def __format_hex(self, arg: celtypes.Value) -> str:
         raise celpy.CELEvalError(msg)
 
     def __format_oct(self, arg: celtypes.Value) -> str:
-        if isinstance(arg, celtypes.IntType):
-            return f"{arg:o}"
-        if isinstance(arg, celtypes.UintType):
+        if isinstance(arg, (celtypes.IntType, celtypes.UintType)):
             return f"{arg:o}"
         msg = (
             "error during formatting: octal clause can only be used on integers, was given "
@@ -164,17 +156,12 @@ def __format_oct(self, arg: celtypes.Value) -> str:
         raise celpy.CELEvalError(msg)
 
     def __format_bin(self, arg: celtypes.Value) -> str:
-        if isinstance(arg, celtypes.IntType):
-            return f"{arg:b}"
-        if isinstance(arg, celtypes.UintType):
-            return f"{arg:b}"
-        if isinstance(arg, celtypes.BoolType):
+        if isinstance(arg, (celtypes.IntType, celtypes.UintType, celtypes.BoolType)):
             return f"{arg:b}"
         msg = (
             "error during formatting: only integers and bools can be formatted as binary, was given "
             f"{self.__type_str(type(arg))}"
         )
-
         raise celpy.CELEvalError(msg)
 
     def __format_string(self, arg: celtypes.Value) -> str:
@@ -189,6 +176,7 @@ def __format_string(self, arg: celtypes.Value) -> str:
             decoded = arg.decode("utf-8", errors="replace")
             # Collapse any contiguous placeholders into one
             return re.sub("\\ufffd+", "\ufffd", decoded)
+
         if isinstance(arg, celtypes.DoubleType):
             result = self.__validate_number(arg)
             if result is not None:
@@ -206,7 +194,7 @@ def __format_string(self, arg: celtypes.Value) -> str:
         if isinstance(arg, celtypes.MapType):
             return self.__format_map(arg)
         if isinstance(arg, celtypes.StringType):
-            return f"{arg}"
+            return arg
         if isinstance(arg, celtypes.TimestampType):
             base = arg.isoformat()
             if arg.getMilliseconds() != 0:
@@ -215,31 +203,11 @@ def __format_string(self, arg: celtypes.Value) -> str:
         return "unknown"
 
     def __format_list(self, arg: celtypes.ListType) -> str:
-        result = "["
-        for i in range(len(arg)):
-            if i > 0:
-                result += ", "
-            result += self.__format_string(arg[i])
-        result += "]"
-        return result
+        return "[" + ", ".join(self.__format_string(val) for val in arg) + "]"
 
     def __format_map(self, arg: celtypes.MapType) -> str:
-        m = {}
-        for cel_key, cel_val in arg.items():
-            key = self.__format_string(cel_key)
-            val = self.__format_string(cel_val)
-            m[key] = val
-
-        m = dict(sorted(m.items()))
-
-        result = "{"
-        for i, (key, val) in enumerate(m.items()):
-            if i > 0:
-                result += ", "
-            result += key + ": " + val
-
-        result += "}"
-        return result
+        m = {self.__format_string(cel_key): self.__format_string(cel_val) for cel_key, cel_val in arg.items()}
+        return "{" + ", ".join(key + ": " + val for key, val in sorted(m.items())) + "}"
 
     def __format_duration(self, arg: celtypes.DurationType) -> str:
         return f"{arg.seconds + Decimal(arg.microseconds) / Decimal(1_000_000):f}s"
diff --git a/protovalidate/validator.py b/protovalidate/validator.py
index 28b3d9be..b5e0f13b 100644
--- a/protovalidate/validator.py
+++ b/protovalidate/validator.py
@@ -105,10 +105,7 @@ def to_proto(self) -> validate_pb2.Violations:
         """
         Provides the Protobuf form of the validation errors.
         """
-        result = validate_pb2.Violations()
-        for violation in self._violations:
-            result.violations.append(violation.proto)
-        return result
+        return validate_pb2.Violations(violations=[violation.proto for violation in self._violations])
 
     @property
     def violations(self) -> list[Violation]:
diff --git a/pyproject.toml b/pyproject.toml
index 45d3cc4e..12cbe1e8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -79,8 +79,6 @@ lint.select = [
 	"YTT",
 ]
 lint.ignore = [
-	# Allow boolean positional values in function calls, like `dict.get(..., True)`.
-	"FBT003",
 	# Ignore complexity
 	"C901",
 	"PLR0911",
@@ -90,10 +88,6 @@ lint.ignore = [
 	# Ignore magic values - in this library, most are obvious in context.
 	"PLR2004",
 ]
-lint.unfixable = [
-	# Don't autofix unused imports.
-	"F401",
-]
 
 [tool.ruff.lint.isort]
 known-first-party = ["protovalidate", "buf"]