Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion protovalidate/internal/extra_func.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from celpy import celtypes

from protovalidate.internal import string_format
from protovalidate.internal.matches import cel_matches
from protovalidate.internal.rules import MessageType, field_to_cel

# See https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address
Expand Down Expand Up @@ -1554,12 +1555,13 @@ def __peek(self, char: str) -> bool:


def make_extra_funcs(locale: str) -> dict[str, celpy.CELFunction]:
# TODO(#257): Fix types and add tests for StringFormat.
# For now, ignoring the type.
string_fmt = string_format.StringFormat(locale) # type: ignore
return {
# Missing standard functions
"format": string_fmt.format,
# Overridden standard functions
"matches": cel_matches,
# protovalidate specific functions
"getField": cel_get_field,
"isNan": cel_is_nan,
Expand Down
66 changes: 66 additions & 0 deletions protovalidate/internal/matches.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# Copyright 2023-2025 Buf Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import re

import celpy
from celpy import celtypes

# Regexes that detect constructs which are not valid RE2 syntax. Most of them
# are accepted by Python's re package, so they have to be rejected explicitly
# for the `matches` function to behave like a real RE2 engine.
# RE2: https://github.com/google/re2/wiki/syntax
#
# Each entry is searched for inside the *user-supplied pattern text*; a hit
# anywhere in that text marks the pattern as invalid.
invalid_patterns = [
    r"\\[1-9]",  # backreference
    r"\\k<\w+>",  # backreference
    r"\(\?P=\w+\)",  # named backreference (Python spelling), eg: (?P=name)
    r"\(\?\=",  # lookahead
    r"\(\?\!",  # negative lookahead
    r"\(\?\<\=",  # lookbehind
    r"\(\?\<\!",  # negative lookbehind
    r"\(\?\(",  # conditional group, eg: (?(1)yes|no)
    r"\(\?>",  # atomic group, eg: (?>a+)
    r"\\c[A-Z]",  # control character
    r"\\u[0-9a-fA-F]{4}",  # UTF-16 code-unit
    r"\\0(?!\d)",  # NUL
    r"\[\\b.*\]",  # Backspace eg: [\b]
]


def cel_matches(text: celtypes.Value, pattern: celtypes.Value) -> celpy.Result:
    """Report whether ``pattern`` is found anywhere in ``text``.

    CEL regular expressions use RE2 syntax, which differs from Python's ``re``
    module in several ways. The google-re2 package (an optional extra of celpy)
    would be the natural choice, but at the time of writing it ships no
    pre-built binary for the newest Python release (3.13), which would cause
    problems for anyone on that version.

    Rather than pass that problem on to users, RE2 is approximated here: a
    pattern that uses any construct listed in ``invalid_patterns`` is rejected
    before Python's ``re`` engine ever evaluates it.

    Args:
        text: CEL string to search in.
        pattern: CEL string holding the regular expression.

    Returns:
        A ``celtypes.BoolType`` that is true when ``re.search`` finds a match,
        or a ``celpy.CELEvalError`` value (returned, not raised) when Python's
        ``re`` module rejects the pattern.

    Raises:
        celpy.CELEvalError: If either argument is not a ``celtypes.StringType``
            or if ``pattern`` contains syntax that is not valid in RE2.
    """
    if not isinstance(text, celtypes.StringType):
        msg = "invalid argument for text, expected string"
        raise celpy.CELEvalError(msg)
    if not isinstance(pattern, celtypes.StringType):
        msg = "invalid argument for pattern, expected string"
        raise celpy.CELEvalError(msg)

    # Emulate RE2: refuse any pattern containing a construct RE2 lacks.
    uses_non_re2_syntax = any(
        re.search(forbidden, pattern) is not None for forbidden in invalid_patterns
    )
    if uses_non_re2_syntax:
        msg = f"error evaluating pattern {pattern}, invalid RE2 syntax"
        raise celpy.CELEvalError(msg)

    try:
        found = re.search(pattern, text)
    except re.error as ex:
        # Returned rather than raised: per the review discussion on this
        # change, this follows how celpy itself reports regex errors.
        return celpy.CELEvalError("match error", ex.__class__, ex.args)
    else:
        return celtypes.BoolType(found is not None)
44 changes: 44 additions & 0 deletions tests/matches_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright 2023-2025 Buf Technologies, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import celpy
from celpy import celtypes

from protovalidate.internal import extra_func

# Sample regular expressions, each using one construct that cel_matches is
# expected to reject as invalid RE2 syntax.
invalid_patterns = [
r"\1",  # numbered backreference
r"\k<name>",  # named backreference
r"Jack(?=Sprat)",  # lookahead
"Jack(?!Sprat)",  # negative lookahead
"(?<=Sprat)Jack",  # lookbehind
"(?<!Sprat)Jack",  # negative lookbehind
r"\cM\cJ",  # control characters
r"\u0041",  # UTF-16 code-unit escape
r"\0 \01 \0a \012",  # NUL escape (a \0 that is not followed by a digit)
r"[\b]",  # backspace inside a character class
]


class TestMatches(unittest.TestCase):
    """Tests for the RE2-emulating ``matches`` override, ``cel_matches``."""

    def test_invalid_re2_syntax(self):
        """Each sample in ``invalid_patterns`` must be rejected.

        Every pattern runs in its own subTest so that one failure does not
        hide the results for the remaining patterns.
        """
        text = celtypes.StringType("test")
        for pattern in invalid_patterns:
            cel_pattern = celtypes.StringType(pattern)
            with self.subTest(pattern=pattern):
                with self.assertRaises(
                    celpy.CELEvalError,
                    msg=f"expected an error on pattern {cel_pattern}",
                ) as raised:
                    extra_func.cel_matches(text, cel_pattern)
                self.assertEqual(
                    str(raised.exception),
                    f"error evaluating pattern {cel_pattern}, invalid RE2 syntax",
                )
Loading