Add supplemental format conformance tests (#294)

smaye81 · web-flow · commit ce512201f9f6 · 2025-05-29T13:37:45.000-04:00
The current conformance tests for `string.format` are not exhaustive and do not account for all scenarios in the [docs](https://github.com/google/cel-spec/blob/master/doc/extensions/strings.md). One such example is a test for invalid UTF-8. This adds the ability to specify supplemental conformance tests in the form of another textproto file. The content is merged with the actual cel conformance tests and then run against our implementation. This allows us to specify our own tests not yet covered in the official conformance tests. As a result, this includes two tests for invalid UTF-8, which incidentally turned up a bug involving collapsing placeholders for contiguous invalid UTF-8 bytes. Note that a PR has been created [here](google/cel-spec#473) to add these tests to the spec. Once added and released, they can be removed from our supplemental tests.
diff --git a/src/main/java/build/buf/protovalidate/Format.java b/src/main/java/build/buf/protovalidate/Format.java
@@ -144,7 +144,9 @@ private static String formatString(Val val) {
         }
         return val.value().toString();
       case Bytes:
-        return new String((byte[]) val.value(), StandardCharsets.UTF_8);
+        String byteStr = new String((byte[]) val.value(), StandardCharsets.UTF_8);
+        // Collapse any contiguous placeholders into one
+        return byteStr.replaceAll("\\ufffd+", "\ufffd");
       case Double:
         Optional<String> result = validateNumber(val);
         if (result.isPresent()) {
diff --git a/src/test/java/build/buf/protovalidate/FormatTest.java b/src/test/java/build/buf/protovalidate/FormatTest.java
@@ -30,10 +30,10 @@
 import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Named;
@@ -79,34 +79,33 @@ class FormatTest {
           "object inside map");
 
   @BeforeAll
-  static void setUp() throws Exception {
-    byte[] encoded =
-        Files.readAllBytes(
-            Paths.get("src/test/resources/testdata/string_ext_" + CEL_SPEC_VERSION + ".textproto"));
-    String data = new String(encoded, StandardCharsets.UTF_8);
-    SimpleTestFile.Builder bldr = SimpleTestFile.newBuilder();
-    TextFormat.getParser().merge(data, bldr);
-    SimpleTestFile testData = bldr.build();
-
-    List<SimpleTestSection> sections = testData.getSectionList();
+  private static void setUp() throws Exception {
+    // The test data from the cel-spec conformance tests
+    List<SimpleTestSection> celSpecSections =
+        loadTestData("src/test/resources/testdata/string_ext_" + CEL_SPEC_VERSION + ".textproto");
+    // Our supplemental tests of functionality not in the cel conformance file, but defined in the
+    // spec.
+    List<SimpleTestSection> supplementalSections =
+        loadTestData("src/test/resources/testdata/string_ext_supplemental.textproto");
+
+    // Combine the test data from both files into one
+    List<SimpleTestSection> sections =
+        Stream.concat(celSpecSections.stream(), supplementalSections.stream())
+            .collect(Collectors.toList());
 
     // Find the format tests which test successful formatting
-    // Defaults to an empty list if nothing is found
     formatTests =
         sections.stream()
             .filter(s -> s.getName().equals("format"))
-            .findFirst()
-            .map(SimpleTestSection::getTestList)
-            .orElse(Collections.emptyList());
+            .flatMap(s -> s.getTestList().stream())
+            .collect(Collectors.toList());
 
     // Find the format error tests which test errors during formatting
-    // Defaults to an empty list if nothing is found
     formatErrorTests =
         sections.stream()
             .filter(s -> s.getName().equals("format_errors"))
-            .findFirst()
-            .map(SimpleTestSection::getTestList)
-            .orElse(Collections.emptyList());
+            .flatMap(s -> s.getTestList().stream())
+            .collect(Collectors.toList());
 
     env = Env.newEnv(Library.Lib(new ValidateLibrary()));
   }
@@ -127,6 +126,17 @@ void testFormatError(SimpleTest test) {
     assertThat(result.getVal().type().typeEnum()).isEqualTo(TypeEnum.Err);
   }
 
+  // Loads test data from the given text format file
+  private static List<SimpleTestSection> loadTestData(String fileName) throws Exception {
+    byte[] encoded = Files.readAllBytes(Paths.get(fileName));
+    String data = new String(encoded, StandardCharsets.UTF_8);
+    SimpleTestFile.Builder bldr = SimpleTestFile.newBuilder();
+    TextFormat.getParser().merge(data, bldr);
+    SimpleTestFile testData = bldr.build();
+
+    return testData.getSectionList();
+  }
+
   // Runs a test by extending the cel environment with the specified
   // types, variables and declarations, then evaluating it with the cel runtime.
   private static Program.EvalResult evaluate(SimpleTest test) {
diff --git a/src/test/resources/testdata/string_ext_supplemental.textproto b/src/test/resources/testdata/string_ext_supplemental.textproto
@@ -0,0 +1,26 @@
+# proto-file: ../../../proto/cel/expr/conformance/test/simple.proto
+# proto-message: cel.expr.conformance.test.SimpleTestFile
+
+# Ideally these tests should be in the cel-spec conformance test suite.
+# Until they are added, we can use this to test for additional functionality
+# listed in the spec.
+
+name: "string_ext_supplemental"
+description: "Supplemental tests for the strings extension library."
+section: {
+  name: "format"
+  test: {
+    name: "bytes support for string with invalid utf-8 encoding"
+    expr: '"%s".format([b"\xF0abc\x8C\xF0xyz"])'
+    value: {
+      string_value: '\ufffdabc\ufffdxyz',
+    }
+  }
+  test: {
+    name: "bytes support for string with only invalid utf-8 sequences"
+    expr: '"%s".format([b"\xF0\x8C\xF0"])'
+    value: {
+      string_value: '\ufffd',
+    }
+  }
+}