Skip to content

Commit ce51220

Browse files
authored
Add supplemental format conformance tests (#294)
The current conformance tests for `string.format` are not exhaustive and do not account for all scenarios in the [docs](https://github.com/google/cel-spec/blob/master/doc/extensions/strings.md). One such example is a test for invalid UTF-8. This adds the ability to specify supplemental conformance tests in the form of another textproto file. The content is merged with the actual cel conformance tests and then run against our implementation. This allows us to specify our own tests not yet covered in the official conformance tests. As a result, this includes two tests for invalid UTF-8, which incidentally turned up a bug involving collapsing placeholders for contiguous invalid UTF-8 bytes. Note that a PR has been created [here](google/cel-spec#473) to add these tests to the spec. Once added and released, they can be removed from our supplemental tests.
1 parent 256eb5c commit ce51220

File tree

3 files changed

+58
-20
lines changed

3 files changed

+58
-20
lines changed

src/main/java/build/buf/protovalidate/Format.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,9 @@ private static String formatString(Val val) {
144144
}
145145
return val.value().toString();
146146
case Bytes:
147-
return new String((byte[]) val.value(), StandardCharsets.UTF_8);
147+
String byteStr = new String((byte[]) val.value(), StandardCharsets.UTF_8);
148+
// Collapse any contiguous placeholders into one
149+
return byteStr.replaceAll("\\ufffd+", "\ufffd");
148150
case Double:
149151
Optional<String> result = validateNumber(val);
150152
if (result.isPresent()) {

src/test/java/build/buf/protovalidate/FormatTest.java

Lines changed: 29 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -30,10 +30,10 @@
3030
import java.nio.file.Paths;
3131
import java.util.ArrayList;
3232
import java.util.Arrays;
33-
import java.util.Collections;
3433
import java.util.HashMap;
3534
import java.util.List;
3635
import java.util.Map;
36+
import java.util.stream.Collectors;
3737
import java.util.stream.Stream;
3838
import org.junit.jupiter.api.BeforeAll;
3939
import org.junit.jupiter.api.Named;
@@ -79,34 +79,33 @@ class FormatTest {
7979
"object inside map");
8080

8181
@BeforeAll
82-
static void setUp() throws Exception {
83-
byte[] encoded =
84-
Files.readAllBytes(
85-
Paths.get("src/test/resources/testdata/string_ext_" + CEL_SPEC_VERSION + ".textproto"));
86-
String data = new String(encoded, StandardCharsets.UTF_8);
87-
SimpleTestFile.Builder bldr = SimpleTestFile.newBuilder();
88-
TextFormat.getParser().merge(data, bldr);
89-
SimpleTestFile testData = bldr.build();
90-
91-
List<SimpleTestSection> sections = testData.getSectionList();
82+
private static void setUp() throws Exception {
83+
// The test data from the cel-spec conformance tests
84+
List<SimpleTestSection> celSpecSections =
85+
loadTestData("src/test/resources/testdata/string_ext_" + CEL_SPEC_VERSION + ".textproto");
86+
// Our supplemental tests of functionality not in the cel conformance file, but defined in the
87+
// spec.
88+
List<SimpleTestSection> supplementalSections =
89+
loadTestData("src/test/resources/testdata/string_ext_supplemental.textproto");
90+
91+
// Combine the test data from both files into one
92+
List<SimpleTestSection> sections =
93+
Stream.concat(celSpecSections.stream(), supplementalSections.stream())
94+
.collect(Collectors.toList());
9295

9396
// Find the format tests which test successful formatting
94-
// Defaults to an empty list if nothing is found
9597
formatTests =
9698
sections.stream()
9799
.filter(s -> s.getName().equals("format"))
98-
.findFirst()
99-
.map(SimpleTestSection::getTestList)
100-
.orElse(Collections.emptyList());
100+
.flatMap(s -> s.getTestList().stream())
101+
.collect(Collectors.toList());
101102

102103
// Find the format error tests which test errors during formatting
103-
// Defaults to an empty list if nothing is found
104104
formatErrorTests =
105105
sections.stream()
106106
.filter(s -> s.getName().equals("format_errors"))
107-
.findFirst()
108-
.map(SimpleTestSection::getTestList)
109-
.orElse(Collections.emptyList());
107+
.flatMap(s -> s.getTestList().stream())
108+
.collect(Collectors.toList());
110109

111110
env = Env.newEnv(Library.Lib(new ValidateLibrary()));
112111
}
@@ -127,6 +126,17 @@ void testFormatError(SimpleTest test) {
127126
assertThat(result.getVal().type().typeEnum()).isEqualTo(TypeEnum.Err);
128127
}
129128

129+
// Loads test data from the given text format file
130+
private static List<SimpleTestSection> loadTestData(String fileName) throws Exception {
131+
byte[] encoded = Files.readAllBytes(Paths.get(fileName));
132+
String data = new String(encoded, StandardCharsets.UTF_8);
133+
SimpleTestFile.Builder bldr = SimpleTestFile.newBuilder();
134+
TextFormat.getParser().merge(data, bldr);
135+
SimpleTestFile testData = bldr.build();
136+
137+
return testData.getSectionList();
138+
}
139+
130140
// Runs a test by extending the cel environment with the specified
131141
// types, variables and declarations, then evaluating it with the cel runtime.
132142
private static Program.EvalResult evaluate(SimpleTest test) {
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# proto-file: ../../../proto/cel/expr/conformance/test/simple.proto
2+
# proto-message: cel.expr.conformance.test.SimpleTestFile
3+
4+
# Ideally these tests should be in the cel-spec conformance test suite.
5+
# Until they are added, we can use this to test for additional functionality
6+
# listed in the spec.
7+
8+
name: "string_ext_supplemental"
9+
description: "Supplemental tests for the strings extension library."
10+
section: {
11+
name: "format"
12+
test: {
13+
name: "bytes support for string with invalid utf-8 encoding"
14+
expr: '"%s".format([b"\xF0abc\x8C\xF0xyz"])'
15+
value: {
16+
string_value: '\ufffdabc\ufffdxyz',
17+
}
18+
}
19+
test: {
20+
name: "bytes support for string with only invalid utf-8 sequences"
21+
expr: '"%s".format([b"\xF0\x8C\xF0"])'
22+
value: {
23+
string_value: '\ufffd',
24+
}
25+
}
26+
}

0 commit comments

Comments
 (0)