From 64753551997e5d32035da5b33c59876681b42b49 Mon Sep 17 00:00:00 2001 From: Alexander Graul Date: Tue, 12 Aug 2025 11:47:24 +0200 Subject: [PATCH] Simplify utils.json.find_json function The previous implementation computed all combinations of potential JSON documents and tried to `json.loads()` them. That resulted in num({) * num(}) tries, which could take hours on large inputs. The approach implemented with this change simplifies the work we do: we only look for opening '{' and '[' characters, and try to parse the rest of input string with JSONDecoder.raw_decode. This method ignores extraneous data at the end and is faster than doing it ourselves in Python. --- changelog/68258.fixed.md | 1 + salt/utils/json.py | 76 +++++++++-------------------------- tests/unit/utils/test_json.py | 6 +++ 3 files changed, 25 insertions(+), 58 deletions(-) create mode 100644 changelog/68258.fixed.md diff --git a/changelog/68258.fixed.md b/changelog/68258.fixed.md new file mode 100644 index 000000000000..a9afeccef71b --- /dev/null +++ b/changelog/68258.fixed.md @@ -0,0 +1 @@ +Simplified and sped up `utils.json.find_json` function diff --git a/salt/utils/json.py b/salt/utils/json.py index c74cce6ee859..1605e75f9f3c 100644 --- a/salt/utils/json.py +++ b/salt/utils/json.py @@ -2,6 +2,7 @@ Functions to work with JSON """ +import contextlib import json import logging @@ -24,66 +25,25 @@ def __split(raw): return raw.splitlines() -def find_json(raw): - """ - Pass in a raw string and load the json when it starts. 
This allows for a - string to start or end with garbage but the JSON be cleanly loaded - """ - ret = {} - lines = __split(raw) - lengths = list(map(len, lines)) - starts = [] - ends = [] - - # Search for possible starts and ends of the json fragments - for ind, line in enumerate(lines): - line = line.lstrip() - line = line[0] if line else line - if line == "{" or line == "[": - starts.append((ind, line)) - if line == "}" or line == "]": - ends.append((ind, line)) - - # List all the possible pairs of starts and ends, - # and fill the length of each block to sort by size after - starts_ends = [] - for start, start_char in starts: - for end, end_br in reversed(ends): - if end > start and ( - (start_char == "{" and end_br == "}") - or (start_char == "[" and end_br == "]") - ): - starts_ends.append((start, end, sum(lengths[start : end + 1]))) - - # Iterate through all the possible pairs starting from the largest - starts_ends.sort(key=lambda x: (x[2], x[1] - x[0], x[0]), reverse=True) - for start, end, _ in starts_ends: - # Try filtering non-JSON text right after the last closing character - end_str = lines[end].lstrip()[0] - working = "\n".join(lines[start:end]) + end_str - try: - ret = json.loads(working) - return ret - except ValueError: - continue +def find_json(s: str): + """Pass in a string and load JSON within it. - # Fall back to old implementation for backward compatibility - # expecting json after the text - for ind, _ in enumerate(lines): - try: - working = "\n".join(lines[ind:]) - except UnicodeDecodeError: - working = "\n".join(salt.utils.data.decode(lines[ind:])) + The string may contain non-JSON text before and after the JSON document. - try: - ret = json.loads(working) - except ValueError: - continue - if ret: - return ret - if not ret: - # Not json, raise an error - raise ValueError + Raises ValueError if no valid JSON was found. 
+ """ + decoder = json.JSONDecoder() + + # We look for the beginning of JSON objects / arrays and let raw_decode() handle + # extraneous data at the end. + for idx, char in enumerate(s): + if char == "{" or char == "[": + # JSONDecodeErrors are expected on stray '{'/'[' in the non-JSON part + with contextlib.suppress(json.JSONDecodeError): + data, _ = decoder.raw_decode(s[idx:]) + return data + + raise ValueError def import_json(): diff --git a/tests/unit/utils/test_json.py b/tests/unit/utils/test_json.py index 4aa123bf8061..3770d2c73f5c 100644 --- a/tests/unit/utils/test_json.py +++ b/tests/unit/utils/test_json.py @@ -49,6 +49,12 @@ class JSONTestCase(TestCase): ) ) + def test_find_json_unbalanced_brace_in_string(self): + test_sample_json = '{"title": "I like curly braces like this one:{"}' + expected_ret = {"title": "I like curly braces like this one:{"} + ret = salt.utils.json.find_json(test_sample_json) + self.assertDictEqual(ret, expected_ret) + def test_find_json(self): test_sample_json = """ {