Skip to content

Commit 90f47d8

Browse files
committed
Simplify utils.find_json function
Instead of trying to parse every combination of JSON blobs to find the largest valid one, track open JSON objects and lists as the string is scanned, then try to parse each closed structure one by one, largest first.
1 parent 4a84820 commit 90f47d8

File tree

1 file changed

+35
-55
lines changed

1 file changed

+35
-55
lines changed

salt/utils/json.py

Lines changed: 35 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -24,66 +24,46 @@ def __split(raw):
2424
return raw.splitlines()
2525

2626

27-
def find_json(raw):
28-
"""
29-
Pass in a raw string and load the json when it starts. This allows for a
30-
string to start or end with garbage but the JSON be cleanly loaded
27+
def find_json(s):
28+
"""Pass in a string and load json within it.
29+
30+
The string may contain non-json text before and after the json blob.
3131
"""
32-
ret = {}
33-
lines = __split(raw)
34-
lengths = list(map(len, lines))
35-
starts = []
36-
ends = []
37-
38-
# Search for possible starts and ends of the json fragments
39-
for ind, line in enumerate(lines):
40-
line = line.lstrip()
41-
line = line[0] if line else line
42-
if line == "{" or line == "[":
43-
starts.append((ind, line))
44-
if line == "}" or line == "]":
45-
ends.append((ind, line))
46-
47-
# List all the possible pairs of starts and ends,
48-
# and fill the length of each block to sort by size after
49-
starts_ends = []
50-
for start, start_char in starts:
51-
for end, end_br in reversed(ends):
52-
if end > start and (
53-
(start_char == "{" and end_br == "}")
54-
or (start_char == "[" and end_br == "]")
55-
):
56-
starts_ends.append((start, end, sum(lengths[start : end + 1])))
57-
58-
# Iterate through all the possible pairs starting from the largest
59-
starts_ends.sort(key=lambda x: (x[2], x[1] - x[0], x[0]), reverse=True)
60-
for start, end, _ in starts_ends:
61-
# Try filtering non-JSON text right after the last closing character
62-
end_str = lines[end].lstrip()[0]
63-
working = "\n".join(lines[start:end]) + end_str
32+
# Contains tuples (start_idx, end_idx, length) for JSON objects / lists
33+
json_structures = []
34+
35+
# Track currently open objects
36+
open_obj = []
37+
open_list = []
38+
39+
# Iterate through all chars, saving JSON structures when they are closed
40+
for idx, char in enumerate(s):
41+
if char == "{":
42+
open_obj.append(idx)
43+
elif char == "}":
44+
start = open_obj.pop()
45+
json_structures.append((start, idx, idx - start))
46+
elif char == "[":
47+
open_list.append(idx)
48+
elif char == "]":
49+
start = open_list.pop()
50+
json_structures.append((start, idx, idx - start))
51+
52+
def length_start(x):
53+
"""Sort key function. Sort by length, then by start index."""
54+
return x[2], x[0]
55+
56+
json_structures.sort(key=length_start)
57+
58+
while json_structures:
59+
start, end, _ = json_structures.pop()
60+
substring = s[start : end + 1]
6461
try:
65-
ret = json.loads(working)
62+
ret = json.loads(substring)
6663
return ret
6764
except ValueError:
6865
continue
69-
70-
# Fall back to old implementation for backward compatibility
71-
# expecting json after the text
72-
for ind, _ in enumerate(lines):
73-
try:
74-
working = "\n".join(lines[ind:])
75-
except UnicodeDecodeError:
76-
working = "\n".join(salt.utils.data.decode(lines[ind:]))
77-
78-
try:
79-
ret = json.loads(working)
80-
except ValueError:
81-
continue
82-
if ret:
83-
return ret
84-
if not ret:
85-
# Not json, raise an error
86-
raise ValueError
66+
raise ValueError
8767

8868

8969
def import_json():

0 commit comments

Comments
 (0)