Skip to content

Commit 2a17ab3

Browse files
feat: support JSON schema (#63)
<!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit - **New Features** - Added functionality to generate JSON schemas from arguments, enabling integration with JSON editors like Visual Studio Code. - **Documentation** - Introduced new documentation on generating JSON schemas from arguments. - **Tests** - Added tests to validate JSON schema generation and type conversion. - **Chores** - Updated dependencies to include `jsonschema` for testing. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Jinzhe Zeng <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent d34601e commit 2a17ab3

File tree

7 files changed

+249
-2
lines changed

7 files changed

+249
-2
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ Please refer to test files for detailed usage.
3030
- [PEP 484](https://peps.python.org/pep-0484/) type annotations
3131
- Native integration with [Sphinx](https://github.com/sphinx-doc/sphinx), [DP-GUI](https://github.com/deepmodeling/dpgui), and [Jupyter Notebook](https://jupyter.org/)
3232
- JSON encoder for `Argument` and `Variant` classes
33+
- Generate [JSON schema](https://json-schema.org/) from an `Argument`, which can be further integrated with JSON editors such as [Visual Studio Code](https://code.visualstudio.com/)

dargs/dargs.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -460,11 +460,15 @@ def _check_data(self, value: Any, path=None):
460460
)
461461

462462
def _check_strict(self, value: dict, path=None):
463-
allowed_keys = self.flatten_sub(value, path).keys()
463+
allowed_keys = set(self.flatten_sub(value, path).keys())
464464
# curpath = [*path, self.name]
465465
if not len(allowed_keys):
466466
# no allowed keys defined, allow any keys
467467
return
468+
# A special case to allow $schema in any dict to be compatible with vscode + json schema
469+
# https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json
470+
# considering usually it's not a typo of users when they use $schema
471+
allowed_keys.add("$schema")
468472
for name in value.keys():
469473
if name not in allowed_keys:
470474
dym_message = did_you_mean(name, allowed_keys)

dargs/json_schema.py

Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
"""Generate JSON schema from a given dargs.Argument."""
2+
3+
from __future__ import annotations
4+
5+
from typing import Any
6+
7+
from dargs.dargs import Argument, _Flags
8+
9+
try:
10+
from typing import get_origin
11+
except ImportError:
12+
from typing_extensions import get_origin
13+
14+
15+
def generate_json_schema(argument: Argument, id: str = "") -> dict:
    """Build a JSON schema document describing a dargs.Argument.

    Parameters
    ----------
    argument : Argument
        The root argument to describe. Its name is used as the schema title.
    id : str, optional
        The URL of the schema, stored under the ``$id`` key, by default "".

    Returns
    -------
    dict
        The JSON schema as a plain dictionary. Use :func:`json.dump` to
        write it to a file or :func:`json.dumps` to get a string.

    Examples
    --------
    Dump the JSON schema of DeePMD-kit to a file:

    >>> from dargs.json_schema import generate_json_schema
    >>> from deepmd.utils.argcheck import gen_args
    >>> import json
    >>> from dargs import Argument
    >>> a = Argument("DeePMD-kit", dtype=dict, sub_fields=gen_args())
    >>> schema = generate_json_schema(a)
    >>> with open("deepmd.json", "w") as f:
    ...     json.dump(schema, f, indent=2)
    """
    # Document-level header keys go first; the keys converted from the
    # argument are merged afterwards and take precedence on a name clash.
    document = {
        "$schema": "https://json-schema.org/draft/2020-12/schema",
        "$id": id,
        "title": argument.name,
    }
    document.update(_convert_single_argument(argument))
    return document
51+
52+
53+
def _convert_single_argument(argument: Argument) -> dict:
    """Convert a single argument to JSON schema.

    Sub-fields and the choices of sub-variants are converted recursively.
    When ``argument.repeat`` is set, the object keywords (``properties``,
    ``required``, ``allOf``) are placed under ``items``; otherwise they are
    placed directly on the returned schema.

    Parameters
    ----------
    argument : Argument
        The argument to convert.

    Returns
    -------
    dict
        The JSON schema of the argument.
    """
    data = {
        "description": argument.doc,
        # Deduplicate through a set, then sort: iteration order of a set of
        # strings is not stable across interpreter runs, so an unsorted list
        # would make the generated schema differ from run to run.
        "type": sorted({_convert_types(tt) for tt in argument.dtype}),
    }
    if argument.default is not _Flags.NONE:
        data["default"] = argument.default

    # Each sub-field is listed under its name and under every alias; each
    # variant contributes its flag key as a string enum of the choice names
    # and choice aliases.
    properties = {
        **{
            nn: _convert_single_argument(aa)
            for aa in argument.sub_fields.values()
            for nn in (aa.name, *aa.alias)
        },
        **{
            vv.flag_name: {
                "type": "string",
                "enum": list(vv.choice_dict.keys()) + list(vv.choice_alias.keys()),
                "default": vv.default_tag,
                "description": vv.doc,
            }
            for vv in argument.sub_variants.values()
        },
    }

    # Aliased mandatory fields cannot go into a plain "required" list (any
    # one of the names satisfies them); they are handled through "oneOf"
    # entries appended to ``allof`` below.
    required = [
        aa.name
        for aa in argument.sub_fields.values()
        if not aa.optional and not aa.alias
    ] + [vv.flag_name for vv in argument.sub_variants.values() if not vv.optional]

    # One if/then rule per variant choice: when the flag equals the choice
    # name (or one of the choice's aliases), the choice's schema applies.
    allof = [
        {
            "if": {
                "oneOf": [
                    {
                        "properties": {vv.flag_name: {"const": kkaa}},
                    }
                    for kkaa in (kk, *aa.alias)
                ],
                # The flag may be absent only for the default choice of an
                # optional variant.
                "required": [vv.flag_name]
                if not (vv.optional and vv.default_tag == kk)
                else [],
            },
            "then": _convert_single_argument(aa),
        }
        for vv in argument.sub_variants.values()
        for kk, aa in vv.choice_dict.items()
    ]
    allof += [
        {"oneOf": [{"required": [nn]} for nn in (aa.name, *aa.alias)]}
        for aa in argument.sub_fields.values()
        if not aa.optional and aa.alias
    ]

    # Keywords describing the object; "allOf" is only emitted when non-empty.
    object_keywords = {"properties": properties, "required": required}
    if allof:
        object_keywords["allOf"] = allof

    if argument.repeat:
        data["items"] = {"type": "object", **object_keywords}
    else:
        data.update(object_keywords)
    return data
130+
131+
132+
def _convert_types(T: type | Any | None) -> str:
133+
"""Convert a type to JSON schema type.
134+
135+
Parameters
136+
----------
137+
T : type | Any | None
138+
The type to convert.
139+
140+
Returns
141+
-------
142+
str
143+
The JSON schema type.
144+
"""
145+
# string, number, integer, object, array, boolean, null
146+
if T is None or T is type(None):
147+
return "null"
148+
elif T is str:
149+
return "string"
150+
elif T in (int, float):
151+
return "number"
152+
elif T is bool:
153+
return "boolean"
154+
elif T is list or get_origin(T) is list:
155+
return "array"
156+
elif T is dict or get_origin(T) is dict:
157+
return "object"
158+
raise ValueError(f"Unknown type: {T}")

docs/json_schema.md

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
## Generate JSON schema from an argument
2+
3+
One can use {func}`dargs.json_schema.generate_json_schema` to generate a [JSON schema](https://json-schema.org/) from an `Argument`.
4+
5+
```py
6+
import json
7+
8+
from dargs import Argument
9+
from dargs.json_schema import generate_json_schema
10+
from deepmd.utils.argcheck import gen_args
11+
12+
13+
a = Argument("DeePMD-kit", dtype=dict, sub_fields=gen_args())
14+
schema = generate_json_schema(a)
15+
with open("deepmd.json", "w") as f:
16+
json.dump(schema, f, indent=2)
17+
```
18+
19+
JSON schema can be used in several JSON editors. For example, in [Visual Studio Code](https://code.visualstudio.com/), you can [configure JSON schema](https://code.visualstudio.com/docs/languages/json#_json-schemas-and-settings) in the project `settings.json`:
20+
21+
```json
22+
{
23+
"json.schemas": [
24+
{
25+
"fileMatch": [
26+
"/**/*.json"
27+
],
28+
"url": "./deepmd.json"
29+
}
30+
]
31+
}
32+
```
33+
34+
VS Code also allows one to [specify the JSON schema in a JSON file](https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json) with the `$schema` key.
35+
For compatibility, dargs does not raise an error for a `$schema` key in strict mode, even if `$schema` is not defined in the argument.
36+
37+
```json
38+
{
39+
"$schema": "./deepmd.json",
40+
"model": {}
41+
}
42+
```

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ repository = "https://github.com/deepmodeling/dargs"
3030
[project.optional-dependencies]
3131
test = [
3232
"ipython",
33+
"jsonschema",
3334
]
3435
typecheck = [
3536
"basedpyright==1.12.2",

tests/dpmdargs.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def descrpt_hybrid_args():
216216
"type",
217217
[
218218
Argument("loc_frame", dict, descrpt_local_frame_args()),
219-
Argument("se_a", dict, descrpt_se_a_args()),
219+
Argument("se_e2_a", dict, descrpt_se_a_args(), alias=["se_a"]),
220220
Argument("se_r", dict, descrpt_se_r_args()),
221221
Argument(
222222
"se_a_3be", dict, descrpt_se_a_3be_args(), alias=["se_at"]
@@ -764,8 +764,19 @@ def normalize(data):
764764
return data
765765

766766

767+
def gen_args() -> Argument:
    """Return a "base" dict argument wrapping the four top-level sections.

    The sub-fields are built, in order, by ``model_args``,
    ``learning_rate_args``, ``loss_args`` and ``training_args``.
    """
    section_builders = (model_args, learning_rate_args, loss_args, training_args)
    sections = [build() for build in section_builders]
    return Argument("base", dict, sections)
775+
776+
767777
example_json_str = """
768778
{
779+
"$schema": "this should be ignored by dargs",
769780
"_comment": " model parameters",
770781
"model": {
771782
"type_map": ["O", "H"],

tests/test_json_schema.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from __future__ import annotations
2+
3+
import json
4+
import unittest
5+
6+
from jsonschema import validate
7+
8+
from dargs.json_schema import _convert_types, generate_json_schema
9+
10+
from .dpmdargs import example_json_str, gen_args
11+
12+
13+
class TestJsonSchema(unittest.TestCase):
    """Tests for JSON schema generation and type-name conversion."""

    def test_json_schema(self):
        """The example input validates against the generated schema."""
        schema = generate_json_schema(gen_args())
        validate(json.loads(example_json_str), schema)

    def test_convert_types(self):
        """Supported types map to their JSON names; others raise."""
        expected_names = (
            (int, "number"),
            (str, "string"),
            (float, "number"),
            (bool, "boolean"),
            (None, "null"),
            (type(None), "null"),
            (list, "array"),
            (dict, "object"),
        )
        for py_type, json_name in expected_names:
            self.assertEqual(_convert_types(py_type), json_name)
        with self.assertRaises(ValueError):
            _convert_types(set)

0 commit comments

Comments
 (0)