Skip to content

Commit 5ced77a — Judge API V2 | Merge Judge and Inference configs (#1776)
(1 parent: 0be6323)

File tree

15 files changed: +591 −526 lines
Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,36 @@
1-
system_instruction: |
2-
You are an expert evaluator tasked with assessing the relevance of an answer to a given question.
1+
judge_params:
2+
system_instruction: |
3+
You are an expert evaluator tasked with assessing the relevance of an answer to a given question.
34
4-
Specifically, you need to assess whether the answer:
5-
- Responds to the specific question being asked
6-
- Stays on topic and doesn't drift to unrelated subjects
7-
- Provides information that is pertinent to what was requested
5+
Specifically, you need to assess whether the answer:
6+
- Responds to the specific question being asked
7+
- Stays on topic and doesn't drift to unrelated subjects
8+
- Provides information that is pertinent to what was requested
89
9-
Note: An answer can be relevant even if it's incomplete, incorrect, or admits uncertainty.
10+
Note: An answer can be relevant even if it's incomplete, incorrect, or admits uncertainty.
1011
11-
prompt_template: |
12-
Here is the data:
13-
[BEGIN DATA]
14-
***
15-
[Question]:
16-
{question}
17-
***
18-
[Answer]:
19-
{answer}
20-
***
21-
[END DATA]
12+
prompt_template: |
13+
Here is the data:
14+
[BEGIN DATA]
15+
***
16+
[Question]:
17+
{question}
18+
***
19+
[Answer]:
20+
{answer}
21+
***
22+
[END DATA]
2223
23-
response_format: JSON
24-
judgment_type: BOOL
25-
include_explanation: True
24+
response_format: JSON
25+
judgment_type: BOOL
26+
include_explanation: True
27+
28+
inference_config:
29+
model:
30+
model_name: "gpt-4o"
31+
32+
engine: OPENAI
33+
34+
generation:
35+
max_new_tokens: 8192
36+
temperature: 1.0

src/oumi/__init__.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,6 @@ def judge_dataset(config: JudgeConfig, dataset: BaseSftDataset) -> list[dict[str
244244

245245
def judge_v2_dataset(
246246
judge_config: JudgeConfigV2 | str,
247-
inference_config: InferenceConfig,
248247
dataset: list[dict[str, str]],
249248
) -> list[JudgeOutput]:
250249
"""Judge a dataset using Oumi's Judge framework.
@@ -258,10 +257,7 @@ def judge_v2_dataset(
258257
3. Returns structured JudgeOutput objects containing parsed results.
259258
260259
Args:
261-
judge_config: JudgeConfig object or path to a judge config;
262-
includes prompt template, response format, and output field specifications.
263-
inference_config: The configuration for inference, including model settings,
264-
generation parameters, and engine type.
260+
judge_config: JudgeConfig object or path to a judge config file.
265261
dataset: List of dictionaries containing input data for evaluation. Each
266262
dictionary should contain key-value pairs that match placeholders in
267263
the judge's prompt template (e.g., {'question': '...', 'answer': '...'}).
@@ -274,24 +270,30 @@ def judge_v2_dataset(
274270
- field_scores: Numeric scores for applicable fields
275271
276272
Example:
277-
>>> config = JudgeConfig(
278-
... prompt_template="Is this answer helpful? "
279-
... "Question: {question} Answer: {answer}",
280-
... judgment_type=JudgeOutputType.BOOL,
281-
... response_format=JudgeResponseFormat.JSON
282-
... ...
273+
>>> judge_config = JudgeConfig( # doctest: +SKIP
274+
... judge_params=JudgeParams(
275+
... prompt_template="Is this helpful? {question}, {answer}",
276+
... response_format=JudgeResponseFormat.XML,
277+
... judgment_type=JudgeOutputType.BOOL,
278+
... include_explanation=False
279+
... ),
280+
... inference_config=InferenceConfig(
281+
... model=ModelParams(model_name="gpt-4.1"),
282+
... generation=GenerationParams(max_tokens=100),
283+
... engine=InferenceEngineType.OPENAI
284+
... )
283285
... )
284286
>>> dataset = [
285287
... {'question': 'What is 2+2?', 'answer': '4'},
286288
... {'question': 'How to cook?', 'answer': 'I dont know'}
287289
... ]
288-
>>> judged_outputs = judge_dataset(judge_config, inference_config, dataset)
290+
>>> judged_outputs = judge_dataset(judge_config, dataset)
289291
>>> for output in judged_outputs:
290292
... print(output.field_values) # e.g., {'judgment': True}
291293
"""
292294
import oumi.judge_v2
293295

294-
return oumi.judge_v2.judge_dataset(judge_config, inference_config, dataset)
296+
return oumi.judge_v2.judge_dataset(judge_config, dataset)
295297

296298

297299
def train(

src/oumi/cli/judge_v2.py

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,40 +13,23 @@
1313
# limitations under the License.
1414

1515
from pathlib import Path
16-
from typing import TYPE_CHECKING, Annotated, Optional
16+
from typing import Annotated, Optional
1717

1818
import typer
1919
from rich.table import Table
2020

2121
from oumi.cli import cli_utils
2222

23-
if TYPE_CHECKING:
24-
from oumi.core.configs import InferenceConfig
25-
26-
27-
def _load_inference_config(config: str, extra_args: list[str]) -> "InferenceConfig":
28-
from oumi.core.configs import InferenceConfig
29-
30-
if not Path(config).exists():
31-
typer.echo(f"Config file not found: '{config}'")
32-
raise typer.Exit(code=1)
33-
34-
return InferenceConfig.from_yaml_and_arg_list(config, extra_args)
35-
3623

3724
def judge_file(
3825
ctx: typer.Context,
3926
judge_config: Annotated[
4027
str,
4128
typer.Option(
4229
"--judge-config",
43-
help="Path to the judge config file or built-in judge name",
30+
help="Path to the judge config file",
4431
),
4532
],
46-
inference_config: Annotated[
47-
str,
48-
typer.Option("--inference-config", help="Path to the inference config file"),
49-
],
5033
input_file: Annotated[
5134
str, typer.Option("--input-file", help="Path to the dataset input file (jsonl)")
5235
],
@@ -68,14 +51,6 @@ def judge_file(
6851
# Resolve judge config
6952
judge_config_obj = JudgeConfig.from_path(path=judge_config, extra_args=extra_args)
7053

71-
# Load inference config from file
72-
inference_config_path = str(
73-
cli_utils.resolve_and_fetch_config(
74-
inference_config,
75-
)
76-
)
77-
inference_config_obj = _load_inference_config(inference_config_path, extra_args)
78-
7954
# Ensure the dataset input file exists
8055
if not Path(input_file).exists():
8156
typer.echo(f"Input file not found: '{input_file}'")
@@ -84,7 +59,6 @@ def judge_file(
8459
# Judge the dataset
8560
judge_outputs = judge_v2.judge_file(
8661
judge_config=judge_config_obj,
87-
inference_config=inference_config_obj,
8862
input_file=input_file,
8963
output_file=output_file,
9064
)

src/oumi/core/configs/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@
6060
- :class:`~oumi.core.configs.judge_config.JudgeAttribute`
6161
- :class:`~oumi.core.configs.judge_config.JudgeAttributeValueType`
6262
- :class:`~oumi.core.configs.judge_config_v2.JudgeConfig`
63-
- :class:`~oumi.core.configs.judge_config_v2.JudgeOutputType`
64-
- :class:`~oumi.core.configs.judge_config_v2.JudgeResponseFormat`
63+
- :class:`~oumi.core.configs.params.judge_params.JudgeOutputType`
64+
- :class:`~oumi.core.configs.params.judge_params.JudgeResponseFormat`
6565
6666
Example:
6767
>>> from oumi.core.configs import ModelParams, TrainingConfig, TrainingParams
@@ -93,10 +93,6 @@
9393
from oumi.core.configs.judge_config_v2 import (
9494
JudgeConfig as JudgeConfigV2,
9595
)
96-
from oumi.core.configs.judge_config_v2 import (
97-
JudgeOutputType,
98-
JudgeResponseFormat,
99-
)
10096
from oumi.core.configs.params.data_params import (
10197
DataParams,
10298
DatasetParams,
@@ -120,6 +116,10 @@
120116
from oumi.core.configs.params.generation_params import GenerationParams
121117
from oumi.core.configs.params.grpo_params import GrpoParams
122118
from oumi.core.configs.params.guided_decoding_params import GuidedDecodingParams
119+
from oumi.core.configs.params.judge_params import (
120+
JudgeOutputType,
121+
JudgeResponseFormat,
122+
)
123123
from oumi.core.configs.params.model_params import ModelParams
124124
from oumi.core.configs.params.peft_params import (
125125
LoraWeightInitialization,

0 commit comments

Comments (0)