Skip to content

Commit 5ced77a — Judge API V2 | Merge Judge and Inference configs (#1776)
(1 parent: 0be6323)

File tree

15 files changed: +591 −526 lines
Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,36 @@
1-
system_instruction: |
2-
You are an expert evaluator tasked with assessing the relevance of an answer to a given question.
1+
judge_params:
2+
system_instruction: |
3+
You are an expert evaluator tasked with assessing the relevance of an answer to a given question.
34
4-
Specifically, you need to assess whether the answer:
5-
- Responds to the specific question being asked
6-
- Stays on topic and doesn't drift to unrelated subjects
7-
- Provides information that is pertinent to what was requested
5+
Specifically, you need to assess whether the answer:
6+
- Responds to the specific question being asked
7+
- Stays on topic and doesn't drift to unrelated subjects
8+
- Provides information that is pertinent to what was requested
89
9-
Note: An answer can be relevant even if it's incomplete, incorrect, or admits uncertainty.
10+
Note: An answer can be relevant even if it's incomplete, incorrect, or admits uncertainty.
1011
11-
prompt_template: |
12-
Here is the data:
13-
[BEGIN DATA]
14-
***
15-
[Question]:
16-
{question}
17-
***
18-
[Answer]:
19-
{answer}
20-
***
21-
[END DATA]
12+
prompt_template: |
13+
Here is the data:
14+
[BEGIN DATA]
15+
***
16+
[Question]:
17+
{question}
18+
***
19+
[Answer]:
20+
{answer}
21+
***
22+
[END DATA]
2223
23-
response_format: JSON
24-
judgment_type: BOOL
25-
include_explanation: True
24+
response_format: JSON
25+
judgment_type: BOOL
26+
include_explanation: True
27+
28+
inference_config:
29+
model:
30+
model_name: "gpt-4o"
31+
32+
engine: OPENAI
33+
34+
generation:
35+
max_new_tokens: 8192
36+
temperature: 1.0

src/oumi/__init__.py

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,6 @@ def judge_dataset(config: JudgeConfig, dataset: BaseSftDataset) -> list[dict[str
244244

245245
def judge_v2_dataset(
246246
judge_config: JudgeConfigV2 | str,
247-
inference_config: InferenceConfig,
248247
dataset: list[dict[str, str]],
249248
) -> list[JudgeOutput]:
250249
"""Judge a dataset using Oumi's Judge framework.
@@ -258,10 +257,7 @@ def judge_v2_dataset(
258257
3. Returns structured JudgeOutput objects containing parsed results.
259258
260259
Args:
261-
judge_config: JudgeConfig object or path to a judge config;
262-
includes prompt template, response format, and output field specifications.
263-
inference_config: The configuration for inference, including model settings,
264-
generation parameters, and engine type.
260+
judge_config: JudgeConfig object or path to a judge config file.
265261
dataset: List of dictionaries containing input data for evaluation. Each
266262
dictionary should contain key-value pairs that match placeholders in
267263
the judge's prompt template (e.g., {'question': '...', 'answer': '...'}).
@@ -274,24 +270,30 @@ def judge_v2_dataset(
274270
- field_scores: Numeric scores for applicable fields
275271
276272
Example:
277-
>>> config = JudgeConfig(
278-
... prompt_template="Is this answer helpful? "
279-
... "Question: {question} Answer: {answer}",
280-
... judgment_type=JudgeOutputType.BOOL,
281-
... response_format=JudgeResponseFormat.JSON
282-
... ...
273+
>>> judge_config = JudgeConfig( # doctest: +SKIP
274+
... judge_params=JudgeParams(
275+
... prompt_template="Is this helpful? {question}, {answer}",
276+
... response_format=JudgeResponseFormat.XML,
277+
... judgment_type=JudgeOutputType.BOOL,
278+
... include_explanation=False
279+
... ),
280+
... inference_config=InferenceConfig(
281+
... model=ModelParams(model_name="gpt-4.1"),
282+
... generation=GenerationParams(max_tokens=100),
283+
... engine=InferenceEngineType.OPENAI
284+
... )
283285
... )
284286
>>> dataset = [
285287
... {'question': 'What is 2+2?', 'answer': '4'},
286288
... {'question': 'How to cook?', 'answer': 'I dont know'}
287289
... ]
288-
>>> judged_outputs = judge_dataset(judge_config, inference_config, dataset)
290+
>>> judged_outputs = judge_dataset(judge_config, dataset)
289291
>>> for output in judged_outputs:
290292
... print(output.field_values) # e.g., {'judgment': True}
291293
"""
292294
import oumi.judge_v2
293295

294-
return oumi.judge_v2.judge_dataset(judge_config, inference_config, dataset)
296+
return oumi.judge_v2.judge_dataset(judge_config, dataset)
295297

296298

297299
def train(

src/oumi/cli/judge_v2.py

Lines changed: 2 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -13,40 +13,23 @@
1313
# limitations under the License.
1414

1515
from pathlib import Path
16-
from typing import TYPE_CHECKING, Annotated, Optional
16+
from typing import Annotated, Optional
1717

1818
import typer
1919
from rich.table import Table
2020

2121
from oumi.cli import cli_utils
2222

23-
if TYPE_CHECKING:
24-
from oumi.core.configs import InferenceConfig
25-
26-
27-
def _load_inference_config(config: str, extra_args: list[str]) -> "InferenceConfig":
28-
from oumi.core.configs import InferenceConfig
29-
30-
if not Path(config).exists():
31-
typer.echo(f"Config file not found: '{config}'")
32-
raise typer.Exit(code=1)
33-
34-
return InferenceConfig.from_yaml_and_arg_list(config, extra_args)
35-
3623

3724
def judge_file(
3825
ctx: typer.Context,
3926
judge_config: Annotated[
4027
str,
4128
typer.Option(
4229
"--judge-config",
43-
help="Path to the judge config file or built-in judge name",
30+
help="Path to the judge config file",
4431
),
4532
],
46-
inference_config: Annotated[
47-
str,
48-
typer.Option("--inference-config", help="Path to the inference config file"),
49-
],
5033
input_file: Annotated[
5134
str, typer.Option("--input-file", help="Path to the dataset input file (jsonl)")
5235
],
@@ -68,14 +51,6 @@ def judge_file(
6851
# Resolve judge config
6952
judge_config_obj = JudgeConfig.from_path(path=judge_config, extra_args=extra_args)
7053

71-
# Load inference config from file
72-
inference_config_path = str(
73-
cli_utils.resolve_and_fetch_config(
74-
inference_config,
75-
)
76-
)
77-
inference_config_obj = _load_inference_config(inference_config_path, extra_args)
78-
7954
# Ensure the dataset input file exists
8055
if not Path(input_file).exists():
8156
typer.echo(f"Input file not found: '{input_file}'")
@@ -84,7 +59,6 @@ def judge_file(
8459
# Judge the dataset
8560
judge_outputs = judge_v2.judge_file(
8661
judge_config=judge_config_obj,
87-
inference_config=inference_config_obj,
8862
input_file=input_file,
8963
output_file=output_file,
9064
)

src/oumi/core/configs/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@
6060
- :class:`~oumi.core.configs.judge_config.JudgeAttribute`
6161
- :class:`~oumi.core.configs.judge_config.JudgeAttributeValueType`
6262
- :class:`~oumi.core.configs.judge_config_v2.JudgeConfig`
63-
- :class:`~oumi.core.configs.judge_config_v2.JudgeOutputType`
64-
- :class:`~oumi.core.configs.judge_config_v2.JudgeResponseFormat`
63+
- :class:`~oumi.core.configs.params.judge_params.JudgeOutputType`
64+
- :class:`~oumi.core.configs.params.judge_params.JudgeResponseFormat`
6565
6666
Example:
6767
>>> from oumi.core.configs import ModelParams, TrainingConfig, TrainingParams
@@ -93,10 +93,6 @@
9393
from oumi.core.configs.judge_config_v2 import (
9494
JudgeConfig as JudgeConfigV2,
9595
)
96-
from oumi.core.configs.judge_config_v2 import (
97-
JudgeOutputType,
98-
JudgeResponseFormat,
99-
)
10096
from oumi.core.configs.params.data_params import (
10197
DataParams,
10298
DatasetParams,
@@ -120,6 +116,10 @@
120116
from oumi.core.configs.params.generation_params import GenerationParams
121117
from oumi.core.configs.params.grpo_params import GrpoParams
122118
from oumi.core.configs.params.guided_decoding_params import GuidedDecodingParams
119+
from oumi.core.configs.params.judge_params import (
120+
JudgeOutputType,
121+
JudgeResponseFormat,
122+
)
123123
from oumi.core.configs.params.model_params import ModelParams
124124
from oumi.core.configs.params.peft_params import (
125125
LoraWeightInitialization,

0 commit comments

Comments (0)