diff --git a/src/uipath/_cli/_evals/_models/_evaluation_set.py b/src/uipath/_cli/_evals/_models/_evaluation_set.py index c51fcb561..714a88034 100644 --- a/src/uipath/_cli/_evals/_models/_evaluation_set.py +++ b/src/uipath/_cli/_evals/_models/_evaluation_set.py @@ -1,9 +1,15 @@ -from enum import Enum, IntEnum -from typing import Annotated, Any, Literal, Union +from enum import IntEnum +from typing import Any, Literal from pydantic import BaseModel, ConfigDict, Field from pydantic.alias_generators import to_camel +from uipath._cli._evals.mocks.types import ( + InputMockingStrategy, + MockingStrategy, + ToolSimulation, +) + class EvaluatorReference(BaseModel): """Reference to an evaluator with optional weight. @@ -52,32 +58,6 @@ def serialize(instance: "EvaluatorReference") -> Any: ) -class EvaluationSimulationTool(BaseModel): - name: str = Field(..., alias="name") - - -class MockingStrategyType(str, Enum): - LLM = "llm" - MOCKITO = "mockito" - UNKNOWN = "unknown" - - -class BaseMockingStrategy(BaseModel): - pass - - -class ModelSettings(BaseModel): - """Model Generation Parameters.""" - - model: str = Field(..., alias="model") - temperature: float | str | None = Field(default=None, alias="temperature") - top_p: float | None = Field(default=None, alias="topP") - top_k: int | None = Field(default=None, alias="topK") - frequency_penalty: float | None = Field(default=None, alias="frequencyPenalty") - presence_penalty: float | None = Field(default=None, alias="presencePenalty") - max_tokens: int | None = Field(default=None, alias="maxTokens") - - class EvaluationSetModelSettings(BaseModel): """Model setting overrides within evaluation sets with ID.""" @@ -88,75 +68,6 @@ class EvaluationSetModelSettings(BaseModel): temperature: float | str | None = Field(default=None, alias="temperature") -class LLMMockingStrategy(BaseMockingStrategy): - type: Literal[MockingStrategyType.LLM] = MockingStrategyType.LLM - prompt: str = Field(..., alias="prompt") - tools_to_simulate: list[EvaluationSimulationTool] = Field( - ..., alias="toolsToSimulate" - ) - model: ModelSettings | None = Field(None, alias="model") - - model_config = ConfigDict( - validate_by_name=True, validate_by_alias=True, extra="allow" - ) - - -class InputMockingStrategy(BaseModel): - prompt: str = Field(..., alias="prompt") - model: ModelSettings | None = Field(None, alias="model") - - model_config = ConfigDict( - validate_by_name=True, validate_by_alias=True, extra="allow" - ) - - -class MockingArgument(BaseModel): - args: list[Any] = Field(default_factory=lambda: [], alias="args") - kwargs: dict[str, Any] = Field(default_factory=lambda: {}, alias="kwargs") - - -class MockingAnswerType(str, Enum): - RETURN = "return" - RAISE = "raise" - - -class MockingAnswer(BaseModel): - type: MockingAnswerType - value: Any = Field(..., alias="value") - - -class MockingBehavior(BaseModel): - function: str = Field(..., alias="function") - arguments: MockingArgument = Field(..., alias="arguments") - then: list[MockingAnswer] = Field(..., alias="then") - - -class MockitoMockingStrategy(BaseMockingStrategy): - type: Literal[MockingStrategyType.MOCKITO] = MockingStrategyType.MOCKITO - behaviors: list[MockingBehavior] = Field(..., alias="config") - - model_config = ConfigDict( - validate_by_name=True, validate_by_alias=True, extra="allow" - ) - - -KnownMockingStrategy = Annotated[ - Union[LLMMockingStrategy, MockitoMockingStrategy], - Field(discriminator="type"), -] - - -class UnknownMockingStrategy(BaseMockingStrategy): - type: str = Field(..., alias="type") - - 
model_config = ConfigDict( - validate_by_name=True, validate_by_alias=True, extra="allow" - ) - - -MockingStrategy = Union[KnownMockingStrategy, UnknownMockingStrategy] - - class EvaluationItem(BaseModel): """Individual evaluation item within an evaluation set.""" @@ -201,7 +112,7 @@ class LegacyEvaluationItem(BaseModel): simulation_instructions: str | None = Field( default=None, alias="simulationInstructions" ) - tools_to_simulate: list[EvaluationSimulationTool] = Field( + tools_to_simulate: list[ToolSimulation] = Field( default_factory=list, alias="toolsToSimulate" ) diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 089f7da9a..04f24c680 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -88,6 +88,7 @@ clear_execution_context, set_execution_context, ) +from .mocks.types import MockingContext logger = logging.getLogger(__name__) @@ -507,7 +508,15 @@ async def _execute_eval( eval_item, runtime ) - set_execution_context(eval_item, self.span_collector, execution_id) + set_execution_context( + MockingContext( + strategy=eval_item.mocking_strategy, + name=eval_item.name, + inputs=eval_item.inputs, + ), + span_collector=self.span_collector, + execution_id=execution_id, + ) await self.event_bus.publish( EvaluationEvents.CREATE_EVAL_RUN, @@ -761,8 +770,16 @@ async def _generate_input_for_eval( self, eval_item: EvaluationItem, runtime: UiPathRuntimeProtocol ) -> EvaluationItem: """Use LLM to generate a mock input for an evaluation item.""" + expected_output = ( + getattr(eval_item, "evaluation_criterias", None) + or getattr(eval_item, "expected_output", None) + or {} + ) generated_input = await generate_llm_input( - eval_item, (await self.get_schema(runtime)).input + eval_item.input_mocking_strategy, + (await self.get_schema(runtime)).input, + expected_behavior=eval_item.expected_agent_behavior or "", + expected_output=expected_output, ) updated_eval_item = eval_item.model_copy(update={"inputs": generated_input}) return updated_eval_item diff --git a/src/uipath/_cli/_evals/mocks/input_mocker.py b/src/uipath/_cli/_evals/mocks/input_mocker.py index 14427a851..0c77aed9f 100644 --- a/src/uipath/_cli/_evals/mocks/input_mocker.py +++ b/src/uipath/_cli/_evals/mocks/input_mocker.py @@ -4,7 +4,9 @@ from datetime import datetime from typing import Any -from uipath._cli._evals._models._evaluation_set import EvaluationItem +from uipath._cli._evals.mocks.types import ( + InputMockingStrategy, +) from uipath.platform import UiPath from uipath.tracing import traced @@ -54,8 +56,10 @@ def get_input_mocking_prompt( @traced(name="__mocker__", recording=False) async def generate_llm_input( - evaluation_item: EvaluationItem, + mocking_strategy: InputMockingStrategy, input_schema: dict[str, Any], + expected_behavior: str, + expected_output: dict[str, Any], ) -> dict[str, Any]: """Generate synthetic input using an LLM based on the evaluation context.""" from .mocks import cache_manager_context @@ -68,18 +72,12 @@ async def generate_llm_input( if "additionalProperties" not in input_schema: input_schema["additionalProperties"] = False - expected_output = ( - getattr(evaluation_item, "evaluation_criterias", None) - or getattr(evaluation_item, "expected_output", None) - or {} - ) - prompt_generation_args = { "input_schema": json.dumps(input_schema), - "input_generation_instructions": evaluation_item.input_mocking_strategy.prompt - if evaluation_item.input_mocking_strategy + "input_generation_instructions": mocking_strategy.prompt + if 
mocking_strategy else "", - "expected_behavior": evaluation_item.expected_agent_behavior or "", + "expected_behavior": expected_behavior or "", "expected_output": json.dumps(expected_output), } @@ -94,11 +92,7 @@ async def generate_llm_input( }, } - model_parameters = ( - evaluation_item.input_mocking_strategy.model - if evaluation_item.input_mocking_strategy - else None - ) + model_parameters = mocking_strategy.model if mocking_strategy else None completion_kwargs = ( model_parameters.model_dump(by_alias=False, exclude_none=True) if model_parameters diff --git a/src/uipath/_cli/_evals/mocks/llm_mocker.py b/src/uipath/_cli/_evals/mocks/llm_mocker.py index 6766579d5..5e2c55d8c 100644 --- a/src/uipath/_cli/_evals/mocks/llm_mocker.py +++ b/src/uipath/_cli/_evals/mocks/llm_mocker.py @@ -6,10 +6,13 @@ from pydantic import BaseModel, TypeAdapter +from uipath._cli._evals.mocks.types import ( + LLMMockingStrategy, + MockingContext, +) from uipath.tracing import traced from uipath.tracing._utils import _SpanUtils -from .._models._evaluation_set import EvaluationItem, LLMMockingStrategy from .._models._mocks import ExampleCall from .mocker import ( Mocker, @@ -74,29 +77,27 @@ def pydantic_to_dict_safe(obj: Any) -> Any: class LLMMocker(Mocker): """LLM Based Mocker.""" - def __init__(self, evaluation_item: EvaluationItem): + def __init__(self, context: MockingContext): """LLM Mocker constructor.""" - self.evaluation_item = evaluation_item - assert isinstance(self.evaluation_item.mocking_strategy, LLMMockingStrategy) + self.context = context + assert isinstance(self.context.strategy, LLMMockingStrategy) @traced(name="__mocker__", recording=False) async def response( self, func: Callable[[T], R], params: dict[str, Any], *args: T, **kwargs ) -> R: """Respond with mocked response generated by an LLM.""" - assert isinstance(self.evaluation_item.mocking_strategy, LLMMockingStrategy) + assert isinstance(self.context.strategy, LLMMockingStrategy) function_name = params.get("name") or func.__name__ - if function_name in [ - x.name for x in self.evaluation_item.mocking_strategy.tools_to_simulate - ]: + if function_name in [x.name for x in self.context.strategy.tools_to_simulate]: from uipath.platform import UiPath from uipath.platform.chat._llm_gateway_service import _cleanup_schema from .mocks import ( cache_manager_context, - evaluation_context, execution_id_context, + mocking_context, span_collector_context, ) @@ -127,10 +128,10 @@ async def response( ] test_run_history = "(empty)" - eval_item = evaluation_context.get() + ctx = mocking_context.get() span_collector = span_collector_context.get() execution_id = execution_id_context.get() - if eval_item and span_collector and execution_id: + if ctx and span_collector and execution_id: spans = span_collector.get_spans(execution_id) test_run_history = _SpanUtils.spans_to_llm_context(spans) @@ -154,17 +155,17 @@ async def response( "kwargs": kwargs, }, "agentInfo": { # This is incomplete - # "agentName": self.evaluation_item.name, # to be obtained. - "actionName": self.evaluation_item.name, # Not sure if this is right? - "userInput": self.evaluation_item.inputs, + "agentName": self.context.name, + "actionName": self.context.name, # Not sure if this is right? 
+ "userInput": self.context.inputs, }, - "testRunProctorInstructions": self.evaluation_item.mocking_strategy.prompt, + "testRunProctorInstructions": self.context.strategy.prompt, } prompt_generation_args = { k: json.dumps(pydantic_to_dict_safe(v)) for k, v in prompt_input.items() } - model_parameters = self.evaluation_item.mocking_strategy.model + model_parameters = self.context.strategy.model completion_kwargs = ( model_parameters.model_dump(by_alias=False, exclude_none=True) if model_parameters diff --git a/src/uipath/_cli/_evals/mocks/mocker_factory.py b/src/uipath/_cli/_evals/mocks/mocker_factory.py index a3bdd47cd..0b5380537 100644 --- a/src/uipath/_cli/_evals/mocks/mocker_factory.py +++ b/src/uipath/_cli/_evals/mocks/mocker_factory.py @@ -1,25 +1,25 @@ """Mocker Factory.""" -from uipath._cli._evals._models._evaluation_set import ( - EvaluationItem, - LLMMockingStrategy, - MockitoMockingStrategy, -) from uipath._cli._evals.mocks.llm_mocker import LLMMocker from uipath._cli._evals.mocks.mocker import Mocker from uipath._cli._evals.mocks.mockito_mocker import MockitoMocker +from uipath._cli._evals.mocks.types import ( + LLMMockingStrategy, + MockingContext, + MockitoMockingStrategy, +) class MockerFactory: """Mocker factory.""" @staticmethod - def create(evaluation_item: EvaluationItem) -> Mocker: + def create(context: MockingContext) -> Mocker: """Create a mocker instance.""" - match evaluation_item.mocking_strategy: + match context.strategy: case LLMMockingStrategy(): - return LLMMocker(evaluation_item) + return LLMMocker(context) case MockitoMockingStrategy(): - return MockitoMocker(evaluation_item) + return MockitoMocker(context) case _: raise ValueError("Unknown mocking strategy") diff --git a/src/uipath/_cli/_evals/mocks/mockito_mocker.py b/src/uipath/_cli/_evals/mocks/mockito_mocker.py index 4644a4489..de0dbdb5c 100644 --- a/src/uipath/_cli/_evals/mocks/mockito_mocker.py +++ b/src/uipath/_cli/_evals/mocks/mockito_mocker.py @@ -9,17 +9,17 @@ ) from pydantic import JsonValue -from uipath._cli._evals._models._evaluation_set import ( - EvaluationItem, - MockingAnswerType, - MockitoMockingStrategy, -) from uipath._cli._evals.mocks.mocker import ( Mocker, R, T, UiPathMockResponseGenerationError, ) +from uipath._cli._evals.mocks.types import ( + MockingAnswerType, + MockingContext, + MockitoMockingStrategy, +) class Stub: @@ -63,15 +63,15 @@ def _resolve_value(config: JsonValue) -> Any: class MockitoMocker(Mocker): """Mockito Mocker.""" - def __init__(self, evaluation_item: EvaluationItem): + def __init__(self, context: MockingContext): """Instantiate a mockito mocker.""" - self.evaluation_item = evaluation_item - assert isinstance(self.evaluation_item.mocking_strategy, MockitoMockingStrategy) + self.context = context + assert isinstance(self.context.strategy, MockitoMockingStrategy) self.stub = Stub() mock_obj = mocking.Mock(self.stub) - for behavior in self.evaluation_item.mocking_strategy.behaviors: + for behavior in self.context.strategy.behaviors: resolved_args = _resolve_value(behavior.arguments.args) resolved_kwargs = _resolve_value(behavior.arguments.kwargs) @@ -96,15 +96,13 @@ async def response( self, func: Callable[[T], R], params: dict[str, Any], *args: T, **kwargs ) -> R: """Return mocked response or raise appropriate errors.""" - if not isinstance( - self.evaluation_item.mocking_strategy, MockitoMockingStrategy - ): + if not isinstance(self.context.strategy, MockitoMockingStrategy): raise UiPathMockResponseGenerationError("Mocking strategy misconfigured.") # No behavior 
configured → call real function is_mocked = any( behavior.function == params["name"] - for behavior in self.evaluation_item.mocking_strategy.behaviors + for behavior in self.context.strategy.behaviors ) if not is_mocked: diff --git a/src/uipath/_cli/_evals/mocks/mocks.py b/src/uipath/_cli/_evals/mocks/mocks.py index 238ab38fc..c5ffe8246 100644 --- a/src/uipath/_cli/_evals/mocks/mocks.py +++ b/src/uipath/_cli/_evals/mocks/mocks.py @@ -4,15 +4,19 @@ from contextvars import ContextVar from typing import Any, Callable -from uipath._cli._evals._models._evaluation_set import EvaluationItem from uipath._cli._evals._span_collection import ExecutionSpanCollector from uipath._cli._evals.mocks.cache_manager import CacheManager from uipath._cli._evals.mocks.mocker import Mocker, UiPathNoMockFoundError from uipath._cli._evals.mocks.mocker_factory import MockerFactory +from uipath._cli._evals.mocks.types import ( + LLMMockingStrategy, + MockingContext, + MockitoMockingStrategy, +) # Context variables for evaluation items and mockers -evaluation_context: ContextVar[EvaluationItem | None] = ContextVar( - "evaluation", default=None +mocking_context: ContextVar[MockingContext | None] = ContextVar( + "mocking_context", default=None ) mocker_context: ContextVar[Mocker | None] = ContextVar("mocker", default=None) @@ -33,20 +37,20 @@ def set_execution_context( - eval_item: EvaluationItem, + context: MockingContext | None, span_collector: ExecutionSpanCollector, - execution_id: str, + execution_id: str | None = None, ) -> None: """Set the execution context for an evaluation run for mocking and trace access.""" - evaluation_context.set(eval_item) + mocking_context.set(context) try: - if eval_item.mocking_strategy: - mocker_context.set(MockerFactory.create(eval_item)) + if context: + mocker_context.set(MockerFactory.create(context)) else: mocker_context.set(None) except Exception: - logger.warning(f"Failed to create mocker for evaluation {eval_item.name}") + logger.warning("Failed to create mocker.") mocker_context.set(None) span_collector_context.set(span_collector) @@ -55,7 +59,7 @@ def set_execution_context( def clear_execution_context() -> None: """Clear the execution context after evaluation completes.""" - evaluation_context.set(None) + mocking_context.set(None) mocker_context.set(None) span_collector_context.set(None) execution_id_context.set(None) @@ -70,25 +74,20 @@ def _normalize_tool_name(name: str) -> str: def is_tool_simulated(tool_name: str) -> bool: - """Check if a tool will be simulated based on the current evaluation context. + """Check if a tool will be simulated based on the current mocking strategy context. Args: tool_name: The name of the tool to check. Returns: - True if we're in an evaluation context and the tool is configured + True if we're in an mocking strategy context and the tool is configured to be simulated, False otherwise. 
""" - eval_item = evaluation_context.get() - if eval_item is None or eval_item.mocking_strategy is None: + ctx = mocking_context.get() + strategy = ctx.strategy if ctx else None + if strategy is None: return False - from uipath._cli._evals._models._evaluation_set import ( - LLMMockingStrategy, - MockitoMockingStrategy, - ) - - strategy = eval_item.mocking_strategy normalized_tool_name = _normalize_tool_name(tool_name) if isinstance(strategy, LLMMockingStrategy): diff --git a/src/uipath/_cli/_evals/mocks/types.py b/src/uipath/_cli/_evals/mocks/types.py new file mode 100644 index 000000000..a90656c63 --- /dev/null +++ b/src/uipath/_cli/_evals/mocks/types.py @@ -0,0 +1,103 @@ +from enum import Enum +from typing import Annotated, Any, Literal, Union + +from pydantic import BaseModel, ConfigDict, Field + + +class MockingStrategyType(str, Enum): + LLM = "llm" + MOCKITO = "mockito" + UNKNOWN = "unknown" + + +class BaseMockingStrategy(BaseModel): + pass + + +class ToolSimulation(BaseModel): + name: str = Field(..., alias="name") + + +class ModelSettings(BaseModel): + """Model Generation Parameters.""" + + model: str = Field(..., alias="model") + temperature: float | str | None = Field(default=None, alias="temperature") + top_p: float | None = Field(default=None, alias="topP") + top_k: int | None = Field(default=None, alias="topK") + frequency_penalty: float | None = Field(default=None, alias="frequencyPenalty") + presence_penalty: float | None = Field(default=None, alias="presencePenalty") + max_tokens: int | None = Field(default=None, alias="maxTokens") + + +class LLMMockingStrategy(BaseMockingStrategy): + type: Literal[MockingStrategyType.LLM] = MockingStrategyType.LLM + prompt: str = Field(..., alias="prompt") + tools_to_simulate: list[ToolSimulation] = Field(..., alias="toolsToSimulate") + model: ModelSettings | None = Field(None, alias="model") + + model_config = ConfigDict( + validate_by_name=True, validate_by_alias=True, extra="allow" + ) + + +class InputMockingStrategy(BaseModel): + prompt: str = Field(..., alias="prompt") + model: ModelSettings | None = Field(None, alias="model") + + model_config = ConfigDict( + validate_by_name=True, validate_by_alias=True, extra="allow" + ) + + +class MockingArgument(BaseModel): + args: list[Any] = Field(default_factory=lambda: [], alias="args") + kwargs: dict[str, Any] = Field(default_factory=lambda: {}, alias="kwargs") + + +class MockingAnswerType(str, Enum): + RETURN = "return" + RAISE = "raise" + + +class MockingAnswer(BaseModel): + type: MockingAnswerType + value: Any = Field(..., alias="value") + + +class MockingBehavior(BaseModel): + function: str = Field(..., alias="function") + arguments: MockingArgument = Field(..., alias="arguments") + then: list[MockingAnswer] = Field(..., alias="then") + + +class MockitoMockingStrategy(BaseMockingStrategy): + type: Literal[MockingStrategyType.MOCKITO] = MockingStrategyType.MOCKITO + behaviors: list[MockingBehavior] = Field(..., alias="config") + + model_config = ConfigDict( + validate_by_name=True, validate_by_alias=True, extra="allow" + ) + + +KnownMockingStrategy = Annotated[ + Union[LLMMockingStrategy, MockitoMockingStrategy], + Field(discriminator="type"), +] + + +class UnknownMockingStrategy(BaseMockingStrategy): + type: str = Field(..., alias="type") + + model_config = ConfigDict( + validate_by_name=True, validate_by_alias=True, extra="allow" + ) + + +MockingStrategy = Union[KnownMockingStrategy, UnknownMockingStrategy] + + +class MockingContext(BaseModel): + strategy: MockingStrategy | None + inputs: 
dict[str, Any] = Field(default_factory=lambda: {}) + name: str = Field(default="debug") diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index 3bd7dffd4..b7b7b2ea4 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -7,11 +7,10 @@ from uipath._cli._evals._models._evaluation_set import ( EvaluationItem, EvaluationSet, - InputMockingStrategy, LegacyEvaluationItem, LegacyEvaluationSet, - LLMMockingStrategy, ) +from uipath._cli._evals.mocks.types import InputMockingStrategy, LLMMockingStrategy from uipath._cli._utils._console import ConsoleLogger console = ConsoleLogger() diff --git a/tests/cli/eval/mocks/test_input_mocker.py b/tests/cli/eval/mocks/test_input_mocker.py index 198838310..c9b3d8341 100644 --- a/tests/cli/eval/mocks/test_input_mocker.py +++ b/tests/cli/eval/mocks/test_input_mocker.py @@ -6,11 +6,10 @@ from uipath._cli._evals._models._evaluation_set import ( EvaluationItem, - InputMockingStrategy, - ModelSettings, ) from uipath._cli._evals.mocks.cache_manager import CacheManager from uipath._cli._evals.mocks.input_mocker import generate_llm_input +from uipath._cli._evals.mocks.types import InputMockingStrategy, ModelSettings @pytest.mark.asyncio @@ -98,7 +97,12 @@ async def test_generate_llm_input_with_model_settings( }, ) - result = await generate_llm_input(eval_item, input_schema) + result = await generate_llm_input( + eval_item.input_mocking_strategy, + input_schema, + expected_behavior=eval_item.expected_agent_behavior, + expected_output={"result": 35}, + ) # Verify the mocked input is correct assert result == {"query": "Calculate 5 times 7"} diff --git a/tests/cli/eval/mocks/test_mocks.py b/tests/cli/eval/mocks/test_mocks.py index 05f07e78f..45779bff7 100644 --- a/tests/cli/eval/mocks/test_mocks.py +++ b/tests/cli/eval/mocks/test_mocks.py @@ -9,8 +9,6 @@ from uipath._cli._evals._models._evaluation_set import ( EvaluationItem, - LLMMockingStrategy, - MockitoMockingStrategy, ) from uipath._cli._evals.mocks.cache_manager import CacheManager from uipath._cli._evals.mocks.mocker import UiPathMockResponseGenerationError @@ -20,6 +18,11 @@ is_tool_simulated, set_execution_context, ) +from uipath._cli._evals.mocks.types import ( + LLMMockingStrategy, + MockingContext, + MockitoMockingStrategy, +) from uipath.eval.mocks import mockable _mock_span_collector = MagicMock() @@ -61,7 +64,15 @@ def test_returns_false_when_mocking_strategy_is_none(self): "mockingStrategy": None, } evaluation = EvaluationItem(**evaluation_item) - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert is_tool_simulated("any_tool") is False clear_execution_context() @@ -80,7 +91,15 @@ def test_returns_true_for_llm_strategy_simulated_tool(self): }, } evaluation = EvaluationItem(**evaluation_item) - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert is_tool_simulated("my_tool") is True assert is_tool_simulated("other_tool") is True @@ -100,7 +119,15 @@ def test_returns_false_for_llm_strategy_non_simulated_tool(self): }, } evaluation = EvaluationItem(**evaluation_item) - 
set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert is_tool_simulated("not_simulated_tool") is False clear_execution_context() @@ -124,7 +151,15 @@ def test_returns_true_for_mockito_strategy_simulated_tool(self): }, } evaluation = EvaluationItem(**evaluation_item) - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert is_tool_simulated("my_tool") is True clear_execution_context() @@ -148,7 +183,15 @@ def test_returns_false_for_mockito_strategy_non_simulated_tool(self): }, } evaluation = EvaluationItem(**evaluation_item) - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert is_tool_simulated("not_simulated_tool") is False clear_execution_context() @@ -168,7 +211,15 @@ def test_handles_underscore_space_normalization_llm(self): }, } evaluation = EvaluationItem(**evaluation_item) - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert is_tool_simulated("my_tool") is True clear_execution_context() @@ -193,7 +244,15 @@ def test_handles_underscore_space_normalization_mockito(self): }, } evaluation = EvaluationItem(**evaluation_item) - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert is_tool_simulated("my_tool") is True clear_execution_context() @@ -234,7 +293,15 @@ def foofoo(*args, **kwargs): assert isinstance(evaluation.mocking_strategy, MockitoMockingStrategy) # Act & Assert - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert foo() == "bar1" assert foo() == "bar2" assert foo() == "bar2" @@ -246,13 +313,29 @@ def foofoo(*args, **kwargs): assert foofoo() evaluation.mocking_strategy.behaviors[0].arguments.kwargs = {"x": 1} - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert foo(x=1) == "bar1" evaluation.mocking_strategy.behaviors[0].arguments.kwargs = { "x": {"_target_": "mockito.any"} } - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert 
foo(x=2) == "bar1" @@ -292,7 +375,15 @@ async def foofoo(*args, **kwargs): assert isinstance(evaluation.mocking_strategy, MockitoMockingStrategy) # Act & Assert - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert await foo() == "bar1" assert await foo() == "bar2" assert await foo() == "bar2" @@ -304,13 +395,29 @@ async def foofoo(*args, **kwargs): assert await foofoo() evaluation.mocking_strategy.behaviors[0].arguments.kwargs = {"x": 1} - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert await foo(x=1) == "bar1" evaluation.mocking_strategy.behaviors[0].arguments.kwargs = { "x": {"_target_": "mockito.any"} } - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert await foo(x=2) == "bar1" @@ -384,11 +491,19 @@ def foofoo(*args, **kwargs): }, ) # Act & Assert - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert foo() == "bar1" - mock_request = httpx_mock.get_request() + mock_request = httpx_mock.get_request(method="POST") assert mock_request request = json.loads(mock_request.content.decode("utf-8")) assert request["response_format"] == { @@ -485,7 +600,15 @@ async def foofoo(*args, **kwargs): }, ) # Act & Assert - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert await foo() == "bar1" @@ -585,7 +708,15 @@ def foo(*args, **kwargs) -> dict[str, Any]: }, ) # Act & Assert - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert foo() == {"content": "bar1"} mock_request = httpx_mock.get_request() @@ -678,7 +809,15 @@ async def foo(*args, **kwargs) -> dict[str, Any]: }, ) # Act & Assert - set_execution_context(evaluation, _mock_span_collector, "test-execution-id") + set_execution_context( + MockingContext( + strategy=evaluation.mocking_strategy, + name=evaluation.name, + inputs=evaluation.inputs, + ), + _mock_span_collector, + "test-execution-id", + ) assert await foo() == {"content": "bar1"} mock_request = httpx_mock.get_request()
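
---

Reviewer note — a minimal usage sketch, not part of the change set, showing how the pieces introduced in this diff fit together, assuming `mocks/types.py` and the reworked `set_execution_context` land as shown above. The evaluation name (`weather-eval`), tool name (`get_weather`), and inputs are invented for illustration, and the `MagicMock` span collector stands in for the real `ExecutionSpanCollector`, mirroring what `tests/cli/eval/mocks/test_mocks.py` does:

```python
# Illustrative sketch only: wires the new MockingContext through the
# refactored mocking entry points. Names and inputs are hypothetical.
from unittest.mock import MagicMock

from uipath._cli._evals.mocks.mocks import (
    clear_execution_context,
    is_tool_simulated,
    set_execution_context,
)
from uipath._cli._evals.mocks.types import (
    LLMMockingStrategy,
    MockingContext,
    ToolSimulation,
)

# An LLM-backed strategy that simulates a single tool.
strategy = LLMMockingStrategy(
    prompt="Simulate the weather tool returning sunny conditions.",
    tools_to_simulate=[ToolSimulation(name="get_weather")],
)

# The runtime now builds this context from an EvaluationItem
# (strategy, name, inputs) instead of passing the item itself.
context = MockingContext(
    strategy=strategy,
    name="weather-eval",            # hypothetical evaluation name
    inputs={"city": "Berlin"},      # hypothetical agent inputs
)

# In production the span collector is an ExecutionSpanCollector;
# a MagicMock is enough to exercise the context handling here.
set_execution_context(context, MagicMock(), "execution-1")
try:
    assert is_tool_simulated("get_weather") is True
    assert is_tool_simulated("unrelated_tool") is False
finally:
    clear_execution_context()
```

The design point the sketch illustrates: after this change the mockers and `is_tool_simulated` consume only a `MockingContext` (strategy, name, inputs) rather than the full `EvaluationItem`, which is what allows the strategy models to move from `_models/_evaluation_set.py` into the standalone `mocks/types.py`.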