fix: decouple mocking from evals #1148
```diff
@@ -6,10 +6,13 @@
 from pydantic import BaseModel, TypeAdapter

+from uipath._cli._evals.mocks.strategy import (
+    LLMMockingStrategy,
+    MockingStrategy,
+)
 from uipath.tracing import traced
 from uipath.tracing._utils import _SpanUtils

-from .._models._evaluation_set import EvaluationItem, LLMMockingStrategy
 from .._models._mocks import ExampleCall
 from .mocker import (
     Mocker,
```
```diff
@@ -74,29 +77,27 @@ def pydantic_to_dict_safe(obj: Any) -> Any:
 class LLMMocker(Mocker):
     """LLM Based Mocker."""

-    def __init__(self, evaluation_item: EvaluationItem):
+    def __init__(self, mocking_strategy: MockingStrategy):
         """LLM Mocker constructor."""
-        self.evaluation_item = evaluation_item
-        assert isinstance(self.evaluation_item.mocking_strategy, LLMMockingStrategy)
+        self.mocking_strategy = mocking_strategy
+        assert isinstance(self.mocking_strategy, LLMMockingStrategy)

     @traced(name="__mocker__", recording=False)
     async def response(
         self, func: Callable[[T], R], params: dict[str, Any], *args: T, **kwargs
     ) -> R:
         """Respond with mocked response generated by an LLM."""
-        assert isinstance(self.evaluation_item.mocking_strategy, LLMMockingStrategy)
+        assert isinstance(self.mocking_strategy, LLMMockingStrategy)

         function_name = params.get("name") or func.__name__
-        if function_name in [
-            x.name for x in self.evaluation_item.mocking_strategy.tools_to_simulate
-        ]:
+        if function_name in [x.name for x in self.mocking_strategy.tools_to_simulate]:
             from uipath.platform import UiPath
             from uipath.platform.chat._llm_gateway_service import _cleanup_schema

             from .mocks import (
                 cache_manager_context,
-                evaluation_context,
                 execution_id_context,
+                mocking_strategy_context,
                 span_collector_context,
             )
```
```diff
@@ -127,10 +128,10 @@ async def response(
             ]

             test_run_history = "(empty)"
-            eval_item = evaluation_context.get()
+            strategy = mocking_strategy_context.get()
             span_collector = span_collector_context.get()
             execution_id = execution_id_context.get()
-            if eval_item and span_collector and execution_id:
+            if strategy and span_collector and execution_id:
                 spans = span_collector.get_spans(execution_id)
                 test_run_history = _SpanUtils.spans_to_llm_context(spans)
```
```diff
@@ -155,16 +156,16 @@
                 },
                 "agentInfo": {  # This is incomplete
```
**Member (Author):** @akshaylive why do we need this?

**Collaborator:** The prompts were mostly copied over from URT (check L41). It does make sense for the mocker to know about the agent's context -- especially the eval inputs. Maybe we need to create a separate class like this?
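Purely as a hypothetical illustration of the kind of agent-context class being suggested — the name and fields below are assumptions, not part of this PR:

```python
# Hypothetical only -- not part of this PR. A small model carrying just the
# agent context the mocker needs, decoupled from the full EvaluationItem.
from typing import Any, Optional

from pydantic import BaseModel


class AgentMockingContext(BaseModel):
    agent_name: Optional[str] = None               # assumed field
    action_name: Optional[str] = None              # assumed field
    user_input: Optional[dict[str, Any]] = None    # assumed field
```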
```diff
                     # "agentName": self.evaluation_item.name,  # to be obtained.
-                    "actionName": self.evaluation_item.name,  # Not sure if this is right?
-                    "userInput": self.evaluation_item.inputs,
+                    # "actionName": self.evaluation_item.name,  # Not sure if this is right?
+                    # "userInput": self.evaluation_item.inputs,
                 },
-                "testRunProctorInstructions": self.evaluation_item.mocking_strategy.prompt,
+                "testRunProctorInstructions": self.mocking_strategy.prompt,
             }
             prompt_generation_args = {
                 k: json.dumps(pydantic_to_dict_safe(v))
                 for k, v in prompt_input.items()
             }
-            model_parameters = self.evaluation_item.mocking_strategy.model
+            model_parameters = self.mocking_strategy.model
             completion_kwargs = (
                 model_parameters.model_dump(by_alias=False, exclude_none=True)
                 if model_parameters
```
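For orientation, here is a minimal sketch of the context-variable handoff the new code path relies on. The variable name mirrors the `mocking_strategy_context` import in the diff, but the runner-side code is an assumption:

```python
# Sketch of the assumed plumbing: the eval runner publishes the active
# strategy via a ContextVar, and the mocker reads it without ever seeing
# the EvaluationItem.
from contextvars import ContextVar
from typing import Any, Optional

mocking_strategy_context: ContextVar[Optional[Any]] = ContextVar(
    "mocking_strategy_context", default=None
)


def run_with_strategy(strategy: Any) -> None:
    # Set the strategy before executing the agent; LLMMocker.response()
    # later reads it with mocking_strategy_context.get().
    token = mocking_strategy_context.set(strategy)
    try:
        pass  # execute the evaluation run here
    finally:
        mocking_strategy_context.reset(token)
```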
```diff
@@ -1,25 +1,25 @@
 """Mocker Factory."""

-from uipath._cli._evals._models._evaluation_set import (
-    EvaluationItem,
-    LLMMockingStrategy,
-    MockitoMockingStrategy,
-)
 from uipath._cli._evals.mocks.llm_mocker import LLMMocker
 from uipath._cli._evals.mocks.mocker import Mocker
 from uipath._cli._evals.mocks.mockito_mocker import MockitoMocker
+from uipath._cli._evals.mocks.strategy import (
+    LLMMockingStrategy,
+    MockingStrategy,
+    MockitoMockingStrategy,
+)


 class MockerFactory:
     """Mocker factory."""

     @staticmethod
-    def create(evaluation_item: EvaluationItem) -> Mocker:
+    def create(strategy: MockingStrategy) -> Mocker:
         """Create a mocker instance."""
-        match evaluation_item.mocking_strategy:
+        match strategy:
             case LLMMockingStrategy():
-                return LLMMocker(evaluation_item)
+                return LLMMocker(strategy)
             case MockitoMockingStrategy():
-                return MockitoMocker(evaluation_item)
+                return MockitoMocker(strategy)
             case _:
                 raise ValueError("Unknown mocking strategy")
```
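The factory now dispatches purely on the strategy's type via structural pattern matching. A self-contained sketch of the same dispatch shape, using stand-in dataclasses rather than the real models:

```python
# Stand-in types for illustration; the real strategies live in
# uipath._cli._evals.mocks.strategy.
from dataclasses import dataclass


@dataclass
class MockingStrategy:
    pass


@dataclass
class LLMMockingStrategy(MockingStrategy):
    prompt: str = ""


@dataclass
class MockitoMockingStrategy(MockingStrategy):
    behaviors: tuple = ()


def create(strategy: MockingStrategy) -> str:
    # `case LLMMockingStrategy():` is a class pattern: it matches any
    # instance of that class, regardless of its field values.
    match strategy:
        case LLMMockingStrategy():
            return "llm"
        case MockitoMockingStrategy():
            return "mockito"
        case _:
            raise ValueError("Unknown mocking strategy")


assert create(LLMMockingStrategy(prompt="simulate tool calls")) == "llm"
assert create(MockitoMockingStrategy()) == "mockito"
```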
It's strange for the input mocker to be using expectation values. The prompts were reused, so we didn't think much of this. Do you know what happens in prod during simulations -- is it `{}`?

Secondly, `evaluation_criterias` is a map of `evaluator_id -> criterias`. For URT/"legacy evaluation items", these are repeated during load, so this object will have a lot of repeated values. My $0.02 is that we should completely get rid of these fields from input simulation, but we can do that in a separate PR. @bai-uipath: could you follow up on this with the right POCs?
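For concreteness, a hypothetical shape for that map — the evaluator ids and criteria values here are invented, not taken from the codebase:

```python
# Hypothetical illustration of the duplication described above: legacy
# evaluation items repeat the same criteria under every evaluator id.
evaluation_criterias = {
    "evaluator-1": {"expected_output": "approved"},
    "evaluator-2": {"expected_output": "approved"},  # same value, repeated
}
```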