2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "uipath"
version = "2.5.9"
version = "2.5.10"
description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools."
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.11"
107 changes: 9 additions & 98 deletions src/uipath/_cli/_evals/_models/_evaluation_set.py
@@ -1,9 +1,15 @@
from enum import Enum, IntEnum
from typing import Annotated, Any, Literal, Union
from enum import IntEnum
from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field
from pydantic.alias_generators import to_camel

from uipath._cli._evals.mocks.strategy import (
InputMockingStrategy,
MockingStrategy,
ToolSimulation,
)


class EvaluatorReference(BaseModel):
"""Reference to an evaluator with optional weight.
@@ -52,32 +58,6 @@ def serialize(instance: "EvaluatorReference") -> Any:
)


class EvaluationSimulationTool(BaseModel):
name: str = Field(..., alias="name")


class MockingStrategyType(str, Enum):
LLM = "llm"
MOCKITO = "mockito"
UNKNOWN = "unknown"


class BaseMockingStrategy(BaseModel):
pass


class ModelSettings(BaseModel):
"""Model Generation Parameters."""

model: str = Field(..., alias="model")
temperature: float | str | None = Field(default=None, alias="temperature")
top_p: float | None = Field(default=None, alias="topP")
top_k: int | None = Field(default=None, alias="topK")
frequency_penalty: float | None = Field(default=None, alias="frequencyPenalty")
presence_penalty: float | None = Field(default=None, alias="presencePenalty")
max_tokens: int | None = Field(default=None, alias="maxTokens")


class EvaluationSetModelSettings(BaseModel):
"""Model setting overrides within evaluation sets with ID."""

@@ -88,75 +68,6 @@ class EvaluationSetModelSettings(BaseModel):
temperature: float | str | None = Field(default=None, alias="temperature")


class LLMMockingStrategy(BaseMockingStrategy):
type: Literal[MockingStrategyType.LLM] = MockingStrategyType.LLM
prompt: str = Field(..., alias="prompt")
tools_to_simulate: list[EvaluationSimulationTool] = Field(
..., alias="toolsToSimulate"
)
model: ModelSettings | None = Field(None, alias="model")

model_config = ConfigDict(
validate_by_name=True, validate_by_alias=True, extra="allow"
)


class InputMockingStrategy(BaseModel):
prompt: str = Field(..., alias="prompt")
model: ModelSettings | None = Field(None, alias="model")

model_config = ConfigDict(
validate_by_name=True, validate_by_alias=True, extra="allow"
)


class MockingArgument(BaseModel):
args: list[Any] = Field(default_factory=lambda: [], alias="args")
kwargs: dict[str, Any] = Field(default_factory=lambda: {}, alias="kwargs")


class MockingAnswerType(str, Enum):
RETURN = "return"
RAISE = "raise"


class MockingAnswer(BaseModel):
type: MockingAnswerType
value: Any = Field(..., alias="value")


class MockingBehavior(BaseModel):
function: str = Field(..., alias="function")
arguments: MockingArgument = Field(..., alias="arguments")
then: list[MockingAnswer] = Field(..., alias="then")


class MockitoMockingStrategy(BaseMockingStrategy):
type: Literal[MockingStrategyType.MOCKITO] = MockingStrategyType.MOCKITO
behaviors: list[MockingBehavior] = Field(..., alias="config")

model_config = ConfigDict(
validate_by_name=True, validate_by_alias=True, extra="allow"
)


KnownMockingStrategy = Annotated[
Union[LLMMockingStrategy, MockitoMockingStrategy],
Field(discriminator="type"),
]


class UnknownMockingStrategy(BaseMockingStrategy):
type: str = Field(..., alias="type")

model_config = ConfigDict(
validate_by_name=True, validate_by_alias=True, extra="allow"
)


MockingStrategy = Union[KnownMockingStrategy, UnknownMockingStrategy]


class EvaluationItem(BaseModel):
"""Individual evaluation item within an evaluation set."""

@@ -201,7 +112,7 @@ class LegacyEvaluationItem(BaseModel):
simulation_instructions: str | None = Field(
default=None, alias="simulationInstructions"
)
tools_to_simulate: list[EvaluationSimulationTool] = Field(
tools_to_simulate: list[ToolSimulation] = Field(
default_factory=list, alias="toolsToSimulate"
)

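The classes removed above are not gone: the import added at the top of this file, together with the imports added in the files below, indicates they now live in uipath._cli._evals.mocks.strategy. A hedged sketch of that consolidated import surface (the strategy module itself is not part of this diff, so its exact contents are an assumption):

from uipath._cli._evals.mocks.strategy import (  # module referenced throughout this PR, not shown here
    InputMockingStrategy,    # prompt plus optional model settings for input generation
    LLMMockingStrategy,      # LLM-simulated tool calls (prompt, tools_to_simulate, model)
    MockingStrategy,         # union of the known strategies plus an unknown fallback
    MockitoMockingStrategy,  # record/replay behaviors (aliased as "config")
    ToolSimulation,          # successor to the removed EvaluationSimulationTool
)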
14 changes: 12 additions & 2 deletions src/uipath/_cli/_evals/_runtime.py
@@ -614,7 +614,9 @@ async def _execute_eval(
eval_item, runtime
)

set_execution_context(eval_item, self.span_collector, execution_id)
set_execution_context(
eval_item.mocking_strategy, self.span_collector, execution_id
)

await self.event_bus.publish(
EvaluationEvents.CREATE_EVAL_RUN,
@@ -868,8 +870,16 @@ async def _generate_input_for_eval(
self, eval_item: EvaluationItem, runtime: UiPathRuntimeProtocol
) -> EvaluationItem:
"""Use LLM to generate a mock input for an evaluation item."""
expected_output = (
getattr(eval_item, "evaluation_criterias", None)
or getattr(eval_item, "expected_output", None)
or {}
)

Collaborator: It's strange for the input mocker to be using expectation values. The prompts were reused, so we didn't think much of this. Do you know what happens in prod during simulations -- is it {}?

Collaborator: Secondly, evaluation_criterias is a map of evaluator_id -> criterias. For URT/"legacy evaluation items", these are repeated during load, so this object will have a lot of repeated values.

My $0.02 is that we should completely get rid of these fields from input simulation, but we can do that in a separate PR. @bai-uipath: could you follow up on this with the right POCs?
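To illustrate the concern above, a hypothetical example of what the fallback can resolve to for a legacy evaluation item (the identifiers and values below are invented for illustration, not taken from this PR):

# evaluation_criterias maps evaluator_id -> criteria; for URT/legacy items the same
# criteria are repeated during load, so every evaluator carries duplicate values.
evaluation_criterias = {
    "exact-match-evaluator": {"status": "approved", "amount": 120},
    "llm-judge-evaluator": {"status": "approved", "amount": 120},
}
expected_output = evaluation_criterias or {}  # mirrors the getattr(...) or ... or {} chain above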
generated_input = await generate_llm_input(
eval_item, (await self.get_schema(runtime)).input
eval_item.input_mocking_strategy,
(await self.get_schema(runtime)).input,
expected_behavior=eval_item.expected_agent_behavior or "",
expected_output=expected_output,
)
updated_eval_item = eval_item.model_copy(update={"inputs": generated_input})
return updated_eval_item
26 changes: 10 additions & 16 deletions src/uipath/_cli/_evals/mocks/input_mocker.py
@@ -4,7 +4,9 @@
from datetime import datetime
from typing import Any

from uipath._cli._evals._models._evaluation_set import EvaluationItem
from uipath._cli._evals.mocks.strategy import (
InputMockingStrategy,
)
from uipath.platform import UiPath
from uipath.tracing import traced

@@ -54,8 +56,10 @@ def get_input_mocking_prompt(

@traced(name="__mocker__", recording=False)
async def generate_llm_input(
evaluation_item: EvaluationItem,
mocking_strategy: InputMockingStrategy,
input_schema: dict[str, Any],
expected_behavior: str,
expected_output: dict[str, Any],
) -> dict[str, Any]:
"""Generate synthetic input using an LLM based on the evaluation context."""
from .mocks import cache_manager_context
@@ -68,18 +72,12 @@ async def generate_llm_input(
if "additionalProperties" not in input_schema:
input_schema["additionalProperties"] = False

expected_output = (
getattr(evaluation_item, "evaluation_criterias", None)
or getattr(evaluation_item, "expected_output", None)
or {}
)

prompt_generation_args = {
"input_schema": json.dumps(input_schema),
"input_generation_instructions": evaluation_item.input_mocking_strategy.prompt
if evaluation_item.input_mocking_strategy
"input_generation_instructions": mocking_strategy.prompt
if mocking_strategy
else "",
"expected_behavior": evaluation_item.expected_agent_behavior or "",
"expected_behavior": expected_behavior or "",
"expected_output": json.dumps(expected_output),
}

@@ -94,11 +92,7 @@
},
}

model_parameters = (
evaluation_item.input_mocking_strategy.model
if evaluation_item.input_mocking_strategy
else None
)
model_parameters = mocking_strategy.model if mocking_strategy else None
completion_kwargs = (
model_parameters.model_dump(by_alias=False, exclude_none=True)
if model_parameters
31 changes: 16 additions & 15 deletions src/uipath/_cli/_evals/mocks/llm_mocker.py
@@ -6,10 +6,13 @@

from pydantic import BaseModel, TypeAdapter

from uipath._cli._evals.mocks.strategy import (
LLMMockingStrategy,
MockingStrategy,
)
from uipath.tracing import traced
from uipath.tracing._utils import _SpanUtils

from .._models._evaluation_set import EvaluationItem, LLMMockingStrategy
from .._models._mocks import ExampleCall
from .mocker import (
Mocker,
@@ -74,29 +77,27 @@ def pydantic_to_dict_safe(obj: Any) -> Any:
class LLMMocker(Mocker):
"""LLM Based Mocker."""

def __init__(self, evaluation_item: EvaluationItem):
def __init__(self, mocking_strategy: MockingStrategy):
"""LLM Mocker constructor."""
self.evaluation_item = evaluation_item
assert isinstance(self.evaluation_item.mocking_strategy, LLMMockingStrategy)
self.mocking_strategy = mocking_strategy
assert isinstance(self.mocking_strategy, LLMMockingStrategy)

@traced(name="__mocker__", recording=False)
async def response(
self, func: Callable[[T], R], params: dict[str, Any], *args: T, **kwargs
) -> R:
"""Respond with mocked response generated by an LLM."""
assert isinstance(self.evaluation_item.mocking_strategy, LLMMockingStrategy)
assert isinstance(self.mocking_strategy, LLMMockingStrategy)

function_name = params.get("name") or func.__name__
if function_name in [
x.name for x in self.evaluation_item.mocking_strategy.tools_to_simulate
]:
if function_name in [x.name for x in self.mocking_strategy.tools_to_simulate]:
from uipath.platform import UiPath
from uipath.platform.chat._llm_gateway_service import _cleanup_schema

from .mocks import (
cache_manager_context,
evaluation_context,
execution_id_context,
mocking_strategy_context,
span_collector_context,
)

@@ -127,10 +128,10 @@ async def response(
]

test_run_history = "(empty)"
eval_item = evaluation_context.get()
strategy = mocking_strategy_context.get()
span_collector = span_collector_context.get()
execution_id = execution_id_context.get()
if eval_item and span_collector and execution_id:
if strategy and span_collector and execution_id:
spans = span_collector.get_spans(execution_id)
test_run_history = _SpanUtils.spans_to_llm_context(spans)

@@ -155,16 +156,16 @@
},
"agentInfo": { # This is incomplete
Author: @akshaylive why do we need this agentInfo?

Collaborator: The prompts were mostly copied over from URT (check L41). It does make sense for the mocker to know about the agent's context -- especially the eval inputs. Maybe we need to create a separate class like this?

class MockItem(BaseModel):  # Equivalent to evaluation item
    inputs: Any
    name: str = Field(default="debug", ..)
    mocking_strategy: MockingStrategy

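A runnable sketch of that suggestion (hypothetical; the class does not exist in this PR, and the field names and defaults are assumptions):

from typing import Any

from pydantic import BaseModel, Field

from uipath._cli._evals.mocks.strategy import MockingStrategy


class MockItem(BaseModel):
    # Minimal context a mocker needs, decoupled from the full EvaluationItem.
    inputs: Any = None
    name: str = Field(default="debug")
    mocking_strategy: MockingStrategy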
# "agentName": self.evaluation_item.name, # to be obtained.
"actionName": self.evaluation_item.name, # Not sure if this is right?
"userInput": self.evaluation_item.inputs,
# "actionName": self.evaluation_item.name, # Not sure if this is right?
# "userInput": self.evaluation_item.inputs,
},
"testRunProctorInstructions": self.evaluation_item.mocking_strategy.prompt,
"testRunProctorInstructions": self.mocking_strategy.prompt,
}
prompt_generation_args = {
k: json.dumps(pydantic_to_dict_safe(v))
for k, v in prompt_input.items()
}
model_parameters = self.evaluation_item.mocking_strategy.model
model_parameters = self.mocking_strategy.model
completion_kwargs = (
model_parameters.model_dump(by_alias=False, exclude_none=True)
if model_parameters
18 changes: 9 additions & 9 deletions src/uipath/_cli/_evals/mocks/mocker_factory.py
@@ -1,25 +1,25 @@
"""Mocker Factory."""

from uipath._cli._evals._models._evaluation_set import (
EvaluationItem,
LLMMockingStrategy,
MockitoMockingStrategy,
)
from uipath._cli._evals.mocks.llm_mocker import LLMMocker
from uipath._cli._evals.mocks.mocker import Mocker
from uipath._cli._evals.mocks.mockito_mocker import MockitoMocker
from uipath._cli._evals.mocks.strategy import (
LLMMockingStrategy,
MockingStrategy,
MockitoMockingStrategy,
)


class MockerFactory:
"""Mocker factory."""

@staticmethod
def create(evaluation_item: EvaluationItem) -> Mocker:
def create(strategy: MockingStrategy) -> Mocker:
"""Create a mocker instance."""
match evaluation_item.mocking_strategy:
match strategy:
case LLMMockingStrategy():
return LLMMocker(evaluation_item)
return LLMMocker(strategy)
case MockitoMockingStrategy():
return MockitoMocker(evaluation_item)
return MockitoMocker(strategy)
case _:
raise ValueError("Unknown mocking strategy")
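A hedged usage sketch of the new factory contract (the payload below follows the MockitoMockingStrategy shape deleted from _evaluation_set.py earlier in this PR and is assumed to be unchanged after the move to the strategy module; real call sites pass eval_item.mocking_strategy, as the _runtime.py diff shows):

from uipath._cli._evals.mocks.mocker_factory import MockerFactory
from uipath._cli._evals.mocks.strategy import MockitoMockingStrategy

# "config" is the alias for the behaviors field; each behavior maps a function call
# (arguments) to a list of answers ("return" or "raise").
strategy = MockitoMockingStrategy.model_validate(
    {
        "config": [
            {
                "function": "lookup_order",
                "arguments": {"args": [], "kwargs": {"order_id": "123"}},
                "then": [{"type": "return", "value": {"status": "shipped"}}],
            }
        ],
    }
)

mocker = MockerFactory.create(strategy)  # the factory now needs only the strategy, not the whole EvaluationItem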