diff --git a/pyproject.toml b/pyproject.toml index 9fe1addf3..3e091986e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "uipath" -version = "2.5.8" +version = "2.5.9" description = "Python SDK and CLI for UiPath Platform, enabling programmatic interaction with automation services, process management, and deployment tools." readme = { file = "README.md", content-type = "text/markdown" } requires-python = ">=3.11" diff --git a/src/uipath/_cli/_evals/_evaluate.py b/src/uipath/_cli/_evals/_evaluate.py index 4ef73bbe1..60713b0f3 100644 --- a/src/uipath/_cli/_evals/_evaluate.py +++ b/src/uipath/_cli/_evals/_evaluate.py @@ -6,6 +6,7 @@ from uipath._cli._evals._runtime import UiPathEvalContext, UiPathEvalRuntime from uipath._events._event_bus import EventBus +from uipath.tracing import LlmOpsHttpExporter async def evaluate( @@ -13,12 +14,16 @@ async def evaluate( trace_manager: UiPathTraceManager, eval_context: UiPathEvalContext, event_bus: EventBus, + job_exporter: LlmOpsHttpExporter | None, + studio_web_tracking_exporter: LlmOpsHttpExporter | None, ) -> UiPathRuntimeResult: async with UiPathEvalRuntime( factory=runtime_factory, context=eval_context, trace_manager=trace_manager, event_bus=event_bus, + job_exporter=job_exporter, + studio_web_tracking_exporter=studio_web_tracking_exporter, ) as eval_runtime: results = await eval_runtime.execute() await event_bus.wait_for_all(timeout=10) diff --git a/src/uipath/_cli/_evals/_progress_reporter.py b/src/uipath/_cli/_evals/_progress_reporter.py index 6c924ccc6..4bd1e1719 100644 --- a/src/uipath/_cli/_evals/_progress_reporter.py +++ b/src/uipath/_cli/_evals/_progress_reporter.py @@ -422,17 +422,6 @@ async def handle_update_eval_run(self, payload: EvalRunUpdatedEvent) -> None: try: eval_run_id = self.eval_run_ids.get(payload.execution_id) - # Use evalRunId as the trace_id for agent execution spans - # This makes all agent spans children of the eval run trace - if eval_run_id: - self.spans_exporter.trace_id = eval_run_id - else: - # Fallback to evalSetRunId if eval_run_id not available yet - if self.eval_set_execution_id: - self.spans_exporter.trace_id = self.eval_set_run_ids.get( - self.eval_set_execution_id - ) - self.spans_exporter.export(payload.spans) for eval_result in payload.eval_results: diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py index 9b21f4d60..0d7bf730a 100644 --- a/src/uipath/_cli/_evals/_runtime.py +++ b/src/uipath/_cli/_evals/_runtime.py @@ -308,6 +308,8 @@ def __init__( factory: UiPathRuntimeFactoryProtocol, trace_manager: UiPathTraceManager, event_bus: EventBus, + job_exporter: LlmOpsHttpExporter | None, + studio_web_tracking_exporter: LlmOpsHttpExporter | None, ): self.context: UiPathEvalContext = context # Wrap the factory to support model settings overrides @@ -322,11 +324,27 @@ def __init__( self.trace_manager.tracer_span_processors.append(span_processor) self.trace_manager.tracer_provider.add_span_processor(span_processor) - # Live tracking processor for real-time span updates - live_tracking_exporter = LlmOpsHttpExporter() - live_tracking_processor = LiveTrackingSpanProcessor(live_tracking_exporter) - self.trace_manager.tracer_span_processors.append(live_tracking_processor) - self.trace_manager.tracer_provider.add_span_processor(live_tracking_processor) + # Job exporter tracking processor for real-time span updates + if job_exporter: + self.job_exporter = job_exporter + job_tracking_processor = LiveTrackingSpanProcessor(self.job_exporter) + self.trace_manager.tracer_span_processors.append(job_tracking_processor) + self.trace_manager.tracer_provider.add_span_processor( + job_tracking_processor + ) + + # Studio Web tracking processor for real-time span updates + if studio_web_tracking_exporter: + self.studio_web_tracking_exporter = studio_web_tracking_exporter + studio_web_tracking_processor = LiveTrackingSpanProcessor( + self.studio_web_tracking_exporter + ) + self.trace_manager.tracer_span_processors.append( + studio_web_tracking_processor + ) + self.trace_manager.tracer_provider.add_span_processor( + studio_web_tracking_processor + ) self.logs_exporter: ExecutionLogsExporter = ExecutionLogsExporter() # Use job_id if available (for single runtime runs), otherwise generate UUID diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index ae9441dc2..59edf0ec3 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -1,5 +1,6 @@ import ast import asyncio +import logging import os from typing import Any @@ -28,6 +29,7 @@ from ._utils._console import ConsoleLogger from ._utils._eval_set import EvalHelpers +logger = logging.getLogger(__name__) console = ConsoleLogger() @@ -203,8 +205,18 @@ def eval( async def execute_eval(): event_bus = EventBus() + # Only create studio web exporter when reporting to Studio Web + studio_web_tracking_exporter = None if should_register_progress_reporter: - progress_reporter = StudioWebProgressReporter(LlmOpsHttpExporter()) + studio_web_tracking_exporter = LlmOpsHttpExporter() + if eval_context.eval_set_run_id: + studio_web_tracking_exporter.trace_id = ( + eval_context.eval_set_run_id + ) + + progress_reporter = StudioWebProgressReporter( + studio_web_tracking_exporter + ) await progress_reporter.subscribe_to_eval_runtime_events(event_bus) console_reporter = ConsoleProgressReporter() @@ -223,8 +235,11 @@ async def execute_eval(): # Set job_id in eval context for single runtime runs eval_context.job_id = ctx.job_id + # Create job exporter for live tracking + job_exporter = None if ctx.job_id: - trace_manager.add_span_exporter(LlmOpsHttpExporter()) + job_exporter = LlmOpsHttpExporter() + trace_manager.add_span_exporter(job_exporter) if trace_file: trace_manager.add_span_exporter( @@ -247,11 +262,18 @@ async def execute_eval(): trace_manager, eval_context, event_bus, + job_exporter, + studio_web_tracking_exporter, ) else: # Fall back to execution without overwrites ctx.result = await evaluate( - runtime_factory, trace_manager, eval_context, event_bus + runtime_factory, + trace_manager, + eval_context, + event_bus, + job_exporter, + studio_web_tracking_exporter, ) finally: if runtime_factory: diff --git a/tests/cli/eval/test_eval_runtime_metadata.py b/tests/cli/eval/test_eval_runtime_metadata.py index a05c2e0ae..6c6b5a4f2 100644 --- a/tests/cli/eval/test_eval_runtime_metadata.py +++ b/tests/cli/eval/test_eval_runtime_metadata.py @@ -29,6 +29,7 @@ UiPathEvalRuntime, ) from uipath._events._event_bus import EventBus +from uipath.tracing import LlmOpsHttpExporter class MockRuntimeSchema(UiPathRuntimeSchema): @@ -164,7 +165,14 @@ async def create_runtime(): return BaseTestRuntime() factory = MockFactory(create_runtime) - return UiPathEvalRuntime(context, factory, trace_manager, event_bus) + return UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) def test_finds_model_in_direct_runtime(self, eval_runtime): """Test finding agent model directly on runtime.""" @@ -228,7 +236,14 @@ async def create_runtime(): factory = MockFactory(create_runtime) event_bus = EventBus() trace_manager = UiPathTraceManager() - eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + eval_runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) runtime = await create_runtime() model = await eval_runtime._get_agent_model(runtime) @@ -243,7 +258,14 @@ async def create_runtime(): factory = MockFactory(create_runtime) event_bus = EventBus() trace_manager = UiPathTraceManager() - eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + eval_runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) runtime = await create_runtime() model = await eval_runtime._get_agent_model(runtime) @@ -258,7 +280,14 @@ async def create_runtime(): factory = MockFactory(create_runtime) event_bus = EventBus() trace_manager = UiPathTraceManager() - eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + eval_runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) runtime = await create_runtime() @@ -277,7 +306,14 @@ async def create_good_runtime(): factory = MockFactory(create_good_runtime) event_bus = EventBus() trace_manager = UiPathTraceManager() - eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + eval_runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) # Create a bad runtime that raises during get_agent_model class BadRuntime(BaseTestRuntime): @@ -310,7 +346,14 @@ async def create_runtime(): factory = MockFactory(create_runtime) event_bus = EventBus() trace_manager = UiPathTraceManager() - eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + eval_runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) runtime = await create_runtime() schema = await eval_runtime.get_schema(runtime) @@ -326,7 +369,14 @@ async def create_runtime(): factory = MockFactory(create_runtime) event_bus = EventBus() trace_manager = UiPathTraceManager() - eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + eval_runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) runtime = await create_runtime() @@ -346,7 +396,14 @@ async def create_runtime(): factory = MockFactory(create_runtime) event_bus = EventBus() trace_manager = UiPathTraceManager() - eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + eval_runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) runtime = await create_runtime() @@ -393,7 +450,14 @@ async def create_runtime(): factory = MockFactory(create_runtime) event_bus = EventBus() trace_manager = UiPathTraceManager() - eval_runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + eval_runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + LlmOpsHttpExporter(), + LlmOpsHttpExporter(), + ) model = await eval_runtime._get_agent_model(resumable_runtime) assert model == "gpt-4o-from-agent-json" diff --git a/tests/cli/eval/test_eval_tracing_integration.py b/tests/cli/eval/test_eval_tracing_integration.py index 3a9cf3d33..36cd5e36a 100644 --- a/tests/cli/eval/test_eval_tracing_integration.py +++ b/tests/cli/eval/test_eval_tracing_integration.py @@ -124,6 +124,8 @@ async def test_execute_creates_eval_set_run_span( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) # Mock initiate_evaluation to return empty results @@ -168,6 +170,8 @@ async def test_execute_includes_eval_set_run_id_when_provided( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) mock_eval_set = MagicMock() @@ -248,6 +252,8 @@ async def test_execute_eval_creates_evaluation_span( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) # Mock execute_runtime to return a successful result @@ -352,6 +358,8 @@ async def test_run_evaluator_creates_evaluator_span( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) await runtime.run_evaluator( @@ -393,6 +401,8 @@ async def test_multiple_evaluators_create_multiple_spans( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) evaluator_names = ["Accuracy", "Relevance", "Fluency"] @@ -464,6 +474,8 @@ async def test_evaluation_span_has_unique_execution_id( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) mock_runtime = AsyncMock() @@ -517,6 +529,8 @@ async def test_evaluator_span_inherits_eval_item_id( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) eval_item = MagicMock() @@ -706,6 +720,8 @@ async def test_evaluation_set_run_span_has_output_attribute( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) # Mock the runtime and evaluator @@ -783,6 +799,8 @@ async def test_evaluation_span_has_metadata_attributes( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) # Mock the runtime @@ -862,6 +880,8 @@ async def test_evaluation_output_span_has_output_with_type_and_value( factory=mock_factory, trace_manager=mock_trace_manager, event_bus=mock_event_bus, + job_exporter=MagicMock(), + studio_web_tracking_exporter=MagicMock(), ) # Mock execution output diff --git a/tests/cli/eval/test_evaluate.py b/tests/cli/eval/test_evaluate.py index 9a473b3ab..84fbf997c 100644 --- a/tests/cli/eval/test_evaluate.py +++ b/tests/cli/eval/test_evaluate.py @@ -17,6 +17,7 @@ from uipath._cli._evals._models._output import UiPathEvalOutput from uipath._cli._evals._runtime import UiPathEvalContext, UiPathEvalRuntime from uipath._events._event_bus import EventBus +from uipath.tracing import LlmOpsHttpExporter async def test_evaluate(): @@ -89,7 +90,16 @@ async def dispose(self) -> None: factory = TestFactory(identity) # Act - result = await evaluate(factory, trace_manager, context, event_bus) + job_exporter = LlmOpsHttpExporter() + studio_web_tracking_exporter = LlmOpsHttpExporter() + result = await evaluate( + factory, + trace_manager, + context, + event_bus, + job_exporter, + studio_web_tracking_exporter, + ) # Assert that the output is json-serializable UiPathEvalOutput.model_validate(result.output).model_dump_json() @@ -185,9 +195,119 @@ async def dispose(self) -> None: factory = TestFactory(identity) # Act - runtime = UiPathEvalRuntime(context, factory, trace_manager, event_bus) + job_exporter = LlmOpsHttpExporter() + studio_web_tracking_exporter = LlmOpsHttpExporter() + runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + job_exporter, + studio_web_tracking_exporter, + ) # Assert # Should be a valid UUID format (36 characters with dashes) assert len(runtime.execution_id) == 36 assert runtime.execution_id.count("-") == 4 + + +async def test_eval_runtime_works_without_exporters(): + """Test that UiPathEvalRuntime works when both exporters are None (local execution).""" + # Arrange + context = UiPathEvalContext() + context.eval_set = str( + Path(__file__).parent / "evals" / "eval-sets" / "default.json" + ) + event_bus = EventBus() + trace_manager = UiPathTraceManager() + + async def identity(input: dict[str, Any]) -> dict[str, Any]: + return input + + # Mock runtime that implements the protocol + class TestRuntime: + def __init__(self, executor): + self.executor = executor + + async def execute( + self, + input: dict[str, Any] | None = None, + options: UiPathExecuteOptions | None = None, + ) -> UiPathRuntimeResult: + result = await self.executor(input or {}) + return UiPathRuntimeResult( + output=result, + status=UiPathRuntimeStatus.SUCCESSFUL, + ) + + async def stream( + self, + input: dict[str, Any] | None = None, + options: UiPathStreamOptions | None = None, + ) -> AsyncGenerator[UiPathRuntimeEvent, None]: + result = await self.executor(input or {}) + yield UiPathRuntimeResult( + output=result, + status=UiPathRuntimeStatus.SUCCESSFUL, + ) + + async def get_schema(self) -> UiPathRuntimeSchema: + return UiPathRuntimeSchema( + filePath="test.py", + uniqueId="test", + type="workflow", + input={"type": "object", "properties": {}}, + output={"type": "object", "properties": {}}, + ) + + async def dispose(self) -> None: + pass + + class TestFactory: + def __init__(self, executor): + self.executor = executor + + def discover_entrypoints(self) -> list[str]: + return ["test"] + + async def discover_runtimes(self) -> list[UiPathRuntimeProtocol]: + return [TestRuntime(self.executor)] + + async def new_runtime( + self, entrypoint: str, runtime_id: str, **kwargs + ) -> UiPathRuntimeProtocol: + return TestRuntime(self.executor) + + async def dispose(self) -> None: + pass + + factory = TestFactory(identity) + + # Act - Pass None for both exporters (simulating local execution without job/studio web) + runtime = UiPathEvalRuntime( + context, + factory, + trace_manager, + event_bus, + job_exporter=None, + studio_web_tracking_exporter=None, + ) + + # Assert - Runtime should still work without exporters + assert runtime is not None + assert len(runtime.execution_id) == 36 + assert runtime.execution_id.count("-") == 4 + + # Verify that evaluate() also works with None exporters + result = await evaluate( + factory, + trace_manager, + context, + event_bus, + job_exporter=None, + studio_web_tracking_exporter=None, + ) + + # Assert that the evaluation completed successfully + assert result.output is not None diff --git a/uv.lock b/uv.lock index 2048aab27..51e417182 100644 --- a/uv.lock +++ b/uv.lock @@ -2486,7 +2486,7 @@ wheels = [ [[package]] name = "uipath" -version = "2.5.8" +version = "2.5.9" source = { editable = "." } dependencies = [ { name = "applicationinsights" },