#!/usr/bin/env python3
"""
Example: Evaluation Errors

Install deepeval for evaluation:
    pip install deepeval

This example demonstrates:
1. A successful LLM call whose evaluation fails, because the OpenAI API key
   is deliberately invalid
2. A failed LLM call reported through the handler's error path
"""

import os

# Select deepeval as the evaluator backend.
os.environ["OTEL_INSTRUMENTATION_GENAI_EVALS_EVALUATORS"] = "deepeval"
# Point the evaluator at an invalid OpenAI key so the evaluation fails.
# (The LLM calls below are simulated and never reach the OpenAI API.)
os.environ["OPENAI_API_KEY"] = "invalid_key"
os.environ["OTEL_RESOURCE_ATTRIBUTES"] = (
    "deployment.environment=example_evaluation_errors"
)
os.environ["OTEL_SERVICE_NAME"] = "demo-app-util-genai-dev"
os.environ["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT"] = "true"
os.environ["OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT_MODE"] = (
    "SPAN_AND_EVENT"
)
os.environ["OTEL_INSTRUMENTATION_GENAI_EVALS_RESULTS_AGGREGATION"] = "true"
os.environ["OTEL_INSTRUMENTATION_GENAI_EMITTERS"] = "span_metric_event"
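
# Note: the environment variables are set above, before the GenAI utility
# imports below, so the configuration is already in place when the telemetry
# handler is created.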
import time

from opentelemetry import _logs as logs
from opentelemetry import metrics, trace
from opentelemetry.sdk._logs import LoggerProvider
from opentelemetry.sdk._logs.export import (
    ConsoleLogExporter,
    SimpleLogRecordProcessor,
)
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import (
    ConsoleMetricExporter,
    PeriodicExportingMetricReader,
)
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
    ConsoleSpanExporter,
    SimpleSpanProcessor,
)
from opentelemetry.util.genai.handler import get_telemetry_handler
from opentelemetry.util.genai.types import (
    Error,
    InputMessage,
    LLMInvocation,
    OutputMessage,
    Text,
)


def setup_telemetry():
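    # Export spans, metrics, and logs to the console so the example is
    # fully self-contained.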
    trace_provider = TracerProvider()
    trace_provider.add_span_processor(
        SimpleSpanProcessor(ConsoleSpanExporter())
    )
    trace.set_tracer_provider(trace_provider)

    metric_reader = PeriodicExportingMetricReader(
        ConsoleMetricExporter(), export_interval_millis=5000
    )
    meter_provider = MeterProvider(metric_readers=[metric_reader])
    metrics.set_meter_provider(meter_provider)

    logger_provider = LoggerProvider()
    logger_provider.add_log_record_processor(
        SimpleLogRecordProcessor(ConsoleLogExporter())
    )
    logs.set_logger_provider(logger_provider)

    return trace_provider, meter_provider, logger_provider


def successful_llm_call():
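    # Simulate a full LLM round trip: start_llm opens the invocation and
    # stop_llm completes it, at which point the configured deepeval
    # evaluation is expected to run and fail (the OpenAI key is invalid).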
    handler = get_telemetry_handler()
    print("Starting successful LLM invocation...")
    llm = LLMInvocation(
        request_model="gpt-4",
        input_messages=[
            InputMessage(
                role="user",
                parts=[Text(content="Hello, how can I track my order?")],
            ),
        ],
    )
    handler.start_llm(llm)
    time.sleep(0.1)  # Simulate model latency.
    llm.output_messages = [
        OutputMessage(
            role="assistant",
            parts=[
                Text(
                    content="You can track your order using the tracking link sent to your email."
                )
            ],
            finish_reason="stop",
        )
    ]
    llm.input_tokens = 12
    llm.output_tokens = 18
    handler.stop_llm(llm)
    print("LLM invocation completed successfully.\n")


def failed_llm_call():
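    # Simulate an invocation that errors out: fail_llm records the error
    # on the invocation's telemetry instead of a normal completion.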
    handler = get_telemetry_handler()
    print("Starting failed LLM invocation...")
    llm = LLMInvocation(
        request_model="gpt-4",
        input_messages=[
            InputMessage(
                role="user",
                parts=[Text(content="Tell me about the weather in Atlantis.")],
            ),
        ],
    )
    handler.start_llm(llm)
    time.sleep(0.1)  # Simulate model latency before the failure.
    error = Error(message="Model unavailable", type=RuntimeError)
    handler.fail_llm(llm, error)
    print("LLM invocation failed.\n")


if __name__ == "__main__":
    setup_telemetry()
    successful_llm_call()
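    # Exercise the explicit error path as well.
    failed_llm_call()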
    time.sleep(6)  # Wait for the 5s periodic metric reader to export.
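
# Expected behavior (approximate, not verbatim output): spans, logs, and
# metrics print to the console, and the deepeval evaluation reports an
# error because the OpenAI key is invalid.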