Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Copy this file to `.env` and update values before running the sample.

# Required OpenAI API key
OPENAI_API_KEY=sk-YOUR_API_KEY

# Optional: override default model (defaults to gpt-4.1)
# OPENAI_MODEL=gpt-4.1

# OTLP exporter configuration (update for your collector).
# Convention: port 4317 serves OTLP/gRPC, port 4318 serves OTLP/HTTP.
# OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4317
# OTEL_EXPORTER_OTLP_PROTOCOL=grpc

# Traces will use this service.name
OTEL_SERVICE_NAME=opentelemetry-multi-agent-traceloop-translator-evals

# Active exporter settings: 4318 is the OTLP/HTTP port, so the protocol must be
# http/protobuf — the SDK's grpc default would fail against this endpoint.
OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:4318
OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf

# Opt in to the latest experimental GenAI semantic conventions.
OTEL_SEMCONV_STABILITY_OPT_IN=gen_ai_latest_experimental

# Capture prompt/completion content on both spans and events.
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT_MODE=SPAN_AND_EVENT

# Emit span/metric/event telemetry plus the Splunk-specific emitters; route
# evaluation results through the Splunk evaluation-results category.
OTEL_INSTRUMENTATION_GENAI_EMITTERS=span_metric_event,splunk
OTEL_INSTRUMENTATION_GENAI_EMITTERS_EVALUATION=replace-category:SplunkEvaluationResults

# Aggregate evaluation results; enable verbose GenAI debug logging (sample only).
OTEL_INSTRUMENTATION_GENAI_EVALS_RESULTS_AGGREGATION=true
OTEL_INSTRUMENTATION_GENAI_DEBUG=true
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# syntax=docker/dockerfile:1
# Use Python 3.12 as base image
FROM python:3.12-slim

# Set working directory
WORKDIR /app

# Install system dependencies (git is required for any VCS-based pip requirements)
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
    && rm -rf /var/lib/apt/lists/*

# Copy the entire instrumentation-genai and util directories to maintain package paths
# Build context is at repo root to access both instrumentation-genai/ and util/
COPY instrumentation-genai/opentelemetry-instrumentation-langchain /app/opentelemetry-instrumentation-langchain
COPY util/opentelemetry-util-genai /app/opentelemetry-util-genai
COPY util/opentelemetry-util-genai-traceloop-translator /app/opentelemetry-util-genai-traceloop-translator
COPY util/opentelemetry-util-genai-evals /app/opentelemetry-util-genai-evals
COPY util/opentelemetry-util-genai-evals-deepeval /app/opentelemetry-util-genai-evals-deepeval
COPY util/opentelemetry-util-genai-emitters-splunk /app/opentelemetry-util-genai-emitters-splunk

# Set working directory to the example
WORKDIR /app/opentelemetry-instrumentation-langchain/examples/multi_agent_travel_planner

# Install Python dependencies from requirements.traceloop.txt (excluding local -e packages)
# First, create a temporary requirements file without the local editable packages
RUN grep -v "^-e \.\." requirements.traceloop.txt > /tmp/requirements_external.txt && \
    pip install --no-cache-dir -r /tmp/requirements_external.txt && \
    rm /tmp/requirements_external.txt

# Install the local packages in editable mode using absolute paths instead of
# `cd … && pip install` chains (hadolint DL3003). Base util package first; the
# Traceloop translator last so its .pth file (zero-code instrumentation hook)
# is registered once all of its dependencies are importable.
RUN pip install --no-cache-dir --no-deps \
        -e /app/opentelemetry-util-genai \
        -e /app/opentelemetry-util-genai-evals \
        -e /app/opentelemetry-util-genai-evals-deepeval \
        -e /app/opentelemetry-util-genai-emitters-splunk \
        -e /app/opentelemetry-instrumentation-langchain \
        -e /app/opentelemetry-util-genai-traceloop-translator

# Verify packages are installed correctly — fail the build early if any import breaks
RUN python3 -c "from opentelemetry.util.genai.handler import get_telemetry_handler; print('✓ GenAI handler available')" && \
    python3 -c "from opentelemetry.util.genai.evals import create_evaluation_manager; print('✓ Evaluation manager available')" && \
    python3 -c "import opentelemetry.util.genai.emitters.splunk; print('✓ Splunk emitters available')" && \
    python3 -c "import opentelemetry.util.evaluator.deepeval; print('✓ Deepeval evaluator module available')" && \
    python3 -c "import deepeval; print('✓ Deepeval SDK installed')" && \
    python3 -c "from opentelemetry.util.genai.traceloop import enable_traceloop_translator; print('✓ Traceloop translator available')"

# Run as a dedicated non-root user; the editable installs above only need read
# access at runtime. (The previous `chmod +x main_traceloop.py` was dropped:
# the script is always invoked via `python3`, so the exec bit is unused.)
RUN useradd --system --uid 10001 --home-dir /app app
USER app

# Set default environment variables for OpenTelemetry.
# Protocol defaults to http/protobuf; deployments targeting a gRPC collector
# (port 4317) override OTEL_EXPORTER_OTLP_PROTOCOL at run time.
ENV OTEL_PYTHON_LOG_CORRELATION=true \
    OTEL_PYTHON_LOG_LEVEL=info \
    OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf \
    PYTHONUNBUFFERED=1

# Health check (optional). NOTE(review): this probe trivially succeeds; replace
# with a real liveness check if the app ever exposes a health endpoint.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python3 -c "import sys; sys.exit(0)"

# Run the Traceloop version (exec form: process is PID 1 and receives SIGTERM)
CMD ["python3", "main_traceloop.py"]

Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
apiVersion: batch/v1
kind: CronJob
metadata:
  name: travel-planner-tl
  namespace: o11y-4-ai-admehra
  labels:
    app: travel-planner-tl
    component: telemetry
  annotations:
    description: "Multi-agent travel planner with Traceloop translator and GenAI evaluations (Deepeval telemetry disabled)"
    git-commit: "30c512d"
spec:
  # Every 30 minutes from 08:00 through 17:30 local time on weekdays (Mon-Fri).
  # timeZone requires Kubernetes >= 1.27 (CronJob timeZone GA).
  schedule: "*/30 8-17 * * 1-5"
  timeZone: "America/Los_Angeles"
  suspend: false

  # Never start a new run while the previous one is still active (default is
  # Allow, which would let slow runs pile up on a 30-minute cadence), and skip
  # runs that missed their scheduled start by more than 5 minutes.
  concurrencyPolicy: Forbid
  startingDeadlineSeconds: 300

  # Keep last 3 successful and 1 failed job for debugging
  successfulJobsHistoryLimit: 3
  failedJobsHistoryLimit: 1

  jobTemplate:
    metadata:
      labels:
        app: travel-planner-tl
        component: telemetry
    spec:
      # Bound retries and total runtime so a wedged job is killed well before
      # it could collide with the next scheduled run.
      backoffLimit: 4
      activeDeadlineSeconds: 1500
      template:
        metadata:
          labels:
            app: travel-planner-tl
            component: telemetry
        spec:
          restartPolicy: OnFailure

          containers:
            - name: travel-planner-traceloop
              # Multi-platform image (amd64, arm64) with git commit hash tag
              image: admehra621/travel-planner-tl:30c512d
              imagePullPolicy: Always

              env:
                # === GenAI Semantic Conventions (REQUIRED) ===
                - name: OTEL_SEMCONV_STABILITY_OPT_IN
                  value: "gen_ai_latest_experimental"

                # === OpenTelemetry Resource Attributes ===
                - name: OTEL_RESOURCE_ATTRIBUTES
                  value: "deployment.environment=o11y-inframon-ai,git.commit.id=30c512d"

                # === Service name for telemetry ===
                - name: OTEL_SERVICE_NAME
                  value: "travel-planner-tl"

                # === OpenAI Configuration ===
                - name: OPENAI_API_KEY
                  valueFrom:
                    secretKeyRef:
                      name: openai-credentials
                      key: api-key

                - name: OPENAI_MODEL
                  value: "gpt-4o-mini"

                # === Deepeval Telemetry Opt-Out ===
                - name: DEEPEVAL_TELEMETRY_OPT_OUT
                  value: "1"

                # === GenAI Content Capture ===
                - name: OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT
                  value: "true"

                - name: OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT_MODE
                  value: "SPAN_AND_EVENT"

                # === GenAI Emitters Configuration ===
                - name: OTEL_INSTRUMENTATION_GENAI_EMITTERS
                  value: "span_metric_event,splunk"

                - name: OTEL_INSTRUMENTATION_GENAI_EMITTERS_EVALUATION
                  value: "replace-category:SplunkEvaluationResults"

                # === Evaluation Settings ===
                # All 5 default evaluations enabled (bias, toxicity, relevance, hallucination, sentiment)
                - name: OTEL_INSTRUMENTATION_GENAI_EVALS_RESULTS_AGGREGATION
                  value: "true"

                # === GenAI Debug Flags (disabled for production) ===
                # - name: OTEL_GENAI_EVAL_DEBUG_SKIPS
                #   value: "true"
                # - name: OTEL_GENAI_EVAL_DEBUG_EACH
                #   value: "true"
                # - name: OTEL_INSTRUMENTATION_GENAI_DEBUG
                #   value: "true"

                # === OpenTelemetry Logs Exporter ===
                - name: OTEL_LOGS_EXPORTER
                  value: "otlp"

                # === Get the host IP for Splunk OTEL agent ===
                # NOTE: must be declared BEFORE OTEL_EXPORTER_OTLP_ENDPOINT —
                # $(VAR) expansion only sees previously-defined entries.
                - name: SPLUNK_OTEL_AGENT
                  valueFrom:
                    fieldRef:
                      fieldPath: status.hostIP

                # === OpenTelemetry OTLP endpoint using Splunk agent ===
                - name: OTEL_EXPORTER_OTLP_ENDPOINT
                  value: "http://$(SPLUNK_OTEL_AGENT):4317"

                # === OTLP Protocol (grpc — matches the 4317 port above) ===
                - name: OTEL_EXPORTER_OTLP_PROTOCOL
                  value: "grpc"

                # === Exclude health check URLs ===
                - name: OTEL_PYTHON_EXCLUDED_URLS
                  value: "^(https?://)?[^/]+(/)?$"

                # === Enable Python logging auto instrumentation ===
                - name: OTEL_PYTHON_LOGGING_AUTO_INSTRUMENTATION_ENABLED
                  value: "true"

                # === Enable log correlation ===
                - name: OTEL_PYTHON_LOG_CORRELATION
                  value: "true"

                # === Enable LangChain content capture ===
                - name: OTEL_INSTRUMENTATION_LANGCHAIN_CAPTURE_MESSAGE_CONTENT
                  value: "true"

                # === Enable Splunk profiler ===
                - name: SPLUNK_PROFILER_ENABLED
                  value: "true"

                # === Unbuffered Python output ===
                - name: PYTHONUNBUFFERED
                  value: "1"

                # === GenAI evaluation sampling rate (1 = evaluate everything) ===
                - name: OTEL_GENAI_EVALUATION_SAMPLING_RATE
                  value: "1"

              # === Resource limits ===
              resources:
                requests:
                  memory: "512Mi"
                  cpu: "500m"
                limits:
                  memory: "1Gi"
                  cpu: "1000m"
Loading
Loading