Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
37338c5
Fix CI test order - install util-genai before dependent packages
shuningc Nov 5, 2025
b87d5aa
Fix CI: Install all packages before running tests
shuningc Nov 5, 2025
2de5993
Fix remaining undefined logger references in handler.py
shuningc Nov 5, 2025
a81aed8
Fix test mocking path for load_completion_callbacks
shuningc Nov 5, 2025
e97b0e6
Fix emitters-splunk: use event_logger instead of content_logger in fa…
shuningc Nov 5, 2025
0087986
Fix test: correct evaluation metric name from toxicity_v1 to toxicity/v1
shuningc Nov 5, 2025
718ce0f
Fix remaining mock paths and dynamic aggregation test in util-genai-e…
shuningc Nov 5, 2025
55f352d
Fix CI test order - install util-genai before dependent packages
shuningc Nov 5, 2025
7d1487a
Fix CI: Install all packages before running tests
shuningc Nov 5, 2025
e1549b5
Add test dependencies for langchain instrumentation
shuningc Nov 7, 2025
b771c53
Upgrade flaky to >=3.8.1 for pytest 7.4.4 compatibility
shuningc Nov 7, 2025
e849bb1
Fix deepeval test patching issues
shuningc Nov 7, 2025
03df5b5
Fix langchain instrumentation callback handler tests
shuningc Nov 7, 2025
2a5cdb6
Fix langchain instrumentation test isolation issue - unwrap all wrapp…
shuningc Nov 7, 2025
a9db044
Add response model extraction to callback_handler
shuningc Nov 7, 2025
b3a4d7e
Fix deepeval compatibility: make CacheConfig optional
shuningc Nov 7, 2025
ded7636
Upgrade deepeval minimum version to 3.7.0
shuningc Nov 7, 2025
0046bed
Add CacheConfig to test stubs for deepeval
shuningc Nov 7, 2025
3a4f091
Add runtime check for deepeval module availability
shuningc Nov 7, 2025
8787ad3
Apply code formatting (auto-formatter cleanup)
shuningc Nov 7, 2025
46ad5ed
Merge main into unitTestFix
shuningc Nov 7, 2025
590da35
Fix ruff linting issues
shuningc Nov 7, 2025
f965210
Apply ruff formatting
shuningc Nov 7, 2025
7743992
Remove CacheConfig import from deepeval_runner
shuningc Nov 7, 2025
faf2786
Complete CacheConfig cleanup and fix flaky test
shuningc Nov 7, 2025
455b44e
Revert production code to main branch
shuningc Nov 7, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 39 additions & 31 deletions .github/workflows/ci-main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,43 +22,51 @@ jobs:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
python-version: ['3.10', '3.11', '3.12', '3.13']
python-version: ["3.10", "3.11", "3.12", "3.13"]

steps:
- uses: actions/checkout@v5
- uses: actions/checkout@v5

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}
- name: Enable long paths on Windows
if: runner.os == 'Windows'
run: |
git config --system core.longpaths true

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest==7.4.4 pytest-cov==4.1.0
pip install -r dev-genai-requirements.txt
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python-version }}

- name: Run tests - opentelemetry-util-genai-emitters-splunk
run: |
pip install -e util/opentelemetry-util-genai-emitters-splunk --no-deps
python -m pytest util/opentelemetry-util-genai-emitters-splunk/tests/ -v
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest==7.4.4 pytest-cov==4.1.0
pip install -r dev-genai-requirements.txt

- name: Run tests - opentelemetry-util-genai-evals
run: |
pip install -e util/opentelemetry-util-genai-evals --no-deps
python -m pytest util/opentelemetry-util-genai-evals/tests/ -v
- name: Install all genai packages
run: |
pip install -e util/opentelemetry-util-genai --no-deps
pip install -e util/opentelemetry-util-genai-evals --no-deps
pip install -e util/opentelemetry-util-genai-evals-deepeval --no-deps
pip install -e util/opentelemetry-util-genai-emitters-splunk --no-deps
pip install -e "instrumentation-genai/opentelemetry-instrumentation-langchain[instruments,test]"

- name: Run tests - opentelemetry-util-genai-evals-deepeval
run: |
pip install -e util/opentelemetry-util-genai-evals-deepeval --no-deps
python -m pytest util/opentelemetry-util-genai-evals-deepeval/tests/ -v
- name: Run tests - opentelemetry-util-genai
run: |
python -m pytest util/opentelemetry-util-genai/tests/ -v --cov=opentelemetry.util.genai --cov-report=term-missing
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: why are the coverage options (`--cov`, `--cov-report`) added only for the opentelemetry-util-genai tests?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Testing opentelemetry-util-genai separately ensures that the core functionality works on its own before the dependent packages are tested.


- name: Run tests - opentelemetry-instrumentation-langchain
run: |
pip install -e instrumentation-genai/opentelemetry-instrumentation-langchain --no-deps
python -m pytest instrumentation-genai/opentelemetry-instrumentation-langchain/tests/ -v
- name: Run tests - opentelemetry-util-genai-emitters-splunk
run: |
python -m pytest util/opentelemetry-util-genai-emitters-splunk/tests/ -v

- name: Run tests - opentelemetry-util-genai
run: |
pip install -e util/opentelemetry-util-genai --no-deps
python -m pytest util/opentelemetry-util-genai/tests/ -v --cov=opentelemetry.util.genai --cov-report=term-missing
- name: Run tests - opentelemetry-util-genai-evals
run: |
python -m pytest util/opentelemetry-util-genai-evals/tests/ -v

- name: Run tests - opentelemetry-util-genai-evals-deepeval
run: |
python -m pytest util/opentelemetry-util-genai-evals-deepeval/tests/ -v

- name: Run tests - opentelemetry-instrumentation-langchain
run: |
python -m pytest instrumentation-genai/opentelemetry-instrumentation-langchain/tests/ -v
2 changes: 1 addition & 1 deletion dev-genai-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ markupsafe>=2.0.1
codespell==2.1.0
requests==2.32.3
ruamel.yaml==0.17.21
flaky==3.7.0
flaky>=3.8.1
pre-commit==3.7.0; python_version >= '3.9'
pre-commit==3.5.0; python_version < '3.9'
ruff==0.6.9
Expand Down
2 changes: 1 addition & 1 deletion dev-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ markupsafe>=2.0.1
codespell==2.1.0
requests==2.32.4
ruamel.yaml==0.17.21
flaky==3.7.0
flaky>=3.8.1
pre-commit==3.7.0; python_version >= '3.9'
pre-commit==3.5.0; python_version < '3.9'
ruff==0.6.9
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,14 @@ dependencies = [
instruments = [
"langchain >= 0.3.21",
]
test = [
"langchain-core >= 1.0.0",
"langchain-openai >= 1.0.0",
"pytest-recording >= 0.13.0",
"vcrpy >= 7.0.0",
"pyyaml >= 6.0.0",
"flaky >= 3.8.1",
]

[project.entry-points.opentelemetry_instrumentor]
langchain = "opentelemetry.instrumentation.langchain:LangChainInstrumentor"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def chatOpenAI_client():
return ChatOpenAI()


@pytest.fixture(scope="module")
@pytest.fixture(scope="function")
def vcr_config():
return {
"filter_headers": [
Expand All @@ -149,9 +149,19 @@ def vcr_config():
],
"decode_compressed_response": True,
"before_record_response": scrub_response_headers,
"serializer": "yaml",
}


@pytest.fixture(scope="session")
def vcr_cassette_dir():
    """Return the ``cassettes`` directory that sits next to this test module.

    Overrides the default cassette directory so that recordings are not
    placed in nested subdirectories.
    """
    import os

    # Resolve relative to this file rather than the current working directory.
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "cassettes")


@pytest.fixture(scope="function")
def instrument_no_content(tracer_provider, event_logger_provider, meter_provider):
if LangChainInstrumentor is None: # pragma: no cover - skip when dependency missing
Expand All @@ -175,16 +185,35 @@ def instrument_with_content(tracer_provider, event_logger_provider, meter_provid
if LangChainInstrumentor is None: # pragma: no cover
pytest.skip("opentelemetry-instrumentation-langchain not available")
set_prompt_capture_enabled(True)

# Reset util-genai singleton handler to ensure clean state
import opentelemetry.util.genai.handler as _util_handler_mod # noqa: PLC0415

if hasattr(_util_handler_mod.get_telemetry_handler, "_default_handler"):
setattr(_util_handler_mod.get_telemetry_handler, "_default_handler", None)

# Create new instrumentor for each test
instrumentor = LangChainInstrumentor()

# If already instrumented (from previous test), uninstrument first
if instrumentor._is_instrumented_by_opentelemetry:
instrumentor.uninstrument()

instrumentor.instrument(
tracer_provider=tracer_provider,
event_logger_provider=event_logger_provider,
meter_provider=meter_provider,
)

yield instrumentor

set_prompt_capture_enabled(True)
instrumentor.uninstrument()
# Clean up: uninstrument and reset singleton
if instrumentor._is_instrumented_by_opentelemetry:
instrumentor.uninstrument()

if hasattr(_util_handler_mod.get_telemetry_handler, "_default_handler"):
setattr(_util_handler_mod.get_telemetry_handler, "_default_handler", None)


@pytest.fixture(scope="function")
Expand Down Expand Up @@ -222,21 +251,37 @@ def instrument_with_content_util(
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT: "SPAN_ONLY", # util-genai content gate
}
)

# Reset singleton so new env vars are applied
import opentelemetry.util.genai.handler as _util_handler_mod # noqa: PLC0415

if hasattr(_util_handler_mod.get_telemetry_handler, "_default_handler"):
setattr(_util_handler_mod.get_telemetry_handler, "_default_handler", None)

# Create new instrumentor for each test
instrumentor = LangChainInstrumentor()

# If already instrumented (from previous test), uninstrument first
if instrumentor._is_instrumented_by_opentelemetry:
instrumentor.uninstrument()

instrumentor.instrument(
tracer_provider=tracer_provider,
event_logger_provider=event_logger_provider,
meter_provider=meter_provider,
)

yield instrumentor

os.environ.pop(OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, None)
set_prompt_capture_enabled(True)
instrumentor.uninstrument()

# Clean up: uninstrument and reset singleton
if instrumentor._is_instrumented_by_opentelemetry:
instrumentor.uninstrument()

if hasattr(_util_handler_mod.get_telemetry_handler, "_default_handler"):
setattr(_util_handler_mod.get_telemetry_handler, "_default_handler", None)


class LiteralBlockScalar(str):
Expand Down Expand Up @@ -305,6 +350,11 @@ def deserialize(cassette_string):

try: # pragma: no cover - optional pytest-vcr dependency
import pytest_recording # type: ignore # noqa: F401
import vcr as vcr_module # type: ignore # noqa: F401

# Register custom YAML serializer globally
vcr_module.VCR().register_serializer("yaml", PrettyPrintJSONBody)

except ModuleNotFoundError: # pragma: no cover - provide stub when plugin missing

@pytest.fixture(name="vcr", scope="module")
Expand All @@ -316,9 +366,10 @@ def register_serializer(self, *_args, **_kwargs):
return _VCRStub()


@pytest.fixture(scope="module", autouse=True)
@pytest.fixture(scope="function", autouse=True)
def fixture_vcr(vcr):
vcr.register_serializer("yaml", PrettyPrintJSONBody)
# When pytest-recording is installed, vcr is a Cassette and we don't need to do anything
# The serializer is already registered on the VCR module above
return vcr


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import sys
from pathlib import Path
from typing import Any, Optional, Tuple
from unittest.mock import MagicMock
from uuid import uuid4

import pytest
Expand All @@ -16,7 +15,6 @@
from opentelemetry.instrumentation.langchain.callback_handler import ( # noqa: E402
LangchainCallbackHandler,
)
from opentelemetry.sdk.trace import TracerProvider # noqa: E402
from opentelemetry.util.genai.types import Step, ToolCall # noqa: E402

try: # pragma: no cover - optional dependency in CI
Expand All @@ -42,6 +40,8 @@ def __init__(self) -> None:
self.started_steps = []
self.stopped_steps = []
self.failed_steps = []
self.started_workflows = []
self.stopped_workflows = []
self.entities: dict[str, Any] = {}

def start_agent(self, agent):
Expand Down Expand Up @@ -102,6 +102,28 @@ def fail_step(self, step, error):
self.entities.pop(str(step.run_id), None)
return step

def start_workflow(self, workflow):
    """Record *workflow* as started and register it under its run id.

    The workflow is appended to ``started_workflows`` and stored in
    ``entities`` keyed by ``str(workflow.run_id)``. Returns the same
    workflow object.
    """
    self.started_workflows.append(workflow)
    run_key = str(workflow.run_id)
    self.entities[run_key] = workflow
    return workflow

def stop_workflow(self, workflow):
    """Record *workflow* as stopped and drop it from the entity registry.

    The workflow is appended to ``stopped_workflows``; its entry in
    ``entities`` (keyed by ``str(workflow.run_id)``) is removed if present.
    Returns the same workflow object.
    """
    self.stopped_workflows.append(workflow)
    run_key = str(workflow.run_id)
    # A missing key is tolerated: stopping an unregistered workflow is a no-op.
    self.entities.pop(run_key, None)
    return workflow

def fail_workflow(self, workflow, error):
    """Drop *workflow* from the entity registry and return it.

    ``error`` is accepted but not used by this stub, and the failure is not
    recorded in any list.
    """
    run_key = str(workflow.run_id)
    # A missing key is tolerated: failing an unregistered workflow is a no-op.
    self.entities.pop(run_key, None)
    return workflow

def fail_by_run_id(self, run_id, error):
    """Fail the entity registered under *run_id*, if there is one.

    Simplified stub behaviour: the entity found in ``entities`` under
    ``str(run_id)`` is always treated as an agent and passed to
    ``fail_agent``. Unknown run ids are ignored. Always returns ``None``.
    """
    registered = self.entities.get(str(run_id))
    if registered is not None:
        # For simplicity the stub does not distinguish entity kinds.
        self.fail_agent(registered, error)

def get_entity(self, run_id):
    """Return the entity registered under ``str(run_id)``, or ``None`` if absent."""
    lookup_key = str(run_id)
    return self.entities.get(lookup_key)

Expand All @@ -110,12 +132,8 @@ def get_entity(self, run_id):
def _handler_with_stub_fixture() -> (
Tuple[LangchainCallbackHandler, _StubTelemetryHandler]
):
tracer = TracerProvider().get_tracer(__name__)
histogram = MagicMock()
histogram.record = MagicMock()
handler = LangchainCallbackHandler(tracer, histogram, histogram)
stub = _StubTelemetryHandler()
handler._handler = stub # type: ignore[attr-defined]
handler = LangchainCallbackHandler(telemetry_handler=stub)
return handler, stub


Expand Down Expand Up @@ -337,12 +355,8 @@ def test_step_outputs_recorded_on_chain_end(handler_with_stub):
@pytest.mark.skipif(not LANGCHAIN_CORE_AVAILABLE, reason="langchain_core not available")
def test_llm_attributes_independent_of_emitters(monkeypatch):
def _build_handler() -> Tuple[LangchainCallbackHandler, _StubTelemetryHandler]:
tracer = TracerProvider().get_tracer(__name__)
histogram = MagicMock()
histogram.record = MagicMock()
handler = LangchainCallbackHandler(tracer, histogram, histogram)
stub_handler = _StubTelemetryHandler()
handler._telemetry_handler = stub_handler # type: ignore[attr-defined]
handler = LangchainCallbackHandler(telemetry_handler=stub_handler)
return handler, stub_handler

def _invoke_with_env(env_value: Optional[str]):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -645,14 +645,14 @@ def splunk_emitters() -> list[EmitterSpec]:
def _conversation_factory(ctx: Any) -> SplunkConversationEventsEmitter:
capture_mode = getattr(ctx, "capture_event_content", False)
return SplunkConversationEventsEmitter(
event_logger=getattr(ctx, "content_logger", None),
event_logger=getattr(ctx, "event_logger", None),
capture_content=cast(bool, capture_mode),
)

def _evaluation_factory(ctx: Any) -> SplunkEvaluationResultsEmitter:
capture_mode = getattr(ctx, "capture_event_content", False)
return SplunkEvaluationResultsEmitter(
event_logger=getattr(ctx, "content_logger", None),
event_logger=getattr(ctx, "event_logger", None),
capture_content=cast(bool, capture_mode),
)

Expand Down
Loading