Skip to content

Commit a2ca6af

Browse files
authored
Add Support for Detecting Synthetic Source (#3674)
1 parent 185502b commit a2ca6af

File tree

11 files changed

+597
-3
lines changed

11 files changed

+597
-3
lines changed

CHANGELOG.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1111
1212
## Unreleased
1313

14+
- `opentelemetry-instrumentation-requests`, `opentelemetry-instrumentation-wsgi`, `opentelemetry-instrumentation-asgi`: detect synthetic sources on requests, ASGI, and WSGI.
15+
([#3674](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3674))
16+
1417
### Added
1518

1619
- `opentelemetry-instrumentation-aiohttp-client`: add support for url exclusions via `OTEL_PYTHON_EXCLUDED_URLS` / `OTEL_PYTHON_AIOHTTP_CLIENT_EXCLUDED_URLS`

instrumentation/opentelemetry-instrumentation-asgi/src/opentelemetry/instrumentation/asgi/__init__.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ def client_response_hook(span: Span, scope: Scope, message: dict[str, Any]):
258258
from opentelemetry.instrumentation.utils import _start_internal_or_server_span
259259
from opentelemetry.metrics import get_meter
260260
from opentelemetry.propagators.textmap import Getter, Setter
261+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
262+
USER_AGENT_SYNTHETIC_TYPE,
263+
)
261264
from opentelemetry.semconv._incubating.metrics.http_metrics import (
262265
create_http_server_active_requests,
263266
create_http_server_request_body_size,
@@ -276,6 +279,7 @@ def client_response_hook(span: Span, scope: Scope, message: dict[str, Any]):
276279
ExcludeList,
277280
SanitizeValue,
278281
_parse_url_query,
282+
detect_synthetic_user_agent,
279283
get_custom_headers,
280284
normalise_request_header_name,
281285
normalise_response_header_name,
@@ -397,7 +401,13 @@ def collect_request_attributes(
397401
)
398402
http_user_agent = asgi_getter.get(scope, "user-agent")
399403
if http_user_agent:
400-
_set_http_user_agent(result, http_user_agent[0], sem_conv_opt_in_mode)
404+
user_agent_value = http_user_agent[0]
405+
_set_http_user_agent(result, user_agent_value, sem_conv_opt_in_mode)
406+
407+
# Check for synthetic user agent type
408+
synthetic_type = detect_synthetic_user_agent(user_agent_value)
409+
if synthetic_type:
410+
result[USER_AGENT_SYNTHETIC_TYPE] = synthetic_type
401411

402412
if "client" in scope and scope["client"] is not None:
403413
_set_http_peer_ip_server(

instrumentation/opentelemetry-instrumentation-asgi/tests/test_asgi_middleware.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@
4242
HistogramDataPoint,
4343
NumberDataPoint,
4444
)
45+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
46+
USER_AGENT_SYNTHETIC_TYPE,
47+
)
4548
from opentelemetry.semconv.attributes.client_attributes import (
4649
CLIENT_ADDRESS,
4750
CLIENT_PORT,
@@ -883,6 +886,145 @@ def update_expected_user_agent(expected):
883886
new_sem_conv=True,
884887
)
885888

889+
async def test_user_agent_synthetic_bot_detection(self):
890+
"""Test that bot user agents are detected as synthetic with type 'bot'"""
891+
test_cases = [
892+
b"Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)",
893+
b"Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)",
894+
b"googlebot/1.0",
895+
b"bingbot/1.0",
896+
]
897+
898+
# Test each user agent case separately to avoid span accumulation
899+
for user_agent in test_cases:
900+
with self.subTest(user_agent=user_agent):
901+
# Clear headers first
902+
self.scope["headers"] = []
903+
904+
def update_expected_synthetic_bot(
905+
expected, ua: bytes = user_agent
906+
):
907+
expected[3]["attributes"].update(
908+
{
909+
SpanAttributes.HTTP_USER_AGENT: ua.decode("utf8"),
910+
USER_AGENT_SYNTHETIC_TYPE: "bot",
911+
}
912+
)
913+
return expected
914+
915+
self.scope["headers"].append([b"user-agent", user_agent])
916+
app = otel_asgi.OpenTelemetryMiddleware(simple_asgi)
917+
self.seed_app(app)
918+
await self.send_default_request()
919+
outputs = await self.get_all_output()
920+
self.validate_outputs(
921+
outputs, modifiers=[update_expected_synthetic_bot]
922+
)
923+
924+
# Clear spans after each test case to prevent accumulation
925+
self.memory_exporter.clear()
926+
927+
async def test_user_agent_synthetic_test_detection(self):
928+
"""Test that test user agents are detected as synthetic with type 'test'"""
929+
test_cases = [
930+
b"alwayson/1.0",
931+
b"AlwaysOn/2.0",
932+
b"test-alwayson-client",
933+
]
934+
935+
# Test each user agent case separately to avoid span accumulation
936+
for user_agent in test_cases:
937+
with self.subTest(user_agent=user_agent):
938+
# Clear headers first
939+
self.scope["headers"] = []
940+
941+
def update_expected_synthetic_test(
942+
expected, ua: bytes = user_agent
943+
):
944+
expected[3]["attributes"].update(
945+
{
946+
SpanAttributes.HTTP_USER_AGENT: ua.decode("utf8"),
947+
USER_AGENT_SYNTHETIC_TYPE: "test",
948+
}
949+
)
950+
return expected
951+
952+
self.scope["headers"].append([b"user-agent", user_agent])
953+
app = otel_asgi.OpenTelemetryMiddleware(simple_asgi)
954+
self.seed_app(app)
955+
await self.send_default_request()
956+
outputs = await self.get_all_output()
957+
self.validate_outputs(
958+
outputs, modifiers=[update_expected_synthetic_test]
959+
)
960+
961+
# Clear spans after each test case to prevent accumulation
962+
self.memory_exporter.clear()
963+
964+
async def test_user_agent_non_synthetic(self):
965+
"""Test that normal user agents are not marked as synthetic"""
966+
test_cases = [
967+
b"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
968+
b"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15",
969+
b"PostmanRuntime/7.28.4",
970+
b"curl/7.68.0",
971+
]
972+
973+
# Test each user agent case separately to avoid span accumulation
974+
for user_agent in test_cases:
975+
with self.subTest(user_agent=user_agent):
976+
# Clear headers first
977+
self.scope["headers"] = []
978+
979+
def update_expected_non_synthetic(
980+
expected, ua: bytes = user_agent
981+
):
982+
# Should only have the user agent, not synthetic type
983+
expected[3]["attributes"].update(
984+
{
985+
SpanAttributes.HTTP_USER_AGENT: ua.decode("utf8"),
986+
}
987+
)
988+
return expected
989+
990+
self.scope["headers"].append([b"user-agent", user_agent])
991+
app = otel_asgi.OpenTelemetryMiddleware(simple_asgi)
992+
self.seed_app(app)
993+
await self.send_default_request()
994+
outputs = await self.get_all_output()
995+
self.validate_outputs(
996+
outputs, modifiers=[update_expected_non_synthetic]
997+
)
998+
999+
# Clear spans after each test case to prevent accumulation
1000+
self.memory_exporter.clear()
1001+
1002+
async def test_user_agent_synthetic_new_semconv(self):
1003+
"""Test synthetic user agent detection with new semantic conventions"""
1004+
user_agent = b"Mozilla/5.0 (compatible; Googlebot/2.1)"
1005+
1006+
def update_expected_synthetic_new_semconv(expected):
1007+
expected[3]["attributes"].update(
1008+
{
1009+
USER_AGENT_ORIGINAL: user_agent.decode("utf8"),
1010+
USER_AGENT_SYNTHETIC_TYPE: "bot",
1011+
}
1012+
)
1013+
return expected
1014+
1015+
self.scope["headers"] = []
1016+
self.scope["headers"].append([b"user-agent", user_agent])
1017+
app = otel_asgi.OpenTelemetryMiddleware(simple_asgi)
1018+
self.seed_app(app)
1019+
await self.send_default_request()
1020+
outputs = await self.get_all_output()
1021+
self.validate_outputs(
1022+
outputs,
1023+
modifiers=[update_expected_synthetic_new_semconv],
1024+
old_sem_conv=False,
1025+
new_sem_conv=True,
1026+
)
1027+
8861028
async def test_traceresponse_header(self):
8871029
"""Test a traceresponse header is sent when a global propagator is set."""
8881030

instrumentation/opentelemetry-instrumentation-requests/src/opentelemetry/instrumentation/requests/__init__.py

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,11 +132,17 @@ def response_hook(span, request_obj, response):
132132
)
133133
from opentelemetry.metrics import Histogram, get_meter
134134
from opentelemetry.propagate import inject
135+
from opentelemetry.semconv._incubating.attributes.user_agent_attributes import (
136+
USER_AGENT_SYNTHETIC_TYPE,
137+
)
135138
from opentelemetry.semconv.attributes.error_attributes import ERROR_TYPE
136139
from opentelemetry.semconv.attributes.network_attributes import (
137140
NETWORK_PEER_ADDRESS,
138141
NETWORK_PEER_PORT,
139142
)
143+
from opentelemetry.semconv.attributes.user_agent_attributes import (
144+
USER_AGENT_ORIGINAL,
145+
)
140146
from opentelemetry.semconv.metrics import MetricInstruments
141147
from opentelemetry.semconv.metrics.http_metrics import (
142148
HTTP_CLIENT_REQUEST_DURATION,
@@ -145,6 +151,7 @@ def response_hook(span, request_obj, response):
145151
from opentelemetry.trace.span import Span
146152
from opentelemetry.util.http import (
147153
ExcludeList,
154+
detect_synthetic_user_agent,
148155
get_excluded_urls,
149156
parse_excluded_urls,
150157
redact_url,
@@ -243,6 +250,15 @@ def get_or_create_headers():
243250
)
244251
_set_http_url(span_attributes, url, sem_conv_opt_in_mode)
245252

253+
# Check for synthetic user agent type
254+
headers = get_or_create_headers()
255+
user_agent = headers.get("User-Agent")
256+
synthetic_type = detect_synthetic_user_agent(user_agent)
257+
if synthetic_type:
258+
span_attributes[USER_AGENT_SYNTHETIC_TYPE] = synthetic_type
259+
if user_agent:
260+
span_attributes[USER_AGENT_ORIGINAL] = user_agent
261+
246262
metric_labels = {}
247263
_set_http_method(
248264
metric_labels,
@@ -297,7 +313,6 @@ def get_or_create_headers():
297313
if callable(request_hook):
298314
request_hook(span, request)
299315

300-
headers = get_or_create_headers()
301316
inject(headers)
302317

303318
with suppress_http_instrumentation():

instrumentation/opentelemetry-instrumentation-requests/tests/test_requests_integration.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,9 @@
6363
SERVER_PORT,
6464
)
6565
from opentelemetry.semconv.attributes.url_attributes import URL_FULL
66+
from opentelemetry.semconv.attributes.user_agent_attributes import (
67+
USER_AGENT_ORIGINAL,
68+
)
6669
from opentelemetry.test.mock_textmap import MockTextMapPropagator
6770
from opentelemetry.test.test_base import TestBase
6871
from opentelemetry.trace import StatusCode
@@ -175,6 +178,7 @@ def test_basic(self):
175178
HTTP_METHOD: "GET",
176179
HTTP_URL: self.URL,
177180
HTTP_STATUS_CODE: 200,
181+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
178182
},
179183
)
180184

@@ -211,6 +215,7 @@ def test_basic_new_semconv(self):
211215
NETWORK_PROTOCOL_VERSION: "1.1",
212216
SERVER_PORT: 80,
213217
NETWORK_PEER_PORT: 80,
218+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
214219
},
215220
)
216221

@@ -253,6 +258,7 @@ def test_basic_both_semconv(self):
253258
NETWORK_PROTOCOL_VERSION: "1.1",
254259
SERVER_PORT: 80,
255260
NETWORK_PEER_PORT: 80,
261+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
256262
},
257263
)
258264

@@ -276,6 +282,7 @@ def test_nonstandard_http_method(self):
276282
HTTP_METHOD: "_OTHER",
277283
HTTP_URL: self.URL,
278284
HTTP_STATUS_CODE: 405,
285+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
279286
},
280287
)
281288

@@ -300,6 +307,7 @@ def test_nonstandard_http_method_new_semconv(self):
300307
NETWORK_PROTOCOL_VERSION: "1.1",
301308
ERROR_TYPE: "405",
302309
HTTP_REQUEST_METHOD_ORIGINAL: "NONSTANDARD",
310+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
303311
},
304312
)
305313
self.assertIs(span.status.status_code, trace.StatusCode.ERROR)
@@ -534,6 +542,7 @@ def response_hook(
534542
HTTP_URL: self.URL,
535543
HTTP_STATUS_CODE: 200,
536544
"http.response.body": "Hello!",
545+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
537546
},
538547
)
539548

@@ -564,6 +573,7 @@ def test_requests_exception_without_response(self, *_, **__):
564573
{
565574
HTTP_METHOD: "GET",
566575
HTTP_URL: self.URL,
576+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
567577
},
568578
)
569579
self.assertEqual(span.status.status_code, StatusCode.ERROR)
@@ -591,6 +601,7 @@ def test_requests_exception_new_semconv(self, *_, **__):
591601
NETWORK_PEER_PORT: 80,
592602
NETWORK_PEER_ADDRESS: "mock",
593603
ERROR_TYPE: "RequestException",
604+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
594605
},
595606
)
596607
self.assertEqual(span.status.status_code, StatusCode.ERROR)
@@ -613,6 +624,7 @@ def test_requests_exception_without_proper_response_type(self, *_, **__):
613624
{
614625
HTTP_METHOD: "GET",
615626
HTTP_URL: self.URL,
627+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
616628
},
617629
)
618630
self.assertEqual(span.status.status_code, StatusCode.ERROR)
@@ -636,6 +648,7 @@ def test_requests_exception_with_response(self, *_, **__):
636648
HTTP_METHOD: "GET",
637649
HTTP_URL: self.URL,
638650
HTTP_STATUS_CODE: 500,
651+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
639652
},
640653
)
641654
self.assertEqual(span.status.status_code, StatusCode.ERROR)
@@ -675,6 +688,7 @@ def test_adapter_with_custom_response(self):
675688
"http.method": "GET",
676689
"http.url": self.URL,
677690
"http.status_code": 210,
691+
USER_AGENT_ORIGINAL: "python-requests/2.32.3",
678692
},
679693
)
680694

0 commit comments

Comments
 (0)