From 9e22828f3e0824c24aa7ffa9b834158ab7c082f4 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 24 Sep 2025 14:39:25 +0200 Subject: [PATCH 01/21] wip --- src/sentry/insights/__init__.py | 14 ++++---- .../consumers/process_segments/convert.py | 7 +++- .../consumers/process_segments/enrichment.py | 26 +++++++-------- .../consumers/process_segments/message.py | 19 ++++++----- .../spans/consumers/process_segments/shim.py | 32 +++++++++++-------- .../spans/consumers/process_segments/types.py | 13 +++++++- src/sentry/spans/grouping/strategy/base.py | 7 ++-- 7 files changed, 69 insertions(+), 49 deletions(-) diff --git a/src/sentry/insights/__init__.py b/src/sentry/insights/__init__.py index 5c5749e4a1821f..bb46b80dae20d0 100644 --- a/src/sentry/insights/__init__.py +++ b/src/sentry/insights/__init__.py @@ -26,15 +26,13 @@ def from_span_v1(cls, span: dict[str, Any]) -> "FilterSpan": ) @classmethod - def from_span_data(cls, data: dict[str, Any]) -> "FilterSpan": - """Get relevant fields from `span.data`. 
- - This will later be replaced by `from_span_attributes` or `from_span_v2`.""" + def from_span_attributes(cls, attributes: dict[str, Any]) -> "FilterSpan": + """Get relevant fields from `span.data`.""" return cls( - op=data.get("sentry.op"), - category=data.get("sentry.category"), - description=data.get("sentry.description"), - transaction_op=data.get("sentry.transaction_op"), + op=(attributes.get("sentry.op") or {}).get("value"), + category=attributes.get("sentry.category" or {}).get("value"), + description=attributes.get("sentry.description" or {}).get("value"), + transaction_op=attributes.get("sentry.transaction_op" or {}).get("value"), ) diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index faa93d2a40cf74..6d4381754f267f 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -37,7 +37,12 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: client_sample_rate = 1.0 server_sample_rate = 1.0 - for k, v in (span.get("data") or {}).items(): + for k, v in (span.get("attributes") or {}).items(): + try: + v = v["value"] + except Exception: + sentry_sdk.capture_exception() + continue if v is not None: try: attributes[k] = _anyvalue(v) diff --git a/src/sentry/spans/consumers/process_segments/enrichment.py b/src/sentry/spans/consumers/process_segments/enrichment.py index 91b358fc06f692..803aee20c58355 100644 --- a/src/sentry/spans/consumers/process_segments/enrichment.py +++ b/src/sentry/spans/consumers/process_segments/enrichment.py @@ -5,7 +5,7 @@ from sentry_kafka_schemas.schema_types.buffered_segments_v1 import SegmentSpan from sentry.performance_issues.types import SentryTags as PerformanceIssuesSentryTags -from sentry.spans.consumers.process_segments.types import EnrichedSpan, get_span_op +from sentry.spans.consumers.process_segments.types import EnrichedSpan, attribute_value, get_span_op # Keys of 
shared sentry attributes that are shared across all spans in a segment. This list # is taken from `extract_shared_tags` in Relay. @@ -79,16 +79,16 @@ def __init__(self, spans: list[SegmentSpan]) -> None: interval = _span_interval(span) self._span_map.setdefault(parent_span_id, []).append(interval) - def _data(self, span: SegmentSpan) -> dict[str, Any]: - ret = {**span.get("data", {})} + def _attributes(self, span: SegmentSpan) -> dict[str, Any]: + ret = {**span.get("attributes", {})} if self._segment_span is not None: # Assume that Relay has extracted the shared tags into `data` on the # root span. Once `sentry_tags` is removed, the logic from # `extract_shared_tags` should be moved here. - segment_fields = self._segment_span.get("data", {}) - shared_tags = {k: v for k, v in segment_fields.items() if k in SHARED_SENTRY_ATTRIBUTES} + segment_attrs = self._segment_span.get("attributes", {}) + shared_tags = {k: v for k, v in segment_attrs.items() if k in SHARED_SENTRY_ATTRIBUTES} - is_mobile = segment_fields.get("sentry.mobile") == "true" + is_mobile = attribute_value(span, "sentry.mobile") == "true" mobile_start_type = _get_mobile_start_type(self._segment_span) if is_mobile: @@ -157,10 +157,10 @@ def _exclusive_time(self, span: SegmentSpan) -> float: def enrich_span(self, span: SegmentSpan) -> EnrichedSpan: exclusive_time = self._exclusive_time(span) - data = self._data(span) + attributes = self._attributes(span) return { **span, - "data": data, + "attributes": attributes, "exclusive_time_ms": exclusive_time, } @@ -184,11 +184,11 @@ def _get_mobile_start_type(segment: SegmentSpan) -> str | None: Check the measurements on the span to determine what kind of start type the event is. 
""" - data = segment.get("data") or {} + attributes = segment.get("attributes") or {} - if "app_start_cold" in data: + if "app_start_cold" in attributes: return "cold" - if "app_start_warm" in data: + if "app_start_warm" in attributes: return "warm" return None @@ -215,7 +215,7 @@ def _us(timestamp: float) -> int: def compute_breakdowns( spans: Sequence[SegmentSpan], breakdowns_config: dict[str, dict[str, Any]], -) -> dict[str, float]: +) -> dict[str, Any]: """ Computes breakdowns from all spans and writes them to the segment span. @@ -234,7 +234,7 @@ def compute_breakdowns( continue for key, value in breakdowns.items(): - ret[f"{breakdown_name}.{key}"] = value + ret[f"{breakdown_name}.{key}"] = {"value": value} return ret diff --git a/src/sentry/spans/consumers/process_segments/message.py b/src/sentry/spans/consumers/process_segments/message.py index 870fd964ffee8c..e342e3233cf1d2 100644 --- a/src/sentry/spans/consumers/process_segments/message.py +++ b/src/sentry/spans/consumers/process_segments/message.py @@ -29,7 +29,7 @@ from sentry.signals import first_insight_span_received, first_transaction_received from sentry.spans.consumers.process_segments.enrichment import TreeEnricher, compute_breakdowns from sentry.spans.consumers.process_segments.shim import build_shim_event_data, make_compatible -from sentry.spans.consumers.process_segments.types import CompatibleSpan +from sentry.spans.consumers.process_segments.types import CompatibleSpan, attribute_value from sentry.spans.grouping.api import load_span_grouping_config from sentry.utils import metrics from sentry.utils.dates import to_datetime @@ -145,7 +145,7 @@ def _compute_breakdowns( ) -> None: config = project.get_option("sentry:breakdowns") breakdowns = compute_breakdowns(spans, config) - segment.setdefault("data", {}).update(breakdowns) + segment.setdefault("attributes", {}).update(breakdowns) @metrics.wraps("spans.consumers.process_segments.create_models") @@ -155,9 +155,9 @@ def _create_models(segment: 
CompatibleSpan, project: Project) -> None: relationships between them and the Project model. """ - environment_name = segment["data"].get("sentry.environment") - release_name = segment["data"].get("sentry.release") - dist_name = segment["data"].get("sentry.dist") + environment_name = attribute_value(segment, "sentry.environment") + release_name = attribute_value(segment, "sentry.release") + dist_name = attribute_value(segment, "sentry.dist") date = to_datetime(segment["end_timestamp_precise"]) environment = Environment.get_or_create(project=project, name=environment_name) @@ -248,13 +248,12 @@ def _detect_performance_problems( def _record_signals( segment_span: CompatibleSpan, spans: list[CompatibleSpan], project: Project ) -> None: - data = segment_span.get("data", {}) record_generic_event_processed( project, - platform=data.get("sentry.platform"), - release=data.get("sentry.release"), - environment=data.get("sentry.environment"), + platform=attribute_value(segment_span, "sentry.platform"), + release=attribute_value(segment_span, "sentry.release"), + environment=attribute_value(segment_span, "sentry.environment"), ) # signal expects an event like object with a datetime attribute @@ -268,7 +267,7 @@ def _record_signals( ) for module in insights_modules( - [FilterSpan.from_span_data(span.get("data", {})) for span in spans] + [FilterSpan.from_span_attributes(span.get("attributes") or {}) for span in spans] ): set_project_flag_and_signal( project, diff --git a/src/sentry/spans/consumers/process_segments/shim.py b/src/sentry/spans/consumers/process_segments/shim.py index 822efe657a7d6c..a6f1addfaf30ab 100644 --- a/src/sentry/spans/consumers/process_segments/shim.py +++ b/src/sentry/spans/consumers/process_segments/shim.py @@ -11,7 +11,12 @@ from sentry_kafka_schemas.schema_types.buffered_segments_v1 import _SentryExtractedTags from sentry.performance_issues.types import SentryTags as PerformanceIssuesSentryTags -from sentry.spans.consumers.process_segments.types import 
CompatibleSpan, EnrichedSpan, get_span_op +from sentry.spans.consumers.process_segments.types import ( + CompatibleSpan, + EnrichedSpan, + attribute_value, + get_span_op, +) from sentry.utils.dates import to_datetime @@ -25,7 +30,7 @@ def make_compatible(span: EnrichedSpan) -> CompatibleSpan: # logic. ret: CompatibleSpan = { **span, - "sentry_tags": _sentry_tags(span.get("data") or {}), + "sentry_tags": _sentry_tags(span.get("attributes") or {}), "op": get_span_op(span), # Note: Event protocol spans expect `exclusive_time` while EAP expects # `exclusive_time_ms`. Both are the same value in milliseconds @@ -35,7 +40,7 @@ def make_compatible(span: EnrichedSpan) -> CompatibleSpan: return ret -def _sentry_tags(data: dict[str, Any]) -> _SentryExtractedTags: +def _sentry_tags(attributes: dict[str, Any]) -> _SentryExtractedTags: """Backfill sentry tags used in performance issue detection. Once performance issue detection is only called from process_segments, @@ -44,11 +49,11 @@ def _sentry_tags(data: dict[str, Any]) -> _SentryExtractedTags: """ sentry_tags: _SentryExtractedTags = {} for tag_key in PerformanceIssuesSentryTags.__mutable_keys__: - data_key = ( + attribute_key = ( "sentry.normalized_description" if tag_key == "description" else f"sentry.{tag_key}" ) - if data_key in data: - sentry_tags[tag_key] = data[data_key] # type: ignore[literal-required] + if attribute_key in attributes: + sentry_tags[tag_key] = (attributes[attribute_key] or {}).get("value") return sentry_tags @@ -57,7 +62,6 @@ def build_shim_event_data( segment_span: CompatibleSpan, spans: list[CompatibleSpan] ) -> dict[str, Any]: """Create a shimmed event payload for performance issue detection.""" - data = segment_span.get("data", {}) event: dict[str, Any] = { "type": "transaction", @@ -66,19 +70,19 @@ def build_shim_event_data( "trace": { "trace_id": segment_span["trace_id"], "type": "trace", - "op": data.get("sentry.transaction.op"), + "op": attribute_value(segment_span, "sentry.transaction.op"), 
"span_id": segment_span["span_id"], "hash": segment_span["hash"], }, }, "event_id": uuid.uuid4().hex, "project_id": segment_span["project_id"], - "transaction": data.get("sentry.transaction"), - "release": data.get("sentry.release"), - "dist": data.get("sentry.dist"), - "environment": data.get("sentry.environment"), - "platform": data.get("sentry.platform"), - "tags": [["environment", data.get("sentry.environment")]], + "transaction": attribute_value(segment_span, "sentry.transaction"), + "release": attribute_value(segment_span, "sentry.release"), + "dist": attribute_value(segment_span, "sentry.dist"), + "environment": attribute_value(segment_span, "sentry.environment"), + "platform": attribute_value(segment_span, "sentry.platform"), + "tags": [["environment", attribute_value(segment_span, "sentry.environment")]], "received": segment_span["received"], "timestamp": segment_span["end_timestamp_precise"], "start_timestamp": segment_span["start_timestamp_precise"], diff --git a/src/sentry/spans/consumers/process_segments/types.py b/src/sentry/spans/consumers/process_segments/types.py index ba4d8864c8bca6..3a4155ccd3c0fc 100644 --- a/src/sentry/spans/consumers/process_segments/types.py +++ b/src/sentry/spans/consumers/process_segments/types.py @@ -1,7 +1,10 @@ from typing import NotRequired +import sentry_sdk from sentry_kafka_schemas.schema_types.buffered_segments_v1 import SegmentSpan +from sentry.spans.consumers.process_segments.convert import Any + # The default span.op to assume if it is missing on the span. This should be # normalized by Relay, but we defensively apply the same fallback as the op is # not guaranteed in typing. 
@@ -9,7 +12,7 @@ def get_span_op(span: SegmentSpan) -> str: - return span.get("data", {}).get("sentry.op") or DEFAULT_SPAN_OP + return attribute_value(span, "sentry.op") or DEFAULT_SPAN_OP class EnrichedSpan(SegmentSpan, total=True): @@ -31,3 +34,11 @@ class CompatibleSpan(EnrichedSpan, total=True): # Added by `SpanGroupingResults.write_to_spans` in `_enrich_spans` hash: NotRequired[str] + + +def attribute_value(span: SegmentSpan, key) -> Any: + attributes = span.get("attributes") or {} + try: + return attributes.get(key)["value"] + except Exception as e: + sentry_sdk.capture_exception(e) diff --git a/src/sentry/spans/grouping/strategy/base.py b/src/sentry/spans/grouping/strategy/base.py index 11db6068536d77..3b945a2b39a2f1 100644 --- a/src/sentry/spans/grouping/strategy/base.py +++ b/src/sentry/spans/grouping/strategy/base.py @@ -3,6 +3,7 @@ from dataclasses import dataclass from typing import Any, NotRequired, Optional, TypedDict +from sentry.spans.consumers.process_segments.types import attribute_value from sentry.spans.grouping.utils import Hash, parse_fingerprint_var from sentry.utils import urls @@ -55,8 +56,10 @@ def get_standalone_span_group(self, span: Span) -> str: # Treat the segment span like get_transaction_span_group for backwards # compatibility with transaction events, but fall back to default # fingerprinting if the span doesn't have a transaction. 
- data = span.get("data") or {} - if span.get("is_segment") and (transaction := data.get("sentry.transaction")) is not None: + if ( + span.get("is_segment") + and (transaction := attribute_value(span, "sentry.transaction")) is not None + ): result = Hash() result.update(transaction) return result.hexdigest() From 6bc5a043ef56c6902537c8eb3744c6438126ef54 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Sep 2025 07:53:28 +0200 Subject: [PATCH 02/21] fix --- src/sentry/insights/__init__.py | 6 +- .../consumers/process_segments/convert.py | 32 +++---- .../spans/consumers/process_segments/types.py | 4 +- .../spans/consumers/process/__init__.py | 16 ++-- .../process_segments/test_convert.py | 89 ++++++++++--------- .../process_segments/test_enrichment.py | 30 ++++--- .../process_segments/test_message.py | 34 +++---- .../consumers/process_segments/test_shim.py | 2 +- 8 files changed, 109 insertions(+), 104 deletions(-) diff --git a/src/sentry/insights/__init__.py b/src/sentry/insights/__init__.py index bb46b80dae20d0..ebe0b0aadc3c04 100644 --- a/src/sentry/insights/__init__.py +++ b/src/sentry/insights/__init__.py @@ -30,9 +30,9 @@ def from_span_attributes(cls, attributes: dict[str, Any]) -> "FilterSpan": """Get relevant fields from `span.data`.""" return cls( op=(attributes.get("sentry.op") or {}).get("value"), - category=attributes.get("sentry.category" or {}).get("value"), - description=attributes.get("sentry.description" or {}).get("value"), - transaction_op=attributes.get("sentry.transaction_op" or {}).get("value"), + category=(attributes.get("sentry.category") or {}).get("value"), + description=(attributes.get("sentry.description") or {}).get("value"), + transaction_op=(attributes.get("sentry.transaction_op") or {}).get("value"), ) diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index 6d4381754f267f..805adebc63b998 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py 
+++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -39,26 +39,22 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: for k, v in (span.get("attributes") or {}).items(): try: - v = v["value"] + # NOTE: This ignores the `type` field of the attribute itself + value = v["value"] + attributes[k] = _anyvalue(value) except Exception: sentry_sdk.capture_exception() - continue - if v is not None: - try: - attributes[k] = _anyvalue(v) - except Exception: - sentry_sdk.capture_exception() - else: - if k == "sentry.client_sample_rate": - try: - client_sample_rate = float(v) - except ValueError: - pass - elif k == "sentry.server_sample_rate": - try: - server_sample_rate = float(v) - except ValueError: - pass + else: + if k == "sentry.client_sample_rate": + try: + client_sample_rate = float(value) + except ValueError: + pass + elif k == "sentry.server_sample_rate": + try: + server_sample_rate = float(value) + except ValueError: + pass for field_name, attribute_name in FIELD_TO_ATTRIBUTE.items(): v = span.get(field_name) diff --git a/src/sentry/spans/consumers/process_segments/types.py b/src/sentry/spans/consumers/process_segments/types.py index 3a4155ccd3c0fc..cf066a1f406bb8 100644 --- a/src/sentry/spans/consumers/process_segments/types.py +++ b/src/sentry/spans/consumers/process_segments/types.py @@ -1,10 +1,8 @@ -from typing import NotRequired +from typing import Any, NotRequired import sentry_sdk from sentry_kafka_schemas.schema_types.buffered_segments_v1 import SegmentSpan -from sentry.spans.consumers.process_segments.convert import Any - # The default span.op to assume if it is missing on the span. This should be # normalized by Relay, but we defensively apply the same fallback as the op is # not guaranteed in typing. 
diff --git a/tests/sentry/spans/consumers/process/__init__.py b/tests/sentry/spans/consumers/process/__init__.py index bb9b7365a51e0a..82b61db13781da 100644 --- a/tests/sentry/spans/consumers/process/__init__.py +++ b/tests/sentry/spans/consumers/process/__init__.py @@ -1,4 +1,4 @@ -def build_mock_span(project_id, span_op=None, is_segment=False, data=None, **kwargs): +def build_mock_span(project_id, *, span_op=None, is_segment=False, attributes=None, **kwargs): span = { "description": "OrganizationNPlusOne", "duration_ms": 107, @@ -11,12 +11,14 @@ def build_mock_span(project_id, span_op=None, is_segment=False, data=None, **kwa "received": 1707953019.044972, "retention_days": 90, "segment_id": "a49b42af9fb69da0", - "data": { - "sentry.environment": "development", - "sentry.release": "backend@24.2.0.dev0+699ce0cd1281cc3c7275d0a474a595375c769ae8", - "sentry.platform": "python", - "sentry.op": span_op or "base.dispatch.sleep", - **(data or {}), + "attributes": { + "sentry.environment": {"value": "development"}, + "sentry.release": { + "value": "backend@24.2.0.dev0+699ce0cd1281cc3c7275d0a474a595375c769ae8" + }, + "sentry.platform": {"value": "python"}, + "sentry.op": {"value": span_op or "base.dispatch.sleep"}, + **(attributes or {}), }, "span_id": "a49b42af9fb69da0", "start_timestamp_ms": 1707953018865, diff --git a/tests/sentry/spans/consumers/process_segments/test_convert.py b/tests/sentry/spans/consumers/process_segments/test_convert.py index 2329123fd8175e..739188b83d226d 100644 --- a/tests/sentry/spans/consumers/process_segments/test_convert.py +++ b/tests/sentry/spans/consumers/process_segments/test_convert.py @@ -16,48 +16,53 @@ "duration_ms": 152, "exclusive_time_ms": 0.228, "is_segment": True, - "data": { - "http.status_code": "200", - "my.array.field": [1, 2, ["nested", "array"]], - "my.dict.field": {"id": 42, "name": "test"}, - "my.false.bool.field": False, - "my.float.field": 101.2, - "my.int.field": 2000, - "my.neg.field": -100, - "my.neg.float.field": 
-101.2, - "my.true.bool.field": True, - "my.u64.field": 9447000002305251000, - "num_of_spans": 50.0, - "relay_endpoint_version": "3", - "relay_id": "88888888-4444-4444-8444-cccccccccccc", - "relay_no_cache": "False", - "relay_protocol_version": "3", - "relay_use_post_or_schedule": "True", - "relay_use_post_or_schedule_rejected": "version", - "sentry.category": "http", - "sentry.client_sample_rate": 0.1, - "sentry.environment": "development", - "sentry.normalized_description": "normalized_description", - "sentry.op": "http.server", - "sentry.platform": "python", - "sentry.release": "backend@24.7.0.dev0+c45b49caed1e5fcbf70097ab3f434b487c359b6b", - "sentry.sdk.name": "sentry.python.django", - "sentry.sdk.version": "2.7.0", - "sentry.segment.name": "/api/0/relays/projectconfigs/", - "sentry.server_sample_rate": 0.2, - "sentry.status": "ok", - "sentry.status_code": "200", - "sentry.thread.id": "8522009600", - "sentry.thread.name": "uWSGIWorker1Core0", - "sentry.trace.status": "ok", - "sentry.transaction": "/api/0/relays/projectconfigs/", - "sentry.transaction.method": "POST", - "sentry.transaction.op": "http.server", - "sentry.user": "ip:127.0.0.1", - "server_name": "D23CXQ4GK2.local", - "spans_over_limit": "False", - "thread.id": "8522009600", - "thread.name": "uWSGIWorker1Core0", + "attributes": { + "http.status_code": {"value": "200", "type": "string"}, + "my.array.field": {"value": [1, 2, ["nested", "array"]], "type": "array"}, + "my.dict.field": { + "value": {"id": {"value": 42, "name": {"value": "test"}, "type": "object"}} + }, + "my.false.bool.field": {"value": False, "type": "boolean"}, + "my.float.field": {"value": 101.2, "type": "double"}, + "my.int.field": {"value": 2000, "type": "integer"}, + "my.neg.field": {"value": -100, "type": "integer"}, + "my.neg.float.field": {"value": -101.2, "type": "double"}, + "my.true.bool.field": {"value": True, "type": "boolean"}, + "my.u64.field": {"value": 9447000002305251000, "type": "integer"}, + "num_of_spans": {"value": 
50.0, "type": "string"}, + "relay_endpoint_version": {"value": "3", "type": "string"}, + "relay_id": {"value": "88888888-4444-4444-8444-cccccccccccc", "type": "string"}, + "relay_no_cache": {"value": "False", "type": "string"}, + "relay_protocol_version": {"value": "3", "type": "string"}, + "relay_use_post_or_schedule": {"value": "True", "type": "string"}, + "relay_use_post_or_schedule_rejected": {"value": "version", "type": "string"}, + "sentry.category": {"value": "http", "type": "string"}, + "sentry.client_sample_rate": {"value": 0.1, "type": "string"}, + "sentry.environment": {"value": "development", "type": "string"}, + "sentry.normalized_description": {"value": "normalized_description", "type": "string"}, + "sentry.op": {"value": "http.server", "type": "string"}, + "sentry.platform": {"value": "python", "type": "string"}, + "sentry.release": { + "value": "backend@24.7.0.dev0+c45b49caed1e5fcbf70097ab3f434b487c359b6b", + "type": "string", + }, + "sentry.sdk.name": {"value": "sentry.python.django", "type": "string"}, + "sentry.sdk.version": {"value": "2.7.0", "type": "string"}, + "sentry.segment.name": {"value": "/api/0/relays/projectconfigs/", "type": "string"}, + "sentry.server_sample_rate": {"value": 0.2, "type": "string"}, + "sentry.status": {"value": "ok", "type": "string"}, + "sentry.status_code": {"value": "200", "type": "string"}, + "sentry.thread.id": {"value": "8522009600", "type": "string"}, + "sentry.thread.name": {"value": "uWSGIWorker1Core0", "type": "string"}, + "sentry.trace.status": {"value": "ok", "type": "string"}, + "sentry.transaction": {"value": "/api/0/relays/projectconfigs/", "type": "string"}, + "sentry.transaction.method": {"value": "POST", "type": "string"}, + "sentry.transaction.op": {"value": "http.server", "type": "string"}, + "sentry.user": {"value": "ip:127.0.0.1", "type": "string"}, + "server_name": {"value": "D23CXQ4GK2.local", "type": "string"}, + "spans_over_limit": {"value": "False", "type": "string"}, + "thread.id": 
{"value": "8522009600", "type": "string"}, + "thread.name": {"value": "uWSGIWorker1Core0", "type": "string"}, }, "sentry_tags": {"ignored": "tags"}, "profile_id": "56c7d1401ea14ad7b4ac86de46baebae", diff --git a/tests/sentry/spans/consumers/process_segments/test_enrichment.py b/tests/sentry/spans/consumers/process_segments/test_enrichment.py index f11b5cb5a1f662..8c33f48209751e 100644 --- a/tests/sentry/spans/consumers/process_segments/test_enrichment.py +++ b/tests/sentry/spans/consumers/process_segments/test_enrichment.py @@ -369,10 +369,10 @@ def test_emit_ops_breakdown() -> None: _ = TreeEnricher.enrich_spans(spans) updates = compute_breakdowns(spans, breakdowns_config) - assert updates["span_ops.ops.http"] == 3600000.0 - assert updates["span_ops.ops.db"] == 7200000.0 - assert updates["span_ops_2.ops.http"] == 3600000.0 - assert updates["span_ops_2.ops.db"] == 7200000.0 + assert updates["span_ops.ops.http"]["value"] == 3600000.0 + assert updates["span_ops.ops.db"]["value"] == 7200000.0 + assert updates["span_ops_2.ops.http"]["value"] == 3600000.0 + assert updates["span_ops_2.ops.db"]["value"] == 7200000.0 # NOTE: Relay used to extract a total.time breakdown, which is no longer # included in span breakdowns. 
@@ -384,20 +384,22 @@ def test_write_tags_for_performance_issue_detection(): segment_span = _mock_performance_issue_span( is_segment=True, span_id="ffffffffffffffff", - data={ - "sentry.sdk.name": "sentry.php.laravel", - "sentry.environment": "production", - "sentry.release": "1.0.0", - "sentry.platform": "php", + attributes={ + "sentry.sdk.name": {"value": "sentry.php.laravel"}, + "sentry.environment": {"value": "production"}, + "sentry.release": {"value": "1.0.0"}, + "sentry.platform": {"value": "php"}, }, ) spans = [ _mock_performance_issue_span( is_segment=False, - data={ - "sentry.system": "mongodb", - "sentry.normalized_description": '{"filter":{"productid":{"buffer":"?"}},"find":"reviews"}', + attributes={ + "sentry.system": {"value": "mongodb"}, + "sentry.normalized_description": { + "value": '{"filter":{"productid":{"buffer":"?"}},"find":"reviews"}' + }, }, ), segment_span, @@ -425,7 +427,7 @@ def test_write_tags_for_performance_issue_detection(): } -def _mock_performance_issue_span(is_segment, data, **fields): +def _mock_performance_issue_span(is_segment, attributes, **fields): return { "description": "OrganizationNPlusOne", "duration_ms": 107, @@ -438,7 +440,7 @@ def _mock_performance_issue_span(is_segment, data, **fields): "received": 1707953019.044972, "retention_days": 90, "segment_id": "a49b42af9fb69da0", - "data": data, + "attributes": attributes, "span_id": "a49b42af9fb69da0", "start_timestamp_ms": 1707953018865, "start_timestamp_precise": 1707953018.865, diff --git a/tests/sentry/spans/consumers/process_segments/test_message.py b/tests/sentry/spans/consumers/process_segments/test_message.py index d1914c064b0854..29946c97eddb6b 100644 --- a/tests/sentry/spans/consumers/process_segments/test_message.py +++ b/tests/sentry/spans/consumers/process_segments/test_message.py @@ -24,12 +24,14 @@ def generate_basic_spans(self): segment_span = build_mock_span( project_id=self.project.id, is_segment=True, - data={ - "sentry.browser.name": "Google Chrome", - 
"sentry.transaction": "/api/0/organizations/{organization_id_or_slug}/n-plus-one/", - "sentry.transaction.method": "GET", - "sentry.transaction.op": "http.server", - "sentry.user": "id:1", + attributes={ + "sentry.browser.name": {"value": "Google Chrome"}, + "sentry.transaction": { + "value": "/api/0/organizations/{organization_id_or_slug}/n-plus-one/" + }, + "sentry.transaction.method": {"value": "GET"}, + "sentry.transaction.op": {"value": "http.server"}, + "sentry.user": {"value": "id:1"}, }, ) child_span = build_mock_span( @@ -90,19 +92,19 @@ def test_enrich_spans(self) -> None: assert len(processed_spans) == len(spans) child_span, segment_span = processed_spans - child_data = child_span["data"] - segment_data = segment_span["data"] + child_attrs = child_span["attributes"] + segment_data = segment_span["attributes"] - assert child_data["sentry.transaction"] == segment_data["sentry.transaction"] - assert child_data["sentry.transaction.method"] == segment_data["sentry.transaction.method"] - assert child_data["sentry.transaction.op"] == segment_data["sentry.transaction.op"] - assert child_data["sentry.user"] == segment_data["sentry.user"] + assert child_attrs["sentry.transaction"] == segment_data["sentry.transaction"] + assert child_attrs["sentry.transaction.method"] == segment_data["sentry.transaction.method"] + assert child_attrs["sentry.transaction.op"] == segment_data["sentry.transaction.op"] + assert child_attrs["sentry.user"] == segment_data["sentry.user"] def test_enrich_spans_no_segment(self) -> None: spans = self.generate_basic_spans() for span in spans: span["is_segment"] = False - del span["data"] + del span["attributes"] processed_spans = process_segment(spans) assert len(processed_spans) == len(spans) @@ -227,9 +229,9 @@ def test_record_signals(self, mock_track): project_id=self.project.id, is_segment=True, span_op="http.client", - data={ - "sentry.op": "http.client", - "sentry.category": "http", + attributes={ + "sentry.op": {"value": "http.client"}, 
+ "sentry.category": {"value": "http"}, }, ) spans = process_segment([span]) diff --git a/tests/sentry/spans/consumers/process_segments/test_shim.py b/tests/sentry/spans/consumers/process_segments/test_shim.py index b923bda992d2d8..a65b65cf979b0a 100644 --- a/tests/sentry/spans/consumers/process_segments/test_shim.py +++ b/tests/sentry/spans/consumers/process_segments/test_shim.py @@ -9,7 +9,7 @@ def test_make_compatible(): message = cast(EnrichedSpan, {**SPAN_KAFKA_MESSAGE, "sentry_tags": {"ignored": "tags"}}) compatible = make_compatible(message) assert compatible["exclusive_time"] == message["exclusive_time_ms"] - assert compatible["op"] == message["data"]["sentry.op"] + assert compatible["op"] == message["attributes"]["sentry.op"]["value"] # Pre-existing tags got overwritten: assert compatible["sentry_tags"] == { From fd55002ca1b2c3876cae97ca1c3211d97b152ea5 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Sep 2025 09:01:32 +0200 Subject: [PATCH 03/21] fix --- .../process_segments/test_convert.py | 79 ++++++++++--------- 1 file changed, 40 insertions(+), 39 deletions(-) diff --git a/tests/sentry/spans/consumers/process_segments/test_convert.py b/tests/sentry/spans/consumers/process_segments/test_convert.py index 739188b83d226d..e6ae71348859b3 100644 --- a/tests/sentry/spans/consumers/process_segments/test_convert.py +++ b/tests/sentry/spans/consumers/process_segments/test_convert.py @@ -19,9 +19,7 @@ "attributes": { "http.status_code": {"value": "200", "type": "string"}, "my.array.field": {"value": [1, 2, ["nested", "array"]], "type": "array"}, - "my.dict.field": { - "value": {"id": {"value": 42, "name": {"value": "test"}, "type": "object"}} - }, + "my.dict.field": {"value": {"id": 42, "name": "test"}, "type": "object"}, "my.false.bool.field": {"value": False, "type": "boolean"}, "my.float.field": {"value": 101.2, "type": "double"}, "my.int.field": {"value": 2000, "type": "integer"}, @@ -95,60 +93,63 @@ def test_convert_span_to_item() -> None: assert 
item.retention_days == 90 assert item.received == Timestamp(seconds=1721319572, nanos=877828000) - assert item.attributes == { + # Sort for easier comparison: + attrs = {k: v for (k, v) in sorted(item.attributes.items())} + + assert attrs == { + "http.status_code": AnyValue(string_value="200"), + "my.array.field": AnyValue(string_value=r"""[1,2,["nested","array"]]"""), + "my.dict.field": AnyValue(string_value=r"""{"id":42,"name":"test"}"""), "my.false.bool.field": AnyValue(bool_value=False), - "my.true.bool.field": AnyValue(bool_value=True), - "sentry.is_segment": AnyValue(bool_value=True), "my.float.field": AnyValue(double_value=101.2), - "my.neg.float.field": AnyValue(double_value=-101.2), - "sentry.exclusive_time_ms": AnyValue(double_value=0.228), - "sentry.start_timestamp_precise": AnyValue(double_value=1721319572.616648), - "num_of_spans": AnyValue(double_value=50.0), - "sentry.end_timestamp_precise": AnyValue(double_value=1721319572.768806), - "sentry.duration_ms": AnyValue(int_value=152), - "sentry.received": AnyValue(double_value=1721319572.877828), "my.int.field": AnyValue(int_value=2000), "my.neg.field": AnyValue(int_value=-100), - "relay_protocol_version": AnyValue(string_value="3"), - "sentry.raw_description": AnyValue(string_value="/api/0/relays/projectconfigs/"), - "sentry.segment_id": AnyValue(string_value="8873a98879faf06d"), - "sentry.transaction.method": AnyValue(string_value="POST"), - "server_name": AnyValue(string_value="D23CXQ4GK2.local"), - "sentry.status": AnyValue(string_value="ok"), + "my.neg.float.field": AnyValue(double_value=-101.2), + "my.true.bool.field": AnyValue(bool_value=True), + "my.u64.field": AnyValue(double_value=9447000002305251000.0), + "num_of_spans": AnyValue(double_value=50.0), "relay_endpoint_version": AnyValue(string_value="3"), + "relay_id": AnyValue(string_value="88888888-4444-4444-8444-cccccccccccc"), "relay_no_cache": AnyValue(string_value="False"), + "relay_protocol_version": AnyValue(string_value="3"), + 
"relay_use_post_or_schedule_rejected": AnyValue(string_value="version"), "relay_use_post_or_schedule": AnyValue(string_value="True"), - "spans_over_limit": AnyValue(string_value="False"), - "sentry.segment.name": AnyValue(string_value="/api/0/relays/projectconfigs/"), - "sentry.status_code": AnyValue(string_value="200"), + "sentry.category": AnyValue(string_value="http"), + "sentry.client_sample_rate": AnyValue(double_value=0.1), + "sentry.duration_ms": AnyValue(int_value=152), + "sentry.end_timestamp_precise": AnyValue(double_value=1721319572.768806), + "sentry.environment": AnyValue(string_value="development"), + "sentry.exclusive_time_ms": AnyValue(double_value=0.228), + "sentry.is_segment": AnyValue(bool_value=True), + "sentry.normalized_description": AnyValue(string_value="normalized_description"), "sentry.op": AnyValue(string_value="http.server"), "sentry.origin": AnyValue(string_value="auto.http.django"), - "sentry.transaction": AnyValue(string_value="/api/0/relays/projectconfigs/"), - "sentry.thread.name": AnyValue(string_value="uWSGIWorker1Core0"), + "sentry.platform": AnyValue(string_value="python"), "sentry.profile_id": AnyValue(string_value="56c7d1401ea14ad7b4ac86de46baebae"), - "thread.id": AnyValue(string_value="8522009600"), - "http.status_code": AnyValue(string_value="200"), + "sentry.raw_description": AnyValue(string_value="/api/0/relays/projectconfigs/"), + "sentry.received": AnyValue(double_value=1721319572.877828), "sentry.release": AnyValue( string_value="backend@24.7.0.dev0+c45b49caed1e5fcbf70097ab3f434b487c359b6b" ), "sentry.sdk.name": AnyValue(string_value="sentry.python.django"), - "sentry.transaction.op": AnyValue(string_value="http.server"), - "relay_id": AnyValue(string_value="88888888-4444-4444-8444-cccccccccccc"), - "sentry.trace.status": AnyValue(string_value="ok"), - "sentry.category": AnyValue(string_value="http"), - "sentry.environment": AnyValue(string_value="development"), - "sentry.thread.id": 
AnyValue(string_value="8522009600"), "sentry.sdk.version": AnyValue(string_value="2.7.0"), - "sentry.platform": AnyValue(string_value="python"), - "sentry.client_sample_rate": AnyValue(double_value=0.1), + "sentry.segment_id": AnyValue(string_value="8873a98879faf06d"), + "sentry.segment.name": AnyValue(string_value="/api/0/relays/projectconfigs/"), "sentry.server_sample_rate": AnyValue(double_value=0.2), + "sentry.start_timestamp_precise": AnyValue(double_value=1721319572.616648), + "sentry.status_code": AnyValue(string_value="200"), + "sentry.status": AnyValue(string_value="ok"), + "sentry.thread.id": AnyValue(string_value="8522009600"), + "sentry.thread.name": AnyValue(string_value="uWSGIWorker1Core0"), + "sentry.trace.status": AnyValue(string_value="ok"), + "sentry.transaction.method": AnyValue(string_value="POST"), + "sentry.transaction.op": AnyValue(string_value="http.server"), + "sentry.transaction": AnyValue(string_value="/api/0/relays/projectconfigs/"), "sentry.user": AnyValue(string_value="ip:127.0.0.1"), - "relay_use_post_or_schedule_rejected": AnyValue(string_value="version"), - "sentry.normalized_description": AnyValue(string_value="normalized_description"), + "server_name": AnyValue(string_value="D23CXQ4GK2.local"), + "spans_over_limit": AnyValue(string_value="False"), + "thread.id": AnyValue(string_value="8522009600"), "thread.name": AnyValue(string_value="uWSGIWorker1Core0"), - "my.dict.field": AnyValue(string_value=r"""{"id":42,"name":"test"}"""), - "my.u64.field": AnyValue(double_value=9447000002305251000.0), - "my.array.field": AnyValue(string_value=r"""[1,2,["nested","array"]]"""), } From 65f98c216306be9b62bf9b77fa82c65f918e1c6f Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 25 Sep 2025 16:13:52 +0200 Subject: [PATCH 04/21] wip --- .../spans/consumers/process_segments/convert.py | 14 +++++++++++++- .../consumers/process_segments/enrichment.py | 17 ----------------- src/sentry/spans/grouping/strategy/base.py | 16 ++++++++++------ 3 files 
changed, 23 insertions(+), 24 deletions(-) diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index 805adebc63b998..0430de592acde4 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -13,7 +13,6 @@ I64_MAX = 2**63 - 1 FIELD_TO_ATTRIBUTE = { - "description": "sentry.raw_description", "duration_ms": "sentry.duration_ms", "is_segment": "sentry.is_segment", "exclusive_time_ms": "sentry.exclusive_time_ms", @@ -30,6 +29,11 @@ "event_id": "sentry.event_id", } +ATTRIBUTE_TO_ATTRIBUTE = { + "sentry.description": "sentry.raw_description", + "sentry.duration": "sentry.duration_ms", +} + def convert_span_to_item(span: CompatibleSpan) -> TraceItem: attributes: MutableMapping[str, AnyValue] = {} # TODO @@ -61,6 +65,14 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: if v is not None: attributes[attribute_name] = _anyvalue(v) + if "sentry.duration_ms" not in attributes: + pass # FIXME + # attributes["sentry.duration_ms"] = + + for input_name, output_name in ATTRIBUTE_TO_ATTRIBUTE.items(): + if input_name in attributes: + attributes[output_name] = attributes[input_name] + if links := span.get("links"): try: sanitized_links = [_sanitize_span_link(link) for link in links] diff --git a/src/sentry/spans/consumers/process_segments/enrichment.py b/src/sentry/spans/consumers/process_segments/enrichment.py index 803aee20c58355..a20ebf19bac231 100644 --- a/src/sentry/spans/consumers/process_segments/enrichment.py +++ b/src/sentry/spans/consumers/process_segments/enrichment.py @@ -4,7 +4,6 @@ from sentry_kafka_schemas.schema_types.buffered_segments_v1 import SegmentSpan -from sentry.performance_issues.types import SentryTags as PerformanceIssuesSentryTags from sentry.spans.consumers.process_segments.types import EnrichedSpan, attribute_value, get_span_op # Keys of shared sentry attributes that are shared across all spans in 
a segment. This list @@ -111,22 +110,6 @@ def _attributes(self, span: SegmentSpan) -> dict[str, Any]: return ret - def _sentry_tags(self, data: dict[str, Any]) -> dict[str, str]: - """Backfill sentry tags used in performance issue detection. - - Once performance issue detection is only called from process_segments, - (not from event_manager), the performance issues code can be refactored to access - span attributes instead of sentry_tags. - """ - sentry_tags = {} - for tag_key in PerformanceIssuesSentryTags.__mutable_keys__: - data_key = ( - "sentry.normalized_description" if tag_key == "description" else f"sentry.{tag_key}" - ) - if data_key in data: - sentry_tags[tag_key] = data[data_key] - return sentry_tags - def _exclusive_time(self, span: SegmentSpan) -> float: """ Sets the exclusive time on all spans in the list. diff --git a/src/sentry/spans/grouping/strategy/base.py b/src/sentry/spans/grouping/strategy/base.py index 3b945a2b39a2f1..990b18890f5f63 100644 --- a/src/sentry/spans/grouping/strategy/base.py +++ b/src/sentry/spans/grouping/strategy/base.py @@ -120,7 +120,11 @@ def raw_description_strategy(span: Span) -> Sequence[str]: strategy is only effective if the span description is a fixed string. Otherwise, this strategy will produce a large number of span groups. """ - return [span.get("description") or ""] + return [raw_description(span) or ""] + + +def raw_description(span: Span) -> str: + return span.get("description") or attribute_value(span, "sentry.description") IN_CONDITION_PATTERN = re.compile(r" IN \(%s(\s*,\s*%s)*\)") @@ -143,7 +147,7 @@ def normalized_db_span_in_condition_strategy(span: Span) -> Sequence[str] | None seen as different spans. 
We want these spans to be seen as similar spans, so we normalize the right hand side of `IN` conditions to `(%s) to use in the fingerprint.""" - description = span.get("description") or "" + description = raw_description(span) cleaned, count = IN_CONDITION_PATTERN.subn(" IN (%s)", description) if count == 0: return None @@ -168,7 +172,7 @@ def loose_normalized_db_span_in_condition_strategy(span: Span) -> Sequence[str] """This is identical to the above `normalized_db_span_in_condition_strategy` but it uses a looser regular expression that catches database spans that come from Laravel and Rails""" - description = span.get("description") or "" + description = raw_description(span) cleaned, count = LOOSE_IN_CONDITION_PATTERN.subn(" IN (%s)", description) if count == 0: return None @@ -212,7 +216,7 @@ def parametrize_db_span_strategy(span: Span) -> Sequence[str] | None: conservative with the literals we target. Currently, only single-quoted strings are parametrized even though MySQL supports double-quoted strings as well, because PG uses double-quoted strings for identifiers.""" - query = span.get("description") or "" + query = raw_description(span) query, in_count = LOOSE_IN_CONDITION_PATTERN.subn(" IN (%s)", query) query, savepoint_count = DB_SAVEPOINT_PATTERN.subn("SAVEPOINT %s", query) query, param_count = DB_PARAMETRIZATION_PATTERN.subn("%s", query) @@ -258,7 +262,7 @@ def remove_http_client_query_string_strategy(span: Span) -> Sequence[str] | None """ # Check the description is of the form ` ` - description = span.get("description") or "" + description = raw_description(span) parts = description.split(" ", 1) if len(parts) != 2: return None @@ -279,7 +283,7 @@ def remove_redis_command_arguments_strategy(span: Span) -> Sequence[str] | None: The arguments to the redis command is highly variable and therefore not used as a part of the fingerprint. 
""" - description = span.get("description") or "" + description = raw_description(span) parts = description.split(" ", 1) # the redis command name is the first word in the description From 55b62d8608dd81f081243f8eee02b2f8fc2cbf30 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Sep 2025 09:26:30 +0200 Subject: [PATCH 05/21] _timestamp_precise -> _timestamp --- src/sentry/spans/buffer.py | 6 +- src/sentry/spans/consumers/process/factory.py | 2 +- .../consumers/process_segments/convert.py | 33 ++--- .../consumers/process_segments/enrichment.py | 8 +- .../consumers/process_segments/message.py | 25 ++-- .../spans/consumers/process_segments/shim.py | 12 +- .../spans/consumers/process/__init__.py | 4 +- .../spans/consumers/process/test_consumer.py | 4 +- .../spans/consumers/process/test_flusher.py | 8 +- .../process_segments/test_convert.py | 8 +- .../process_segments/test_enrichment.py | 140 +++++++++--------- .../process_segments/test_message.py | 16 +- tests/sentry/spans/test_buffer.py | 66 ++++----- 13 files changed, 164 insertions(+), 168 deletions(-) diff --git a/src/sentry/spans/buffer.py b/src/sentry/spans/buffer.py index a8bebf7d14c0de..5ca6c6c59defbf 100644 --- a/src/sentry/spans/buffer.py +++ b/src/sentry/spans/buffer.py @@ -129,7 +129,7 @@ class Span(NamedTuple): segment_id: str | None project_id: int payload: bytes - end_timestamp_precise: float + end_timestamp: float is_segment_span: bool = False def effective_parent_id(self): @@ -339,7 +339,7 @@ def _group_by_parent(self, spans: Sequence[Span]) -> dict[tuple[str, str], list[ def _prepare_payloads(self, spans: list[Span]) -> dict[str | bytes, float]: if self._zstd_compressor is None: - return {span.payload: span.end_timestamp_precise for span in spans} + return {span.payload: span.end_timestamp for span in spans} combined = b"\x00".join(span.payload for span in spans) original_size = len(combined) @@ -354,7 +354,7 @@ def _prepare_payloads(self, spans: list[Span]) -> dict[str | bytes, float]: 
metrics.timing("spans.buffer.compression.compressed_size", compressed_size) metrics.timing("spans.buffer.compression.compression_ratio", compression_ratio) - min_timestamp = min(span.end_timestamp_precise for span in spans) + min_timestamp = min(span.end_timestamp for span in spans) return {compressed: min_timestamp} def _decompress_batch(self, compressed_data: bytes) -> list[bytes]: diff --git a/src/sentry/spans/consumers/process/factory.py b/src/sentry/spans/consumers/process/factory.py index e75498ef34f23c..8ef837aa6f791b 100644 --- a/src/sentry/spans/consumers/process/factory.py +++ b/src/sentry/spans/consumers/process/factory.py @@ -185,7 +185,7 @@ def process_batch( segment_id=cast(str | None, val.get("segment_id")), project_id=val["project_id"], payload=payload.value, - end_timestamp_precise=val["end_timestamp_precise"], + end_timestamp=val["end_timestamp"], is_segment_span=bool(val.get("parent_span_id") is None or val.get("is_remote")), ) spans.append(span) diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index 0430de592acde4..a5836f80dfe36e 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -13,23 +13,22 @@ I64_MAX = 2**63 - 1 FIELD_TO_ATTRIBUTE = { - "duration_ms": "sentry.duration_ms", - "is_segment": "sentry.is_segment", + "end_timestamp": "sentry.end_timestamp_precise", + "event_id": "sentry.event_id", "exclusive_time_ms": "sentry.exclusive_time_ms", - "start_timestamp_precise": "sentry.start_timestamp_precise", - "end_timestamp_precise": "sentry.end_timestamp_precise", + "hash": "sentry.hash", "is_remote": "sentry.is_remote", + "is_segment": "sentry.is_segment", + "kind": "sentry.kind", + "origin": "sentry.origin", "parent_span_id": "sentry.parent_span_id", "profile_id": "sentry.profile_id", - "segment_id": "sentry.segment_id", "received": "sentry.received", - "origin": "sentry.origin", - "kind": 
"sentry.kind", - "hash": "sentry.hash", - "event_id": "sentry.event_id", + "segment_id": "sentry.segment_id", + "start_timestamp": "sentry.start_timestamp_precise", } -ATTRIBUTE_TO_ATTRIBUTE = { +RENAME_ATTRIBUTES = { "sentry.description": "sentry.raw_description", "sentry.duration": "sentry.duration_ms", } @@ -65,13 +64,11 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: if v is not None: attributes[attribute_name] = _anyvalue(v) - if "sentry.duration_ms" not in attributes: - pass # FIXME - # attributes["sentry.duration_ms"] = - - for input_name, output_name in ATTRIBUTE_TO_ATTRIBUTE.items(): - if input_name in attributes: - attributes[output_name] = attributes[input_name] + # Rename some attributes from their sentry-conventions name to what the product currently expects. + # Eventually this should all be handled by deprecation policies in sentry-conventions. + for convention_name, eap_name in RENAME_ATTRIBUTES.items(): + if convention_name in attributes: + attributes[eap_name] = attributes.pop(convention_name) if links := span.get("links"): try: @@ -87,7 +84,7 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: trace_id=span["trace_id"], item_id=int(span["span_id"], 16).to_bytes(16, "little"), item_type=TraceItemType.TRACE_ITEM_TYPE_SPAN, - timestamp=_timestamp(span["start_timestamp_precise"]), + timestamp=_timestamp(span["start_timestamp"]), attributes=attributes, client_sample_rate=client_sample_rate, server_sample_rate=server_sample_rate, diff --git a/src/sentry/spans/consumers/process_segments/enrichment.py b/src/sentry/spans/consumers/process_segments/enrichment.py index a20ebf19bac231..b8fe7afa1025b6 100644 --- a/src/sentry/spans/consumers/process_segments/enrichment.py +++ b/src/sentry/spans/consumers/process_segments/enrichment.py @@ -99,9 +99,9 @@ def _attributes(self, span: SegmentSpan) -> dict[str, Any]: if not ret.get("sentry.app_start_type") and mobile_start_type: ret["sentry.app_start_type"] = mobile_start_type - if 
self._ttid_ts is not None and span["end_timestamp_precise"] <= self._ttid_ts: + if self._ttid_ts is not None and span["end_timestamp"] <= self._ttid_ts: ret["sentry.ttid"] = "ttid" - if self._ttfd_ts is not None and span["end_timestamp_precise"] <= self._ttfd_ts: + if self._ttfd_ts is not None and span["end_timestamp"] <= self._ttfd_ts: ret["sentry.ttfd"] = "ttfd" for key, value in shared_tags.items(): @@ -180,13 +180,13 @@ def _get_mobile_start_type(segment: SegmentSpan) -> str | None: def _timestamp_by_op(spans: list[SegmentSpan], op: str) -> float | None: for span in spans: if get_span_op(span) == op: - return span["end_timestamp_precise"] + return span["end_timestamp"] return None def _span_interval(span: SegmentSpan | EnrichedSpan) -> tuple[int, int]: """Get the start and end timestamps of a span in microseconds.""" - return _us(span["start_timestamp_precise"]), _us(span["end_timestamp_precise"]) + return _us(span["start_timestamp"]), _us(span["end_timestamp"]) def _us(timestamp: float) -> int: diff --git a/src/sentry/spans/consumers/process_segments/message.py b/src/sentry/spans/consumers/process_segments/message.py index e342e3233cf1d2..61ccadabcb66b0 100644 --- a/src/sentry/spans/consumers/process_segments/message.py +++ b/src/sentry/spans/consumers/process_segments/message.py @@ -29,7 +29,7 @@ from sentry.signals import first_insight_span_received, first_transaction_received from sentry.spans.consumers.process_segments.enrichment import TreeEnricher, compute_breakdowns from sentry.spans.consumers.process_segments.shim import build_shim_event_data, make_compatible -from sentry.spans.consumers.process_segments.types import CompatibleSpan, attribute_value +from sentry.spans.consumers.process_segments.types import CompatibleSpan from sentry.spans.grouping.api import load_span_grouping_config from sentry.utils import metrics from sentry.utils.dates import to_datetime @@ -145,7 +145,7 @@ def _compute_breakdowns( ) -> None: config = 
project.get_option("sentry:breakdowns") breakdowns = compute_breakdowns(spans, config) - segment.setdefault("attributes", {}).update(breakdowns) + segment.setdefault("data", {}).update(breakdowns) @metrics.wraps("spans.consumers.process_segments.create_models") @@ -155,10 +155,10 @@ def _create_models(segment: CompatibleSpan, project: Project) -> None: relationships between them and the Project model. """ - environment_name = attribute_value(segment, "sentry.environment") - release_name = attribute_value(segment, "sentry.release") - dist_name = attribute_value(segment, "sentry.dist") - date = to_datetime(segment["end_timestamp_precise"]) + environment_name = segment["data"].get("sentry.environment") + release_name = segment["data"].get("sentry.release") + dist_name = segment["data"].get("sentry.dist") + date = to_datetime(segment["end_timestamp"]) environment = Environment.get_or_create(project=project, name=environment_name) @@ -232,7 +232,7 @@ def _detect_performance_problems( culprit=event_data["transaction"], evidence_data=problem.evidence_data or {}, evidence_display=problem.evidence_display, - detection_time=to_datetime(segment_span["end_timestamp_precise"]), + detection_time=to_datetime(segment_span["end_timestamp"]), level="info", ) @@ -248,16 +248,17 @@ def _detect_performance_problems( def _record_signals( segment_span: CompatibleSpan, spans: list[CompatibleSpan], project: Project ) -> None: + data = segment_span.get("data", {}) record_generic_event_processed( project, - platform=attribute_value(segment_span, "sentry.platform"), - release=attribute_value(segment_span, "sentry.release"), - environment=attribute_value(segment_span, "sentry.environment"), + platform=data.get("sentry.platform"), + release=data.get("sentry.release"), + environment=data.get("sentry.environment"), ) # signal expects an event like object with a datetime attribute - event_like = types.SimpleNamespace(datetime=to_datetime(segment_span["end_timestamp_precise"])) + event_like = 
types.SimpleNamespace(datetime=to_datetime(segment_span["end_timestamp"])) set_project_flag_and_signal( project, @@ -267,7 +268,7 @@ def _record_signals( ) for module in insights_modules( - [FilterSpan.from_span_attributes(span.get("attributes") or {}) for span in spans] + [FilterSpan.from_span_data(span.get("data", {})) for span in spans] ): set_project_flag_and_signal( project, diff --git a/src/sentry/spans/consumers/process_segments/shim.py b/src/sentry/spans/consumers/process_segments/shim.py index a6f1addfaf30ab..c3b2bff759fc81 100644 --- a/src/sentry/spans/consumers/process_segments/shim.py +++ b/src/sentry/spans/consumers/process_segments/shim.py @@ -84,11 +84,9 @@ def build_shim_event_data( "platform": attribute_value(segment_span, "sentry.platform"), "tags": [["environment", attribute_value(segment_span, "sentry.environment")]], "received": segment_span["received"], - "timestamp": segment_span["end_timestamp_precise"], - "start_timestamp": segment_span["start_timestamp_precise"], - "datetime": to_datetime(segment_span["end_timestamp_precise"]).strftime( - "%Y-%m-%dT%H:%M:%SZ" - ), + "timestamp": segment_span["end_timestamp"], + "start_timestamp": segment_span["start_timestamp"], + "datetime": to_datetime(segment_span["end_timestamp"]).strftime("%Y-%m-%dT%H:%M:%SZ"), "spans": [], } @@ -100,8 +98,8 @@ def build_shim_event_data( # topological sorting on the span tree. 
for span in spans: event_span = cast(dict[str, Any], deepcopy(span)) - event_span["start_timestamp"] = span["start_timestamp_precise"] - event_span["timestamp"] = span["end_timestamp_precise"] + event_span["start_timestamp"] = span["start_timestamp"] + event_span["timestamp"] = span["end_timestamp"] event["spans"].append(event_span) return event diff --git a/tests/sentry/spans/consumers/process/__init__.py b/tests/sentry/spans/consumers/process/__init__.py index 82b61db13781da..07282807639bf2 100644 --- a/tests/sentry/spans/consumers/process/__init__.py +++ b/tests/sentry/spans/consumers/process/__init__.py @@ -22,8 +22,8 @@ def build_mock_span(project_id, *, span_op=None, is_segment=False, attributes=No }, "span_id": "a49b42af9fb69da0", "start_timestamp_ms": 1707953018865, - "start_timestamp_precise": 1707953018.865, - "end_timestamp_precise": 1707953018.972, + "start_timestamp": 1707953018.865, + "end_timestamp": 1707953018.972, "trace_id": "94576097f3a64b68b85a59c7d4e3ee2a", } diff --git a/tests/sentry/spans/consumers/process/test_consumer.py b/tests/sentry/spans/consumers/process/test_consumer.py index 95d13499635a14..f8298ff9c3c55b 100644 --- a/tests/sentry/spans/consumers/process/test_consumer.py +++ b/tests/sentry/spans/consumers/process/test_consumer.py @@ -50,7 +50,7 @@ def add_commit(offsets, force=False): "project_id": 12, "span_id": "a" * 16, "trace_id": "b" * 32, - "end_timestamp_precise": 1700000000.0, + "end_timestamp": 1700000000.0, } ), [], @@ -81,7 +81,7 @@ def add_commit(offsets, force=False): "segment_id": "aaaaaaaaaaaaaaaa", "span_id": "aaaaaaaaaaaaaaaa", "trace_id": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", - "end_timestamp_precise": 1700000000.0, + "end_timestamp": 1700000000.0, }, ], } diff --git a/tests/sentry/spans/consumers/process/test_flusher.py b/tests/sentry/spans/consumers/process/test_flusher.py index 50b156148225d2..3fa01c36f05b52 100644 --- a/tests/sentry/spans/consumers/process/test_flusher.py +++ 
b/tests/sentry/spans/consumers/process/test_flusher.py @@ -50,7 +50,7 @@ def append(msg): parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=now, + end_timestamp=now, ), Span( payload=_payload("d" * 16), @@ -59,7 +59,7 @@ def append(msg): parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=now, + end_timestamp=now, ), Span( payload=_payload("c" * 16), @@ -68,7 +68,7 @@ def append(msg): parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=now, + end_timestamp=now, ), Span( payload=_payload("b" * 16), @@ -78,7 +78,7 @@ def append(msg): is_segment_span=True, segment_id=None, project_id=1, - end_timestamp_precise=now, + end_timestamp=now, ), ] diff --git a/tests/sentry/spans/consumers/process_segments/test_convert.py b/tests/sentry/spans/consumers/process_segments/test_convert.py index e6ae71348859b3..3cda4fae68a3c3 100644 --- a/tests/sentry/spans/consumers/process_segments/test_convert.py +++ b/tests/sentry/spans/consumers/process_segments/test_convert.py @@ -73,8 +73,8 @@ "span_id": "8873a98879faf06d", "trace_id": "d099bf9ad5a143cf8f83a98081d0ed3b", "start_timestamp_ms": 1721319572616, - "start_timestamp_precise": 1721319572.616648, - "end_timestamp_precise": 1721319572.768806, + "start_timestamp": 1721319572.616648, + "end_timestamp": 1721319572.768806, } @@ -117,7 +117,7 @@ def test_convert_span_to_item() -> None: "sentry.category": AnyValue(string_value="http"), "sentry.client_sample_rate": AnyValue(double_value=0.1), "sentry.duration_ms": AnyValue(int_value=152), - "sentry.end_timestamp_precise": AnyValue(double_value=1721319572.768806), + "sentry.end_timestamp": AnyValue(double_value=1721319572.768806), "sentry.environment": AnyValue(string_value="development"), "sentry.exclusive_time_ms": AnyValue(double_value=0.228), "sentry.is_segment": AnyValue(bool_value=True), @@ -136,7 +136,7 @@ def test_convert_span_to_item() -> None: "sentry.segment_id": 
AnyValue(string_value="8873a98879faf06d"), "sentry.segment.name": AnyValue(string_value="/api/0/relays/projectconfigs/"), "sentry.server_sample_rate": AnyValue(double_value=0.2), - "sentry.start_timestamp_precise": AnyValue(double_value=1721319572.616648), + "sentry.start_timestamp": AnyValue(double_value=1721319572.616648), "sentry.status_code": AnyValue(string_value="200"), "sentry.status": AnyValue(string_value="ok"), "sentry.thread.id": AnyValue(string_value="8522009600"), diff --git a/tests/sentry/spans/consumers/process_segments/test_enrichment.py b/tests/sentry/spans/consumers/process_segments/test_enrichment.py index 8c33f48209751e..2551be4b388869 100644 --- a/tests/sentry/spans/consumers/process_segments/test_enrichment.py +++ b/tests/sentry/spans/consumers/process_segments/test_enrichment.py @@ -10,28 +10,28 @@ def test_childless_spans() -> None: build_mock_span( project_id=1, is_segment=True, - start_timestamp_precise=1609455600.0, - end_timestamp_precise=1609455605.0, + start_timestamp=1609455600.0, + end_timestamp=1609455605.0, span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.0, - end_timestamp_precise=1609455604.0, + start_timestamp=1609455601.0, + end_timestamp=1609455604.0, span_id="bbbbbbbbbbbbbbbb", parent_span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.0, - end_timestamp_precise=1609455603.5, + start_timestamp=1609455601.0, + end_timestamp=1609455603.5, span_id="cccccccccccccccc", parent_span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455603.0, - end_timestamp_precise=1609455604.877, + start_timestamp=1609455603.0, + end_timestamp=1609455604.877, span_id="dddddddddddddddd", parent_span_id="aaaaaaaaaaaaaaaa", ), @@ -54,28 +54,28 @@ def test_nested_spans() -> None: build_mock_span( project_id=1, is_segment=True, - start_timestamp_precise=1609455600.0, - end_timestamp_precise=1609455605.0, + 
start_timestamp=1609455600.0, + end_timestamp=1609455605.0, span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.0, - end_timestamp_precise=1609455602.0, + start_timestamp=1609455601.0, + end_timestamp=1609455602.0, span_id="bbbbbbbbbbbbbbbb", parent_span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.2, - end_timestamp_precise=1609455601.8, + start_timestamp=1609455601.2, + end_timestamp=1609455601.8, span_id="cccccccccccccccc", parent_span_id="bbbbbbbbbbbbbbbb", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.4, - end_timestamp_precise=1609455601.6, + start_timestamp=1609455601.4, + end_timestamp=1609455601.6, span_id="dddddddddddddddd", parent_span_id="cccccccccccccccc", ), @@ -97,28 +97,28 @@ def test_overlapping_child_spans() -> None: build_mock_span( project_id=1, is_segment=True, - start_timestamp_precise=1609455600.0, - end_timestamp_precise=1609455605.0, + start_timestamp=1609455600.0, + end_timestamp=1609455605.0, span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.0, - end_timestamp_precise=1609455602.0, + start_timestamp=1609455601.0, + end_timestamp=1609455602.0, span_id="bbbbbbbbbbbbbbbb", parent_span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.2, - end_timestamp_precise=1609455601.6, + start_timestamp=1609455601.2, + end_timestamp=1609455601.6, span_id="cccccccccccccccc", parent_span_id="bbbbbbbbbbbbbbbb", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.4, - end_timestamp_precise=1609455601.8, + start_timestamp=1609455601.4, + end_timestamp=1609455601.8, span_id="dddddddddddddddd", parent_span_id="bbbbbbbbbbbbbbbb", ), @@ -140,28 +140,28 @@ def test_child_spans_dont_intersect_parent() -> None: build_mock_span( project_id=1, is_segment=True, - start_timestamp_precise=1609455600.0, - end_timestamp_precise=1609455605.0, + 
start_timestamp=1609455600.0, + end_timestamp=1609455605.0, span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.0, - end_timestamp_precise=1609455602.0, + start_timestamp=1609455601.0, + end_timestamp=1609455602.0, span_id="bbbbbbbbbbbbbbbb", parent_span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455600.4, - end_timestamp_precise=1609455600.8, + start_timestamp=1609455600.4, + end_timestamp=1609455600.8, span_id="cccccccccccccccc", parent_span_id="bbbbbbbbbbbbbbbb", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455602.2, - end_timestamp_precise=1609455602.6, + start_timestamp=1609455602.2, + end_timestamp=1609455602.6, span_id="dddddddddddddddd", parent_span_id="bbbbbbbbbbbbbbbb", ), @@ -183,28 +183,28 @@ def test_child_spans_extend_beyond_parent() -> None: build_mock_span( project_id=1, is_segment=True, - start_timestamp_precise=1609455600.0, - end_timestamp_precise=1609455605.0, + start_timestamp=1609455600.0, + end_timestamp=1609455605.0, span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.0, - end_timestamp_precise=1609455602.0, + start_timestamp=1609455601.0, + end_timestamp=1609455602.0, span_id="bbbbbbbbbbbbbbbb", parent_span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455600.8, - end_timestamp_precise=1609455601.4, + start_timestamp=1609455600.8, + end_timestamp=1609455601.4, span_id="cccccccccccccccc", parent_span_id="bbbbbbbbbbbbbbbb", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.6, - end_timestamp_precise=1609455602.2, + start_timestamp=1609455601.6, + end_timestamp=1609455602.2, span_id="dddddddddddddddd", parent_span_id="bbbbbbbbbbbbbbbb", ), @@ -226,28 +226,28 @@ def test_child_spans_consumes_all_of_parent() -> None: build_mock_span( project_id=1, is_segment=True, - start_timestamp_precise=1609455600.0, - 
end_timestamp_precise=1609455605.0, + start_timestamp=1609455600.0, + end_timestamp=1609455605.0, span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.0, - end_timestamp_precise=1609455602.0, + start_timestamp=1609455601.0, + end_timestamp=1609455602.0, span_id="bbbbbbbbbbbbbbbb", parent_span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455600.8, - end_timestamp_precise=1609455601.6, + start_timestamp=1609455600.8, + end_timestamp=1609455601.6, span_id="cccccccccccccccc", parent_span_id="bbbbbbbbbbbbbbbb", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.4, - end_timestamp_precise=1609455602.2, + start_timestamp=1609455601.4, + end_timestamp=1609455602.2, span_id="dddddddddddddddd", parent_span_id="bbbbbbbbbbbbbbbb", ), @@ -269,28 +269,28 @@ def test_only_immediate_child_spans_affect_calculation() -> None: build_mock_span( project_id=1, is_segment=True, - start_timestamp_precise=1609455600.0, - end_timestamp_precise=1609455605.0, + start_timestamp=1609455600.0, + end_timestamp=1609455605.0, span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.0, - end_timestamp_precise=1609455602.0, + start_timestamp=1609455601.0, + end_timestamp=1609455602.0, span_id="bbbbbbbbbbbbbbbb", parent_span_id="aaaaaaaaaaaaaaaa", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.6, - end_timestamp_precise=1609455602.2, + start_timestamp=1609455601.6, + end_timestamp=1609455602.2, span_id="cccccccccccccccc", parent_span_id="bbbbbbbbbbbbbbbb", ), build_mock_span( project_id=1, - start_timestamp_precise=1609455601.4, - end_timestamp_precise=1609455601.8, + start_timestamp=1609455601.4, + end_timestamp=1609455601.8, span_id="dddddddddddddddd", parent_span_id="cccccccccccccccc", ), @@ -311,48 +311,48 @@ def test_emit_ops_breakdown() -> None: segment_span = build_mock_span( project_id=1, is_segment=True, - 
start_timestamp_precise=1577836800.0, - end_timestamp_precise=1577858400.01, + start_timestamp=1577836800.0, + end_timestamp=1577858400.01, span_id="ffffffffffffffff", ) spans = [ build_mock_span( project_id=1, - start_timestamp_precise=1577836800.0, # 2020-01-01 00:00:00 - end_timestamp_precise=1577840400.0, # 2020-01-01 01:00:00 + start_timestamp=1577836800.0, # 2020-01-01 00:00:00 + end_timestamp=1577840400.0, # 2020-01-01 01:00:00 span_id="fa90fdead5f74052", parent_span_id=segment_span["span_id"], span_op="http", ), build_mock_span( project_id=1, - start_timestamp_precise=1577844000.0, # 2020-01-01 02:00:00 - end_timestamp_precise=1577847600.0, # 2020-01-01 03:00:00 + start_timestamp=1577844000.0, # 2020-01-01 02:00:00 + end_timestamp=1577847600.0, # 2020-01-01 03:00:00 span_id="bbbbbbbbbbbbbbbb", parent_span_id=segment_span["span_id"], span_op="db", ), build_mock_span( project_id=1, - start_timestamp_precise=1577845800.0, # 2020-01-01 02:30:00 - end_timestamp_precise=1577849400.0, # 2020-01-01 03:30:00 + start_timestamp=1577845800.0, # 2020-01-01 02:30:00 + end_timestamp=1577849400.0, # 2020-01-01 03:30:00 span_id="cccccccccccccccc", parent_span_id=segment_span["span_id"], span_op="db.postgres", ), build_mock_span( project_id=1, - start_timestamp_precise=1577851200.0, # 2020-01-01 04:00:00 - end_timestamp_precise=1577853000.0, # 2020-01-01 04:30:00 + start_timestamp=1577851200.0, # 2020-01-01 04:00:00 + end_timestamp=1577853000.0, # 2020-01-01 04:30:00 span_id="dddddddddddddddd", parent_span_id=segment_span["span_id"], span_op="db.mongo", ), build_mock_span( project_id=1, - start_timestamp_precise=1577854800.0, # 2020-01-01 05:00:00 - end_timestamp_precise=1577858400.01, # 2020-01-01 06:00:00.01 + start_timestamp=1577854800.0, # 2020-01-01 05:00:00 + end_timestamp=1577858400.01, # 2020-01-01 06:00:00.01 span_id="eeeeeeeeeeeeeeee", parent_span_id=segment_span["span_id"], span_op="browser", @@ -443,8 +443,8 @@ def _mock_performance_issue_span(is_segment, 
attributes, **fields): "attributes": attributes, "span_id": "a49b42af9fb69da0", "start_timestamp_ms": 1707953018865, - "start_timestamp_precise": 1707953018.865, - "end_timestamp_precise": 1707953018.972, + "start_timestamp": 1707953018.865, + "end_timestamp": 1707953018.972, "trace_id": "94576097f3a64b68b85a59c7d4e3ee2a", **fields, } diff --git a/tests/sentry/spans/consumers/process_segments/test_message.py b/tests/sentry/spans/consumers/process_segments/test_message.py index 29946c97eddb6b..20574a3b0c873d 100644 --- a/tests/sentry/spans/consumers/process_segments/test_message.py +++ b/tests/sentry/spans/consumers/process_segments/test_message.py @@ -40,7 +40,7 @@ def generate_basic_spans(self): parent_span_id=segment_span["span_id"], span_id="940ce942561548b5", start_timestamp_ms=1707953018867, - start_timestamp_precise=1707953018.867, + start_timestamp=1707953018.867, ) return [child_span, segment_span] @@ -57,7 +57,7 @@ def generate_n_plus_one_spans(self): parent_span_id=segment_span["span_id"], span_id="940ce942561548b5", start_timestamp_ms=1707953018867, - start_timestamp_precise=1707953018.867, + start_timestamp=1707953018.867, ) cause_span = build_mock_span( project_id=self.project.id, @@ -66,7 +66,7 @@ def generate_n_plus_one_spans(self): parent_span_id="940ce942561548b5", span_id="a974da4671bc3857", start_timestamp_ms=1707953018867, - start_timestamp_precise=1707953018.867, + start_timestamp=1707953018.867, ) repeating_span_description = 'SELECT "sentry_organization"."id", "sentry_organization"."name", "sentry_organization"."slug", "sentry_organization"."status", "sentry_organization"."date_added", "sentry_organization"."default_role", "sentry_organization"."is_test", "sentry_organization"."flags" FROM "sentry_organization" WHERE "sentry_organization"."id" = %s LIMIT 21' @@ -78,7 +78,7 @@ def repeating_span(): parent_span_id="940ce942561548b5", span_id=uuid.uuid4().hex[:16], start_timestamp_ms=1707953018869, - start_timestamp_precise=1707953018.869, + 
start_timestamp=1707953018.869, ) repeating_spans = [repeating_span() for _ in range(7)] @@ -126,7 +126,7 @@ def test_create_models(self) -> None: organization_id=self.organization.id, version="backend@24.2.0.dev0+699ce0cd1281cc3c7275d0a474a595375c769ae8", ) - assert release.date_added.timestamp() == spans[0]["end_timestamp_precise"] + assert release.date_added.timestamp() == spans[0]["end_timestamp"] @override_options({"spans.process-segments.detect-performance-problems.enable": True}) @mock.patch("sentry.issues.ingest.send_issue_occurrence_to_eventstream") @@ -162,7 +162,7 @@ def test_n_plus_one_issue_detection_without_segment_span( parent_span_id="b35b839c02985f33", span_id="940ce942561548b5", start_timestamp_ms=1707953018867, - start_timestamp_precise=1707953018.867, + start_timestamp=1707953018.867, ) cause_span = build_mock_span( project_id=self.project.id, @@ -172,7 +172,7 @@ def test_n_plus_one_issue_detection_without_segment_span( parent_span_id="940ce942561548b5", span_id="a974da4671bc3857", start_timestamp_ms=1707953018867, - start_timestamp_precise=1707953018.867, + start_timestamp=1707953018.867, ) repeating_span_description = 'SELECT "sentry_organization"."id", "sentry_organization"."name", "sentry_organization"."slug", "sentry_organization"."status", "sentry_organization"."date_added", "sentry_organization"."default_role", "sentry_organization"."is_test", "sentry_organization"."flags" FROM "sentry_organization" WHERE "sentry_organization"."id" = %s LIMIT 21' @@ -185,7 +185,7 @@ def repeating_span(): parent_span_id="940ce942561548b5", span_id=uuid.uuid4().hex[:16], start_timestamp_ms=1707953018869, - start_timestamp_precise=1707953018.869, + start_timestamp=1707953018.869, ) repeating_spans = [repeating_span() for _ in range(7)] diff --git a/tests/sentry/spans/test_buffer.py b/tests/sentry/spans/test_buffer.py index 7a22a4e8f3bb9d..91f76f66711b6b 100644 --- a/tests/sentry/spans/test_buffer.py +++ b/tests/sentry/spans/test_buffer.py @@ -137,7 +137,7 @@ 
def process_spans(spans: Sequence[Span | _SplitBatch], buffer: SpansBuffer, now) parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("d" * 16), @@ -146,7 +146,7 @@ def process_spans(spans: Sequence[Span | _SplitBatch], buffer: SpansBuffer, now) parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("c" * 16), @@ -155,7 +155,7 @@ def process_spans(spans: Sequence[Span | _SplitBatch], buffer: SpansBuffer, now) parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("b" * 16), @@ -165,7 +165,7 @@ def process_spans(spans: Sequence[Span | _SplitBatch], buffer: SpansBuffer, now) segment_id=None, is_segment_span=True, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), ] ) @@ -210,7 +210,7 @@ def test_basic(buffer: SpansBuffer, spans) -> None: parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), _SplitBatch(), Span( @@ -220,7 +220,7 @@ def test_basic(buffer: SpansBuffer, spans) -> None: parent_span_id="a" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("a" * 16), @@ -230,7 +230,7 @@ def test_basic(buffer: SpansBuffer, spans) -> None: is_segment_span=True, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("c" * 16), @@ -239,7 +239,7 @@ def test_basic(buffer: SpansBuffer, spans) -> None: parent_span_id="a" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), ] ) @@ -284,7 +284,7 @@ def test_deep(buffer: SpansBuffer, spans) -> None: parent_span_id="d" * 16, segment_id=None, 
project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("d" * 16), @@ -293,7 +293,7 @@ def test_deep(buffer: SpansBuffer, spans) -> None: parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("b" * 16), @@ -302,7 +302,7 @@ def test_deep(buffer: SpansBuffer, spans) -> None: parent_span_id="c" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("c" * 16), @@ -311,7 +311,7 @@ def test_deep(buffer: SpansBuffer, spans) -> None: parent_span_id="a" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("a" * 16), @@ -321,7 +321,7 @@ def test_deep(buffer: SpansBuffer, spans) -> None: is_segment_span=True, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), ] ) @@ -367,7 +367,7 @@ def test_deep2(buffer: SpansBuffer, spans) -> None: parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("d" * 16), @@ -376,7 +376,7 @@ def test_deep2(buffer: SpansBuffer, spans) -> None: parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("e" * 16), @@ -385,7 +385,7 @@ def test_deep2(buffer: SpansBuffer, spans) -> None: parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("b" * 16), @@ -395,7 +395,7 @@ def test_deep2(buffer: SpansBuffer, spans) -> None: is_segment_span=True, segment_id=None, project_id=2, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), ] ) @@ -448,7 +448,7 @@ def test_parent_in_other_project(buffer: SpansBuffer, spans) -> 
None: project_id=1, segment_id=None, is_segment_span=True, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("d" * 16), @@ -457,7 +457,7 @@ def test_parent_in_other_project(buffer: SpansBuffer, spans) -> None: parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("e" * 16), @@ -466,7 +466,7 @@ def test_parent_in_other_project(buffer: SpansBuffer, spans) -> None: parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("b" * 16), @@ -476,7 +476,7 @@ def test_parent_in_other_project(buffer: SpansBuffer, spans) -> None: is_segment_span=True, segment_id=None, project_id=2, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), ] ), @@ -532,7 +532,7 @@ def test_flush_rebalance(buffer: SpansBuffer) -> None: segment_id=None, project_id=1, is_segment_span=True, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ) ] @@ -582,7 +582,7 @@ def make_payload(span_id: str): project_id=1, segment_id=None, is_segment_span=True, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=make_payload("a" * 16), @@ -591,7 +591,7 @@ def make_payload(span_id: str): parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=make_payload("c" * 16), @@ -600,7 +600,7 @@ def make_payload(span_id: str): parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), ] @@ -640,7 +640,7 @@ def test_max_segment_spans_limit(buffer: SpansBuffer) -> None: parent_span_id="b" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000001.0, + end_timestamp=1700000001.0, ), Span( payload=_payload("b" * 16), @@ -649,7 +649,7 @@ def 
test_max_segment_spans_limit(buffer: SpansBuffer) -> None: parent_span_id="a" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000002.0, + end_timestamp=1700000002.0, ), ] batch2 = [ @@ -660,7 +660,7 @@ def test_max_segment_spans_limit(buffer: SpansBuffer) -> None: parent_span_id="a" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000003.0, + end_timestamp=1700000003.0, ), Span( payload=_payload("e" * 16), @@ -669,7 +669,7 @@ def test_max_segment_spans_limit(buffer: SpansBuffer) -> None: parent_span_id="a" * 16, segment_id=None, project_id=1, - end_timestamp_precise=1700000004.0, + end_timestamp=1700000004.0, ), Span( payload=_payload("a" * 16), @@ -679,7 +679,7 @@ def test_max_segment_spans_limit(buffer: SpansBuffer) -> None: project_id=1, segment_id=None, is_segment_span=True, - end_timestamp_precise=1700000005.0, + end_timestamp=1700000005.0, ), ] @@ -716,7 +716,7 @@ def test_kafka_slice_id(buffer: SpansBuffer) -> None: project_id=1, segment_id=None, is_segment_span=True, - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ) ] @@ -742,7 +742,7 @@ def test_preassigned_disconnected_segment(buffer: SpansBuffer) -> None: parent_span_id="c" * 16, # does not exist in this segment project_id=1, segment_id="a" * 16, # refers to the correct span below - end_timestamp_precise=1700000000.0, + end_timestamp=1700000000.0, ), Span( payload=_payload("a" * 16), @@ -752,7 +752,7 @@ def test_preassigned_disconnected_segment(buffer: SpansBuffer) -> None: project_id=1, segment_id="a" * 16, is_segment_span=True, - end_timestamp_precise=1700000001.0, + end_timestamp=1700000001.0, ), ] From fc37bc2d40867c774be8b778722cf3271dc19b20 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 26 Sep 2025 09:50:24 +0200 Subject: [PATCH 06/21] segment and profile id (untested) --- src/sentry/spans/buffer.py | 13 +++++++------ src/sentry/spans/consumers/process/factory.py | 3 ++- .../spans/consumers/process_segments/convert.py | 3 +-- 
src/sentry/spans/consumers/process_segments/shim.py | 2 +- .../spans/consumers/process_segments/types.py | 4 +++- tests/sentry/spans/consumers/process/__init__.py | 2 +- 6 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/sentry/spans/buffer.py b/src/sentry/spans/buffer.py index 5ca6c6c59defbf..27e3d51d8149e1 100644 --- a/src/sentry/spans/buffer.py +++ b/src/sentry/spans/buffer.py @@ -77,6 +77,7 @@ from sentry import options from sentry.processing.backpressure.memory import ServiceMemory, iter_cluster_memory_usage +from sentry.spans.consumers.process_segments.types import attribute_value from sentry.utils import metrics, redis # SegmentKey is an internal identifier used by the redis buffer that is also @@ -428,17 +429,17 @@ def flush_segments(self, now: int) -> dict[SegmentKey, FlushedSegment]: has_root_span = False metrics.timing("spans.buffer.flush_segments.num_spans_per_segment", len(segment)) for payload in segment: - val = orjson.loads(payload) + span = orjson.loads(payload) - if not val.get("segment_id"): - val["segment_id"] = segment_span_id + if not attribute_value(span, "sentry.segment.id"): + span.setdefault("attributes", {})["sentry.segment.id"] = segment_span_id - is_segment = segment_span_id == val["span_id"] - val["is_segment"] = is_segment + is_segment = segment_span_id == span["span_id"] + span["is_segment"] = is_segment if is_segment: has_root_span = True - output_spans.append(OutputSpan(payload=val)) + output_spans.append(OutputSpan(payload=span)) metrics.incr( "spans.buffer.flush_segments.num_segments_per_shard", tags={"shard_i": shard} diff --git a/src/sentry/spans/consumers/process/factory.py b/src/sentry/spans/consumers/process/factory.py index 8ef837aa6f791b..7e9f7311f1cf18 100644 --- a/src/sentry/spans/consumers/process/factory.py +++ b/src/sentry/spans/consumers/process/factory.py @@ -18,6 +18,7 @@ from sentry import killswitches from sentry.spans.buffer import Span, SpansBuffer from sentry.spans.consumers.process.flusher 
import SpanFlusher +from sentry.spans.consumers.process_segments.types import attribute_value from sentry.utils import metrics from sentry.utils.arroyo import MultiprocessingPool, SetJoinTimeout, run_task_with_multiprocessing @@ -182,7 +183,7 @@ def process_batch( trace_id=val["trace_id"], span_id=val["span_id"], parent_span_id=val.get("parent_span_id"), - segment_id=cast(str | None, val.get("segment_id")), + segment_id=cast(str | None, attribute_value(val, "sentry.segment.id")), project_id=val["project_id"], payload=payload.value, end_timestamp=val["end_timestamp"], diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index a5836f80dfe36e..1b1397c74c3ac7 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -22,15 +22,14 @@ "kind": "sentry.kind", "origin": "sentry.origin", "parent_span_id": "sentry.parent_span_id", - "profile_id": "sentry.profile_id", "received": "sentry.received", - "segment_id": "sentry.segment_id", "start_timestamp": "sentry.start_timestamp_precise", } RENAME_ATTRIBUTES = { "sentry.description": "sentry.raw_description", "sentry.duration": "sentry.duration_ms", + "sentry.segment.id": "sentry.segment_id", } diff --git a/src/sentry/spans/consumers/process_segments/shim.py b/src/sentry/spans/consumers/process_segments/shim.py index c3b2bff759fc81..d52bb8d7330e8e 100644 --- a/src/sentry/spans/consumers/process_segments/shim.py +++ b/src/sentry/spans/consumers/process_segments/shim.py @@ -90,7 +90,7 @@ def build_shim_event_data( "spans": [], } - if (profile_id := segment_span.get("profile_id")) is not None: + if (profile_id := attribute_value(segment_span, "sentry.profile_id")) is not None: event["contexts"]["profile"] = {"profile_id": profile_id, "type": "profile"} # Add legacy span attributes required only by issue detectors. 
As opposed to diff --git a/src/sentry/spans/consumers/process_segments/types.py b/src/sentry/spans/consumers/process_segments/types.py index cf066a1f406bb8..74921696ba62a9 100644 --- a/src/sentry/spans/consumers/process_segments/types.py +++ b/src/sentry/spans/consumers/process_segments/types.py @@ -37,6 +37,8 @@ class CompatibleSpan(EnrichedSpan, total=True): def attribute_value(span: SegmentSpan, key) -> Any: attributes = span.get("attributes") or {} try: - return attributes.get(key)["value"] + return attributes.get(key).get("value") except Exception as e: + # `attributes` is not a dict or the attribute itself is not a dict. sentry_sdk.capture_exception(e) + return None diff --git a/tests/sentry/spans/consumers/process/__init__.py b/tests/sentry/spans/consumers/process/__init__.py index 07282807639bf2..0f86d7450adb53 100644 --- a/tests/sentry/spans/consumers/process/__init__.py +++ b/tests/sentry/spans/consumers/process/__init__.py @@ -10,7 +10,6 @@ def build_mock_span(project_id, *, span_op=None, is_segment=False, attributes=No "organization_id": 1, "received": 1707953019.044972, "retention_days": 90, - "segment_id": "a49b42af9fb69da0", "attributes": { "sentry.environment": {"value": "development"}, "sentry.release": { @@ -18,6 +17,7 @@ def build_mock_span(project_id, *, span_op=None, is_segment=False, attributes=No }, "sentry.platform": {"value": "python"}, "sentry.op": {"value": span_op or "base.dispatch.sleep"}, + "sentry.segment.id": "a49b42af9fb69da0", **(attributes or {}), }, "span_id": "a49b42af9fb69da0", From 51dfc5fb91c1bb0e5a8a869621054f3df59db273 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Sep 2025 10:52:28 +0200 Subject: [PATCH 07/21] lint: Down to 2 mypy errors --- .../consumers/process_segments/convert.py | 21 ++++--- .../consumers/process_segments/enrichment.py | 60 +++++++++++-------- .../consumers/process_segments/message.py | 26 ++++---- .../spans/consumers/process_segments/shim.py | 13 ++-- 
.../spans/consumers/process_segments/types.py | 25 ++++---- .../components/datasetSelector.tsx | 6 +- .../components/widgetBuilderSlideout.tsx | 6 +- .../hooks/useSegmentSpanWidgetState.tsx | 6 +- 8 files changed, 90 insertions(+), 73 deletions(-) diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index 1b1397c74c3ac7..5d5ba1647ab918 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -39,10 +39,11 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: client_sample_rate = 1.0 server_sample_rate = 1.0 - for k, v in (span.get("attributes") or {}).items(): + for k, field_value in (span.get("attributes") or {}).items(): + if (value := field_value.get("value")) is None: + continue try: # NOTE: This ignores the `type` field of the attribute itself - value = v["value"] attributes[k] = _anyvalue(value) except Exception: sentry_sdk.capture_exception() @@ -59,9 +60,9 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: pass for field_name, attribute_name in FIELD_TO_ATTRIBUTE.items(): - v = span.get(field_name) - if v is not None: - attributes[attribute_name] = _anyvalue(v) + field_value = span.get(field_name) + if field_value is not None: + attributes[attribute_name] = _anyvalue(field_value) # Rename some attributes from their sentry-conventions name to what the product currently expects. # Eventually this should all be handled by deprecation policies in sentry-conventions. @@ -135,7 +136,10 @@ def _sanitize_span_link(link: SpanLink) -> SpanLink: # might be an intermediary state where there is a pre-existing dropped # attributes count. Respect that count, if it's present. It should always be # an integer. 
- dropped_attributes_count = attributes.get("sentry.dropped_attributes_count", 0) + try: + dropped_attributes_count = int(attributes["sentry.dropped_attributes_count"]["value"]) # type: ignore[arg-type] + except (KeyError, ValueError, TypeError): + dropped_attributes_count = 0 for key, value in attributes.items(): if key in ALLOWED_LINK_ATTRIBUTE_KEYS: @@ -144,7 +148,10 @@ dropped_attributes_count += 1 if dropped_attributes_count > 0: - allowed_attributes["sentry.dropped_attributes_count"] = dropped_attributes_count + allowed_attributes["sentry.dropped_attributes_count"] = { + "type": "integer", + "value": dropped_attributes_count, + } # Only include the `attributes` key if the key was present in the original # link, don't create an empty object, since there is a semantic difference diff --git a/src/sentry/spans/consumers/process_segments/enrichment.py b/src/sentry/spans/consumers/process_segments/enrichment.py index b8fe7afa1025b6..362d2e3778b529 100644 --- a/src/sentry/spans/consumers/process_segments/enrichment.py +++ b/src/sentry/spans/consumers/process_segments/enrichment.py @@ -2,9 +2,14 @@ from collections.abc import Sequence from typing import Any -from sentry_kafka_schemas.schema_types.buffered_segments_v1 import SegmentSpan +from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent -from sentry.spans.consumers.process_segments.types import EnrichedSpan, attribute_value, get_span_op +from sentry.spans.consumers.process_segments.types import ( + Attributes, + EnrichedSpan, + attribute_value, + get_span_op, +) # Keys of shared sentry attributes that are shared across all spans in a segment. This list # is taken from `extract_shared_tags` in Relay.
@@ -44,7 +49,7 @@ DEFAULT_SPAN_OP = "default" -def _find_segment_span(spans: list[SegmentSpan]) -> SegmentSpan | None: +def _find_segment_span(spans: list[SpanEvent]) -> SpanEvent | None: """ Finds the segment in the span in the list that has ``is_segment`` set to ``True``. @@ -66,7 +71,7 @@ def _find_segment_span(spans: list[SegmentSpan]) -> SegmentSpan | None: class TreeEnricher: """Enriches spans with information from their parent, child and sibling spans.""" - def __init__(self, spans: list[SegmentSpan]) -> None: + def __init__(self, spans: list[SpanEvent]) -> None: self._segment_span = _find_segment_span(spans) self._ttid_ts = _timestamp_by_op(spans, "ui.load.initial_display") @@ -78,14 +83,14 @@ def __init__(self, spans: list[SegmentSpan]) -> None: interval = _span_interval(span) self._span_map.setdefault(parent_span_id, []).append(interval) - def _attributes(self, span: SegmentSpan) -> dict[str, Any]: - ret = {**span.get("attributes", {})} + def _attributes(self, span: SpanEvent) -> dict[str, Any]: + attributes: Attributes = {**span.get("attributes", {})} if self._segment_span is not None: # Assume that Relay has extracted the shared tags into `data` on the # root span. Once `sentry_tags` is removed, the logic from # `extract_shared_tags` should be moved here. segment_attrs = self._segment_span.get("attributes", {}) - shared_tags = {k: v for k, v in segment_attrs.items() if k in SHARED_SENTRY_ATTRIBUTES} + shared_attrs = {k: v for k, v in segment_attrs.items() if k in SHARED_SENTRY_ATTRIBUTES} is_mobile = attribute_value(span, "sentry.mobile") == "true" mobile_start_type = _get_mobile_start_type(self._segment_span) @@ -94,23 +99,26 @@ def _attributes(self, span: SegmentSpan) -> dict[str, Any]: # NOTE: Like in Relay's implementation, shared tags are added at the # very end. This does not have access to the shared tag value. We # keep behavior consistent, although this should be revisited. 
- if ret.get("sentry.thread.name") == MOBILE_MAIN_THREAD_NAME: - ret["sentry.main_thread"] = "true" - if not ret.get("sentry.app_start_type") and mobile_start_type: - ret["sentry.app_start_type"] = mobile_start_type + if attributes.get("sentry.thread.name") == MOBILE_MAIN_THREAD_NAME: + attributes["sentry.main_thread"] = {"type": "string", "value": "true"} + if not attributes.get("sentry.app_start_type") and mobile_start_type: + attributes["sentry.app_start_type"] = { + "type": "string", + "value": mobile_start_type, + } if self._ttid_ts is not None and span["end_timestamp"] <= self._ttid_ts: - ret["sentry.ttid"] = "ttid" + attributes["sentry.ttid"] = {"type": "string", "value": "ttid"} if self._ttfd_ts is not None and span["end_timestamp"] <= self._ttfd_ts: - ret["sentry.ttfd"] = "ttfd" + attributes["sentry.ttfd"] = {"type": "string", "value": "ttfd"} - for key, value in shared_tags.items(): - if ret.get(key) is None: - ret[key] = value + for key, value in shared_attrs.items(): + if attributes.get(key) is None: + attributes[key] = value - return ret + return attributes - def _exclusive_time(self, span: SegmentSpan) -> float: + def _exclusive_time(self, span: SpanEvent) -> float: """ Sets the exclusive time on all spans in the list. 
@@ -138,17 +146,17 @@ def _exclusive_time(self, span: SegmentSpan) -> float: return exclusive_time_us / 1_000 - def enrich_span(self, span: SegmentSpan) -> EnrichedSpan: + def enrich_span(self, span: SpanEvent) -> EnrichedSpan: exclusive_time = self._exclusive_time(span) attributes = self._attributes(span) return { **span, "attributes": attributes, - "exclusive_time_ms": exclusive_time, + "exclusive_time_ms": exclusive_time, # FIXME } @classmethod - def enrich_spans(cls, spans: list[SegmentSpan]) -> tuple[int | None, list[EnrichedSpan]]: + def enrich_spans(cls, spans: list[SpanEvent]) -> tuple[int | None, list[EnrichedSpan]]: inst = cls(spans) ret = [] segment_idx = None @@ -162,7 +170,7 @@ def enrich_spans(cls, spans: list[SegmentSpan]) -> tuple[int | None, list[Enrich return segment_idx, ret -def _get_mobile_start_type(segment: SegmentSpan) -> str | None: +def _get_mobile_start_type(segment: SpanEvent) -> str | None: """ Check the measurements on the span to determine what kind of start type the event is. 
def _timestamp_by_op(spans: list[SpanEvent], op: str) -> float | None:
    """Return the end timestamp of the first span whose op equals *op*.

    Returns ``None`` when no span in the list matches.
    """
    return next(
        (span["end_timestamp"] for span in spans if get_span_op(span) == op),
        None,
    )
-from sentry.spans.consumers.process_segments.types import CompatibleSpan +from sentry.spans.consumers.process_segments.types import CompatibleSpan, attribute_value from sentry.spans.grouping.api import load_span_grouping_config from sentry.utils import metrics from sentry.utils.dates import to_datetime @@ -43,7 +43,7 @@ @metrics.wraps("spans.consumers.process_segments.process_segment") def process_segment( - unprocessed_spans: list[SegmentSpan], skip_produce: bool = False + unprocessed_spans: list[SpanEvent], skip_produce: bool = False ) -> list[CompatibleSpan]: _verify_compatibility(unprocessed_spans) segment_span, spans = _enrich_spans(unprocessed_spans) @@ -113,7 +113,7 @@ def _redact(data: Any) -> Any: @metrics.wraps("spans.consumers.process_segments.enrich_spans") def _enrich_spans( - unprocessed_spans: list[SegmentSpan], + unprocessed_spans: list[SpanEvent], ) -> tuple[CompatibleSpan | None, list[CompatibleSpan]]: """ Enriches all spans with data derived from the span tree and the segment. @@ -145,7 +145,7 @@ def _compute_breakdowns( ) -> None: config = project.get_option("sentry:breakdowns") breakdowns = compute_breakdowns(spans, config) - segment.setdefault("data", {}).update(breakdowns) + segment.setdefault("attributes", {}).update(breakdowns) @metrics.wraps("spans.consumers.process_segments.create_models") @@ -155,9 +155,9 @@ def _create_models(segment: CompatibleSpan, project: Project) -> None: relationships between them and the Project model. 
""" - environment_name = segment["data"].get("sentry.environment") - release_name = segment["data"].get("sentry.release") - dist_name = segment["data"].get("sentry.dist") + environment_name = attribute_value(segment, "sentry.environment") + release_name = attribute_value(segment, "sentry.release") + dist_name = attribute_value(segment, "sentry.dist") date = to_datetime(segment["end_timestamp"]) environment = Environment.get_or_create(project=project, name=environment_name) @@ -248,13 +248,11 @@ def _detect_performance_problems( def _record_signals( segment_span: CompatibleSpan, spans: list[CompatibleSpan], project: Project ) -> None: - data = segment_span.get("data", {}) - record_generic_event_processed( project, - platform=data.get("sentry.platform"), - release=data.get("sentry.release"), - environment=data.get("sentry.environment"), + platform=attribute_value(segment_span, "sentry.platform"), + release=attribute_value(segment_span, "sentry.release"), + environment=attribute_value(segment_span, "sentry.environment"), ) # signal expects an event like object with a datetime attribute @@ -268,7 +266,7 @@ def _record_signals( ) for module in insights_modules( - [FilterSpan.from_span_data(span.get("data", {})) for span in spans] + [FilterSpan.from_span_attributes(span.get("attributes", {})) for span in spans] ): set_project_flag_and_signal( project, diff --git a/src/sentry/spans/consumers/process_segments/shim.py b/src/sentry/spans/consumers/process_segments/shim.py index d52bb8d7330e8e..8ce498d020f205 100644 --- a/src/sentry/spans/consumers/process_segments/shim.py +++ b/src/sentry/spans/consumers/process_segments/shim.py @@ -8,7 +8,7 @@ from copy import deepcopy from typing import Any, cast -from sentry_kafka_schemas.schema_types.buffered_segments_v1 import _SentryExtractedTags +import sentry_sdk from sentry.performance_issues.types import SentryTags as PerformanceIssuesSentryTags from sentry.spans.consumers.process_segments.types import ( @@ -34,26 +34,29 @@ def 
def _sentry_tags(attributes: dict[str, Any]) -> dict[str, str]:
    """Backfill sentry tags used in performance issue detection.

    Once performance issue detection is only called from process_segments,
    (not from event_manager), the performance issues code can be refactored
    to access span attributes instead of sentry_tags.

    :param attributes: the span's ``attributes`` mapping of
        ``{key: {"type": ..., "value": ...}}`` entries.
    :returns: a flat ``{tag_key: str_value}`` mapping containing only tags
        whose attribute value is present and not ``None``.
    """
    sentry_tags: dict[str, str] = {}

    for tag_key in PerformanceIssuesSentryTags.__mutable_keys__:
        # The event-protocol tag "description" maps to the normalized
        # description attribute; all other tags use the "sentry." prefix.
        attribute_key = (
            "sentry.normalized_description" if tag_key == "description" else f"sentry.{tag_key}"
        )
        if attribute_key in attributes:
            # Tolerate malformed entries (non-dict or None) instead of
            # swallowing arbitrary exceptions: only read "value" from a dict.
            entry = attributes[attribute_key]
            value = entry.get("value") if isinstance(entry, dict) else None
            # Skip None so we never emit the literal tag value "None",
            # which downstream detectors would treat as a real value.
            if value is not None:
                sentry_tags[tag_key] = str(value)

    return sentry_tags
def get_span_op(span: SpanEvent) -> str:
    """Return the span's ``sentry.op`` attribute value.

    Relay should normalize the op, but we defensively fall back to
    ``DEFAULT_SPAN_OP`` when it is missing or empty.
    """
    op = attribute_value(span, "sentry.op")
    return op if op else DEFAULT_SPAN_OP
{useSegmentSpanWidgetState} from 'sentry/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState'; +import {useSpanEventWidgetState} from 'sentry/views/dashboards/widgetBuilder/hooks/useSpanEventWidgetState'; import {isLogsEnabled} from 'sentry/views/explore/logs/isLogsEnabled'; function WidgetBuilderDatasetSelector() { @@ -31,7 +31,7 @@ function WidgetBuilderDatasetSelector() { const source = useDashboardWidgetSource(); const isEditing = useIsEditingWidget(); const {cacheBuilderState, restoreOrSetBuilderState} = useCacheBuilderState(); - const {setSegmentSpanBuilderState} = useSegmentSpanWidgetState(); + const {setSpanEventBuilderState} = useSpanEventWidgetState(); const disabledChoices: RadioGroupProps['disabledChoices'] = []; const datasetChoices: Array> = []; @@ -56,7 +56,7 @@ function WidgetBuilderDatasetSelector() { }} onClick={() => { cacheBuilderState(state.dataset ?? WidgetType.ERRORS); - setSegmentSpanBuilderState(); + setSpanEventBuilderState(); }} > {t('spans')} diff --git a/static/app/views/dashboards/widgetBuilder/components/widgetBuilderSlideout.tsx b/static/app/views/dashboards/widgetBuilder/components/widgetBuilderSlideout.tsx index cae3e04b965f29..6f67dfa1be18a3 100644 --- a/static/app/views/dashboards/widgetBuilder/components/widgetBuilderSlideout.tsx +++ b/static/app/views/dashboards/widgetBuilder/components/widgetBuilderSlideout.tsx @@ -48,7 +48,7 @@ import {useCacheBuilderState} from 'sentry/views/dashboards/widgetBuilder/hooks/ import useDashboardWidgetSource from 'sentry/views/dashboards/widgetBuilder/hooks/useDashboardWidgetSource'; import {useDisableTransactionWidget} from 'sentry/views/dashboards/widgetBuilder/hooks/useDisableTransactionWidget'; import useIsEditingWidget from 'sentry/views/dashboards/widgetBuilder/hooks/useIsEditingWidget'; -import {useSegmentSpanWidgetState} from 'sentry/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState'; +import {useSpanEventWidgetState} from 
'sentry/views/dashboards/widgetBuilder/hooks/useSpanEventWidgetState'; import {convertBuilderStateToWidget} from 'sentry/views/dashboards/widgetBuilder/utils/convertBuilderStateToWidget'; import {convertWidgetToBuilderStateParams} from 'sentry/views/dashboards/widgetBuilder/utils/convertWidgetToBuilderStateParams'; import {getTopNConvertedDefaultWidgets} from 'sentry/views/dashboards/widgetLibrary/data'; @@ -92,7 +92,7 @@ function WidgetBuilderSlideout({ const isEditing = useIsEditingWidget(); const source = useDashboardWidgetSource(); const {cacheBuilderState} = useCacheBuilderState(); - const {setSegmentSpanBuilderState} = useSegmentSpanWidgetState(); + const {setSpanEventBuilderState} = useSpanEventWidgetState(); const disableTransactionWidget = useDisableTransactionWidget(); const isTransactionsWidget = state.dataset === WidgetType.TRANSACTIONS; const [showTransactionsDeprecationAlert, setShowTransactionsDeprecationAlert] = @@ -260,7 +260,7 @@ function WidgetBuilderSlideout({ }} onClick={() => { cacheBuilderState(state.dataset ?? 
WidgetType.ERRORS); - setSegmentSpanBuilderState(); + setSpanEventBuilderState(); }} > {t('spans')} diff --git a/static/app/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState.tsx b/static/app/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState.tsx index bb94304f23810e..86fdc15ed1fa1f 100644 --- a/static/app/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState.tsx +++ b/static/app/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState.tsx @@ -5,10 +5,10 @@ import {useWidgetBuilderContext} from 'sentry/views/dashboards/widgetBuilder/con import {BuilderStateAction} from 'sentry/views/dashboards/widgetBuilder/hooks/useWidgetBuilderState'; import {convertBuilderStateToStateQueryParams} from 'sentry/views/dashboards/widgetBuilder/utils/convertBuilderStateToStateQueryParams'; -export function useSegmentSpanWidgetState() { +export function useSpanEventWidgetState() { const {dispatch, state} = useWidgetBuilderContext(); - const setSegmentSpanBuilderState = useCallback(() => { + const setSpanEventBuilderState = useCallback(() => { const nextDataset = WidgetType.SPANS; const stateParams = convertBuilderStateToStateQueryParams(state); dispatch({ @@ -22,6 +22,6 @@ export function useSegmentSpanWidgetState() { }, [dispatch, state]); return { - setSegmentSpanBuilderState, + setSpanEventBuilderState, }; } From 37a4ad0f6e27cb2a9fd28314c6ee3bf2ea8c939a Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Sep 2025 12:16:03 +0200 Subject: [PATCH 08/21] test --- .../process_segments/test_enrichment.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/sentry/spans/consumers/process_segments/test_enrichment.py b/tests/sentry/spans/consumers/process_segments/test_enrichment.py index 2551be4b388869..de68a0568da9d1 100644 --- a/tests/sentry/spans/consumers/process_segments/test_enrichment.py +++ b/tests/sentry/spans/consumers/process_segments/test_enrichment.py @@ -37,10 +37,10 @@ def 
test_childless_spans() -> None: ), ] - _, enriched = TreeEnricher.enrich_spans(spans) - enriched = [make_compatible(span) for span in enriched] + _, spans = TreeEnricher.enrich_spans(spans) + spans = [make_compatible(span) for span in spans] - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in enriched} + exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} assert exclusive_times == { "aaaaaaaaaaaaaaaa": 1123.0, "bbbbbbbbbbbbbbbb": 3000.0, @@ -81,9 +81,9 @@ def test_nested_spans() -> None: ), ] - _, enriched = TreeEnricher.enrich_spans(spans) + _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in enriched} + exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 400.0, @@ -124,9 +124,9 @@ def test_overlapping_child_spans() -> None: ), ] - _, enriched = TreeEnricher.enrich_spans(spans) + _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in enriched} + exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 400.0, @@ -167,9 +167,9 @@ def test_child_spans_dont_intersect_parent() -> None: ), ] - _, enriched = TreeEnricher.enrich_spans(spans) + _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in enriched} + exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 1000.0, @@ -210,9 +210,9 @@ def test_child_spans_extend_beyond_parent() -> None: ), ] - _, enriched = TreeEnricher.enrich_spans(spans) + _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in enriched} 
+ exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 200.0, @@ -253,9 +253,9 @@ def test_child_spans_consumes_all_of_parent() -> None: ), ] - _, enriched = TreeEnricher.enrich_spans(spans) + _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in enriched} + exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 0.0, @@ -296,9 +296,9 @@ def test_only_immediate_child_spans_affect_calculation() -> None: ), ] - _, enriched = TreeEnricher.enrich_spans(spans) + _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in enriched} + exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 600.0, @@ -366,7 +366,7 @@ def test_emit_ops_breakdown() -> None: } # Compute breakdowns for the segment span - _ = TreeEnricher.enrich_spans(spans) + (_,) = TreeEnricher.enrich_spans(spans) updates = compute_breakdowns(spans, breakdowns_config) assert updates["span_ops.ops.http"]["value"] == 3600000.0 From 5bfaab6f8b75072f95f723b761da2ca96ca7ebaa Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Sep 2025 14:10:51 +0200 Subject: [PATCH 09/21] wip: tests --- src/sentry/spans/buffer.py | 2 +- .../consumers/process_segments/convert.py | 13 +++-- .../consumers/process_segments/enrichment.py | 22 ++++----- .../consumers/process_segments/message.py | 1 - .../spans/consumers/process_segments/shim.py | 10 ++-- .../spans/consumers/process_segments/types.py | 21 +++----- .../spans/consumers/process/__init__.py | 28 ++++++----- .../process_segments/test_convert.py | 37 +++++++------- .../process_segments/test_enrichment.py | 49 +++++++++++++------ 
.../consumers/process_segments/test_shim.py | 11 +++-- 10 files changed, 105 insertions(+), 89 deletions(-) diff --git a/src/sentry/spans/buffer.py b/src/sentry/spans/buffer.py index 27e3d51d8149e1..80d72933eed0b1 100644 --- a/src/sentry/spans/buffer.py +++ b/src/sentry/spans/buffer.py @@ -435,7 +435,7 @@ def flush_segments(self, now: int) -> dict[SegmentKey, FlushedSegment]: span.setdefault("attributes", {})["sentry.segment.id"] = segment_span_id is_segment = segment_span_id == span["span_id"] - span["is_segment"] = is_segment + span.setdefault("attributes", {})["sentry.is_segment"] = is_segment if is_segment: has_root_span = True diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index 5d5ba1647ab918..e9ae6f6b078282 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -1,4 +1,3 @@ -from collections.abc import MutableMapping from typing import Any, cast import orjson @@ -15,7 +14,6 @@ FIELD_TO_ATTRIBUTE = { "end_timestamp": "sentry.end_timestamp_precise", "event_id": "sentry.event_id", - "exclusive_time_ms": "sentry.exclusive_time_ms", "hash": "sentry.hash", "is_remote": "sentry.is_remote", "is_segment": "sentry.is_segment", @@ -28,13 +26,12 @@ RENAME_ATTRIBUTES = { "sentry.description": "sentry.raw_description", - "sentry.duration": "sentry.duration_ms", "sentry.segment.id": "sentry.segment_id", } def convert_span_to_item(span: CompatibleSpan) -> TraceItem: - attributes: MutableMapping[str, AnyValue] = {} # TODO + attributes: dict[str, AnyValue] = {} client_sample_rate = 1.0 server_sample_rate = 1.0 @@ -60,7 +57,7 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: pass for field_name, attribute_name in FIELD_TO_ATTRIBUTE.items(): - field_value = span.get(field_name) + field_value = span.get(field_name) # type:ignore[assignment] if field_value is not None: attributes[attribute_name] = 
def _find_segment_span(spans: list[SpanEvent]) -> SpanEvent | None:
    """Return the segment (root) span of the list, or ``None`` if absent.

    Iterates backwards since we usually expect the segment span to be at
    the end of the list.
    """
    return next(
        (
            candidate
            for candidate in reversed(spans)
            if attribute_value(candidate, "sentry.is_segment")
        ),
        None,
    )
""" - environment_name = attribute_value(segment, "sentry.environment") release_name = attribute_value(segment, "sentry.release") dist_name = attribute_value(segment, "sentry.dist") diff --git a/src/sentry/spans/consumers/process_segments/shim.py b/src/sentry/spans/consumers/process_segments/shim.py index 8ce498d020f205..d64e46e78bd8ba 100644 --- a/src/sentry/spans/consumers/process_segments/shim.py +++ b/src/sentry/spans/consumers/process_segments/shim.py @@ -9,22 +9,22 @@ from typing import Any, cast import sentry_sdk +from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent from sentry.performance_issues.types import SentryTags as PerformanceIssuesSentryTags from sentry.spans.consumers.process_segments.types import ( CompatibleSpan, - EnrichedSpan, attribute_value, get_span_op, ) from sentry.utils.dates import to_datetime -def make_compatible(span: EnrichedSpan) -> CompatibleSpan: +def make_compatible(span: SpanEvent) -> CompatibleSpan: # Creates attributes for EAP spans that are required by logic shared with the # event pipeline. # - # Spans in the transaction event protocol had a slightly different schema + # Spans in the transaction event protocol had a different schema # compared to raw spans on the EAP topic. This function adds the missing # attributes to the spans to make them compatible with the event pipeline # logic. @@ -32,9 +32,7 @@ def make_compatible(span: EnrichedSpan) -> CompatibleSpan: **span, "sentry_tags": _sentry_tags(span.get("attributes") or {}), "op": get_span_op(span), - # Note: Event protocol spans expect `exclusive_time` while EAP expects - # `exclusive_time_ms`. 
Both are the same value in milliseconds - "exclusive_time": span["exclusive_time_ms"], # FIXME + "exclusive_time": attribute_value(span, "sentry.exclusive_time_ms"), } return ret diff --git a/src/sentry/spans/consumers/process_segments/types.py b/src/sentry/spans/consumers/process_segments/types.py index e3888904a18b18..f622f57c3f5170 100644 --- a/src/sentry/spans/consumers/process_segments/types.py +++ b/src/sentry/spans/consumers/process_segments/types.py @@ -1,8 +1,9 @@ +from collections.abc import Mapping from typing import Any, NotRequired from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent from sentry_kafka_schemas.schema_types.ingest_spans_v1 import ( - _FullStopIngestSpansFullStopV1FullStopSchemaFullStopJsonNumberSignDefinitionsAttributevalue as AttributeValue, + _FileColonFullStopIngestSpansFullStopV1FullStopSchemaFullStopJsonNumberSignDefinitionsAttributevalue as AttributeValue, ) Attributes = dict[str, AttributeValue] @@ -18,16 +19,7 @@ def get_span_op(span: SpanEvent) -> str: return attribute_value(span, "sentry.op") or DEFAULT_SPAN_OP -class EnrichedSpan(SpanEvent, total=True): - """ - Enriched version of the incoming span payload that has additional attributes - extracted from its child spans and/or inherited from its parent span. - """ - - exclusive_time_ms: float - - -class CompatibleSpan(EnrichedSpan, total=True): +class CompatibleSpan(SpanEvent, total=True): """A span that has the same fields as a kafka span, plus shimming for logic shared with the event pipeline. 
def attribute_value(span: Mapping[str, Any], key: str) -> Any:
    """Extract the plain value of attribute *key* from a span payload.

    Span attributes are ``{key: {"type": ..., "value": ...}}`` entries.
    Returns ``None`` when the span has no attributes, the key is missing,
    or the attribute entry itself is empty/None.
    """
    container = span.get("attributes")
    if not container:
        return None
    entry = container.get(key)
    if not entry:
        return None
    return entry.get("value")
"sentry.segment.id": {"value": "a49b42af9fb69da0", "type": "string"}, + "sentry.profile_id": {"type": "string", "value": "dbae2b82559649a1a34a2878134a007b"}, **(attributes or {}), }, "span_id": "a49b42af9fb69da0", - "start_timestamp_ms": 1707953018865, "start_timestamp": 1707953018.865, "end_timestamp": 1707953018.972, "trace_id": "94576097f3a64b68b85a59c7d4e3ee2a", + "name": "OrganizationNPlusOne", + "status": "ok", } - span.update(**kwargs) + span.update(**kwargs) # type:ignore[call-arg] return span diff --git a/tests/sentry/spans/consumers/process_segments/test_convert.py b/tests/sentry/spans/consumers/process_segments/test_convert.py index 3cda4fae68a3c3..9e1a7876441759 100644 --- a/tests/sentry/spans/consumers/process_segments/test_convert.py +++ b/tests/sentry/spans/consumers/process_segments/test_convert.py @@ -1,6 +1,7 @@ from typing import cast from google.protobuf.timestamp_pb2 import Timestamp +from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType from sentry_protos.snuba.v1.trace_item_pb2 import AnyValue @@ -11,11 +12,8 @@ # Test ported from Snuba's `eap_items_span`. 
# ############################################### -SPAN_KAFKA_MESSAGE = { - "description": "/api/0/relays/projectconfigs/", - "duration_ms": 152, - "exclusive_time_ms": 0.228, - "is_segment": True, +SPAN_KAFKA_MESSAGE: SpanEvent = { + "is_remote": True, "attributes": { "http.status_code": {"value": "200", "type": "string"}, "my.array.field": {"value": [1, 2, ["nested", "array"]], "type": "array"}, @@ -36,16 +34,21 @@ "relay_use_post_or_schedule_rejected": {"value": "version", "type": "string"}, "sentry.category": {"value": "http", "type": "string"}, "sentry.client_sample_rate": {"value": 0.1, "type": "string"}, + "sentry.description": {"value": "/api/0/relays/projectconfigs/", "type": "string"}, "sentry.environment": {"value": "development", "type": "string"}, + "sentry.is_segment": {"value": True, "type": "boolean"}, "sentry.normalized_description": {"value": "normalized_description", "type": "string"}, "sentry.op": {"value": "http.server", "type": "string"}, + "sentry.origin": {"value": "auto.http.django", "type": "string"}, "sentry.platform": {"value": "python", "type": "string"}, + "sentry.profile_id": {"value": "56c7d1401ea14ad7b4ac86de46baebae", "type": "string"}, "sentry.release": { "value": "backend@24.7.0.dev0+c45b49caed1e5fcbf70097ab3f434b487c359b6b", "type": "string", }, "sentry.sdk.name": {"value": "sentry.python.django", "type": "string"}, "sentry.sdk.version": {"value": "2.7.0", "type": "string"}, + "sentry.segment.id": {"type": "string", "value": "8873a98879faf06d"}, "sentry.segment.name": {"value": "/api/0/relays/projectconfigs/", "type": "string"}, "sentry.server_sample_rate": {"value": 0.2, "type": "string"}, "sentry.status": {"value": "ok", "type": "string"}, @@ -62,25 +65,21 @@ "thread.id": {"value": "8522009600", "type": "string"}, "thread.name": {"value": "uWSGIWorker1Core0", "type": "string"}, }, - "sentry_tags": {"ignored": "tags"}, - "profile_id": "56c7d1401ea14ad7b4ac86de46baebae", "organization_id": 1, - "origin": "auto.http.django", 
"project_id": 1, "received": 1721319572.877828, "retention_days": 90, - "segment_id": "8873a98879faf06d", "span_id": "8873a98879faf06d", "trace_id": "d099bf9ad5a143cf8f83a98081d0ed3b", - "start_timestamp_ms": 1721319572616, "start_timestamp": 1721319572.616648, "end_timestamp": 1721319572.768806, + "name": "endpoint", + "status": "ok", } def test_convert_span_to_item() -> None: - # Cast since the above payload does not conform to the strict schema - item = convert_span_to_item(cast(CompatibleSpan, SPAN_KAFKA_MESSAGE)) + item = convert_span_to_item(SPAN_KAFKA_MESSAGE) assert item.organization_id == 1 assert item.project_id == 1 @@ -119,7 +118,6 @@ def test_convert_span_to_item() -> None: "sentry.duration_ms": AnyValue(int_value=152), "sentry.end_timestamp": AnyValue(double_value=1721319572.768806), "sentry.environment": AnyValue(string_value="development"), - "sentry.exclusive_time_ms": AnyValue(double_value=0.228), "sentry.is_segment": AnyValue(bool_value=True), "sentry.normalized_description": AnyValue(string_value="normalized_description"), "sentry.op": AnyValue(string_value="http.server"), @@ -154,7 +152,8 @@ def test_convert_span_to_item() -> None: def test_convert_falsy_fields() -> None: - message = {**SPAN_KAFKA_MESSAGE, "duration_ms": 0, "is_segment": False} + message: SpanEvent = {**SPAN_KAFKA_MESSAGE} + message["attributes"]["sentry.is_segment"] = {"type": "boolean", "value": False} item = convert_span_to_item(cast(CompatibleSpan, message)) @@ -163,7 +162,7 @@ def test_convert_falsy_fields() -> None: def test_convert_span_links_to_json() -> None: - message = { + message: SpanEvent = { **SPAN_KAFKA_MESSAGE, "links": [ # A link with all properties @@ -172,10 +171,10 @@ def test_convert_span_links_to_json() -> None: "span_id": "8873a98879faf06d", "sampled": True, "attributes": { - "sentry.link.type": "parent", - "sentry.dropped_attributes_count": 2, - "parent_depth": 17, - "confidence": "high", + "sentry.link.type": {"type": "string", "value": "parent"}, + 
"sentry.dropped_attributes_count": {"type": "integer", "value": 2}, + "parent_depth": {"type": "integer", "value": 17}, + "confidence": {"type": "string", "value": "high"}, }, }, # A link with missing optional properties diff --git a/tests/sentry/spans/consumers/process_segments/test_enrichment.py b/tests/sentry/spans/consumers/process_segments/test_enrichment.py index de68a0568da9d1..99cc07a5922bc2 100644 --- a/tests/sentry/spans/consumers/process_segments/test_enrichment.py +++ b/tests/sentry/spans/consumers/process_segments/test_enrichment.py @@ -1,4 +1,10 @@ -from sentry.spans.consumers.process_segments.enrichment import TreeEnricher, compute_breakdowns +from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent + +from sentry.spans.consumers.process_segments.enrichment import ( + TreeEnricher, + attribute_value, + compute_breakdowns, +) from sentry.spans.consumers.process_segments.shim import make_compatible from tests.sentry.spans.consumers.process import build_mock_span @@ -40,7 +46,9 @@ def test_childless_spans() -> None: _, spans = TreeEnricher.enrich_spans(spans) spans = [make_compatible(span) for span in spans] - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} + exclusive_times = { + span["span_id"]: attribute_value(span, "sentry.exclusive_time_ms") for span in spans + } assert exclusive_times == { "aaaaaaaaaaaaaaaa": 1123.0, "bbbbbbbbbbbbbbbb": 3000.0, @@ -83,7 +91,9 @@ def test_nested_spans() -> None: _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} + exclusive_times = { + span["span_id"]: attribute_value(span, "sentry.exclusive_time_ms") for span in spans + } assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 400.0, @@ -126,7 +136,9 @@ def test_overlapping_child_spans() -> None: _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} + 
exclusive_times = { + span["span_id"]: attribute_value(span, "sentry.exclusive_time_ms") for span in spans + } assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 400.0, @@ -169,7 +181,9 @@ def test_child_spans_dont_intersect_parent() -> None: _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} + exclusive_times = { + span["span_id"]: attribute_value(span, "sentry.exclusive_time_ms") for span in spans + } assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 1000.0, @@ -212,7 +226,9 @@ def test_child_spans_extend_beyond_parent() -> None: _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} + exclusive_times = { + span["span_id"]: attribute_value(span, "sentry.exclusive_time_ms") for span in spans + } assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 200.0, @@ -255,7 +271,9 @@ def test_child_spans_consumes_all_of_parent() -> None: _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} + exclusive_times = { + span["span_id"]: attribute_value(span, "sentry.exclusive_time_ms") for span in spans + } assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 0.0, @@ -298,7 +316,9 @@ def test_only_immediate_child_spans_affect_calculation() -> None: _, spans = TreeEnricher.enrich_spans(spans) - exclusive_times = {span["span_id"]: span["exclusive_time_ms"] for span in spans} + exclusive_times = { + span["span_id"]: attribute_value(span, "sentry.exclusive_time_ms") for span in spans + } assert exclusive_times == { "aaaaaaaaaaaaaaaa": 4000.0, "bbbbbbbbbbbbbbbb": 600.0, @@ -366,7 +386,7 @@ def test_emit_ops_breakdown() -> None: } # Compute breakdowns for the segment span - (_,) = TreeEnricher.enrich_spans(spans) + _, spans = TreeEnricher.enrich_spans(spans) updates = 
compute_breakdowns(spans, breakdowns_config) assert updates["span_ops.ops.http"]["value"] == 3600000.0 @@ -427,12 +447,9 @@ def test_write_tags_for_performance_issue_detection(): } -def _mock_performance_issue_span(is_segment, attributes, **fields): +def _mock_performance_issue_span(is_segment, attributes, **fields) -> SpanEvent: return { - "description": "OrganizationNPlusOne", "duration_ms": 107, - "is_segment": is_segment, - "is_remote": is_segment, "parent_span_id": None, "profile_id": "dbae2b82559649a1a34a2878134a007b", "project_id": 1, @@ -440,7 +457,11 @@ def _mock_performance_issue_span(is_segment, attributes, **fields): "received": 1707953019.044972, "retention_days": 90, "segment_id": "a49b42af9fb69da0", - "attributes": attributes, + "attributes": { + **attributes, + "sentry.is_segment": {"type": "boolean", "value": is_segment}, + "sentry.description": {"type": "string", "value": "OrganizationNPlusOne"}, + }, "span_id": "a49b42af9fb69da0", "start_timestamp_ms": 1707953018865, "start_timestamp": 1707953018.865, diff --git a/tests/sentry/spans/consumers/process_segments/test_shim.py b/tests/sentry/spans/consumers/process_segments/test_shim.py index a65b65cf979b0a..c6f521ca384209 100644 --- a/tests/sentry/spans/consumers/process_segments/test_shim.py +++ b/tests/sentry/spans/consumers/process_segments/test_shim.py @@ -1,14 +1,15 @@ -from typing import cast - from sentry.spans.consumers.process_segments.shim import make_compatible -from sentry.spans.consumers.process_segments.types import EnrichedSpan from tests.sentry.spans.consumers.process_segments.test_convert import SPAN_KAFKA_MESSAGE def test_make_compatible(): - message = cast(EnrichedSpan, {**SPAN_KAFKA_MESSAGE, "sentry_tags": {"ignored": "tags"}}) + message = {**SPAN_KAFKA_MESSAGE} + message["attributes"] = { + "sentry.exclusive_time_ms": {"type": "float", "value": 100.0}, + **message["attributes"], + } compatible = make_compatible(message) - assert compatible["exclusive_time"] == 
message["exclusive_time_ms"] + assert compatible["exclusive_time"] == 100.0 assert compatible["op"] == message["attributes"]["sentry.op"]["value"] # Pre-existing tags got overwritten: From 2feac9f3f6ff4885902e2010648fcb906653352d Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Sep 2025 15:14:50 +0200 Subject: [PATCH 10/21] tests pass --- .../spans/consumers/process_segments/convert.py | 16 +++++++++------- src/sentry/spans/grouping/strategy/base.py | 4 ++-- .../spans/consumers/process/test_consumer.py | 6 ++++-- .../consumers/process_segments/test_convert.py | 8 ++++---- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index e9ae6f6b078282..cce4b3e4c89d3a 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -47,12 +47,12 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: else: if k == "sentry.client_sample_rate": try: - client_sample_rate = float(value) + client_sample_rate = float(value) # type:ignore[arg-type] except ValueError: pass elif k == "sentry.server_sample_rate": try: - server_sample_rate = float(value) + server_sample_rate = float(value) # type:ignore[arg-type] except ValueError: pass @@ -67,11 +67,13 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: if convention_name in attributes: attributes[eap_name] = attributes.pop(convention_name) - # TODO: Move this to Relay - span.setdefault("attributes", {})["sentry.duration_ms"] = { - "type": "double", - "value": 1000 * (span["end_timestamp"] - span["start_timestamp"]), - } + try: + # TODO: Move this to Relay + attributes["sentry.duration_ms"] = AnyValue( + int_value=int(1000 * (span["end_timestamp"] - span["start_timestamp"])) + ) + except Exception: + sentry_sdk.capture_exception() if links := span.get("links"): try: diff --git 
a/src/sentry/spans/grouping/strategy/base.py b/src/sentry/spans/grouping/strategy/base.py index 990b18890f5f63..cf1ae185faef56 100644 --- a/src/sentry/spans/grouping/strategy/base.py +++ b/src/sentry/spans/grouping/strategy/base.py @@ -120,11 +120,11 @@ def raw_description_strategy(span: Span) -> Sequence[str]: strategy is only effective if the span description is a fixed string. Otherwise, this strategy will produce a large number of span groups. """ - return [raw_description(span) or ""] + return [raw_description(span)] def raw_description(span: Span) -> str: - return span.get("description") or attribute_value(span, "sentry.description") + return span.get("description") or attribute_value(span, "sentry.description") or "" IN_CONDITION_PATTERN = re.compile(r" IN \(%s(\s*,\s*%s)*\)") diff --git a/tests/sentry/spans/consumers/process/test_consumer.py b/tests/sentry/spans/consumers/process/test_consumer.py index f8298ff9c3c55b..95beb16e82edf8 100644 --- a/tests/sentry/spans/consumers/process/test_consumer.py +++ b/tests/sentry/spans/consumers/process/test_consumer.py @@ -76,9 +76,11 @@ def add_commit(offsets, force=False): assert orjson.loads(msg.value) == { "spans": [ { - "is_segment": True, + "attributes": { + "sentry.is_segment": True, + "sentry.segment.id": "aaaaaaaaaaaaaaaa", + }, "project_id": 12, - "segment_id": "aaaaaaaaaaaaaaaa", "span_id": "aaaaaaaaaaaaaaaa", "trace_id": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "end_timestamp": 1700000000.0, diff --git a/tests/sentry/spans/consumers/process_segments/test_convert.py b/tests/sentry/spans/consumers/process_segments/test_convert.py index 9e1a7876441759..d048429cff28ed 100644 --- a/tests/sentry/spans/consumers/process_segments/test_convert.py +++ b/tests/sentry/spans/consumers/process_segments/test_convert.py @@ -116,8 +116,9 @@ def test_convert_span_to_item() -> None: "sentry.category": AnyValue(string_value="http"), "sentry.client_sample_rate": AnyValue(double_value=0.1), "sentry.duration_ms": 
AnyValue(int_value=152), - "sentry.end_timestamp": AnyValue(double_value=1721319572.768806), + "sentry.end_timestamp_precise": AnyValue(double_value=1721319572.768806), "sentry.environment": AnyValue(string_value="development"), + "sentry.is_remote": AnyValue(bool_value=True), "sentry.is_segment": AnyValue(bool_value=True), "sentry.normalized_description": AnyValue(string_value="normalized_description"), "sentry.op": AnyValue(string_value="http.server"), @@ -134,7 +135,7 @@ def test_convert_span_to_item() -> None: "sentry.segment_id": AnyValue(string_value="8873a98879faf06d"), "sentry.segment.name": AnyValue(string_value="/api/0/relays/projectconfigs/"), "sentry.server_sample_rate": AnyValue(double_value=0.2), - "sentry.start_timestamp": AnyValue(double_value=1721319572.616648), + "sentry.start_timestamp_precise": AnyValue(double_value=1721319572.616648), "sentry.status_code": AnyValue(string_value="200"), "sentry.status": AnyValue(string_value="ok"), "sentry.thread.id": AnyValue(string_value="8522009600"), @@ -157,7 +158,6 @@ def test_convert_falsy_fields() -> None: item = convert_span_to_item(cast(CompatibleSpan, message)) - assert item.attributes.get("sentry.duration_ms") == AnyValue(int_value=0) assert item.attributes.get("sentry.is_segment") == AnyValue(bool_value=False) @@ -188,5 +188,5 @@ def test_convert_span_links_to_json() -> None: item = convert_span_to_item(cast(CompatibleSpan, message)) assert item.attributes.get("sentry.links") == AnyValue( - string_value='[{"trace_id":"d099bf9ad5a143cf8f83a98081d0ed3b","span_id":"8873a98879faf06d","sampled":true,"attributes":{"sentry.link.type":"parent","sentry.dropped_attributes_count":4}},{"trace_id":"d099bf9ad5a143cf8f83a98081d0ed3b","span_id":"873a988879faf06d"}]' + 
string_value='[{"trace_id":"d099bf9ad5a143cf8f83a98081d0ed3b","span_id":"8873a98879faf06d","sampled":true,"attributes":{"sentry.link.type":{"type":"string","value":"parent"},"sentry.dropped_attributes_count":{"type":"integer","value":4}}},{"trace_id":"d099bf9ad5a143cf8f83a98081d0ed3b","span_id":"873a988879faf06d"}]' ) From a97ec98f13cf6b30289ed797a2824fbfdd734a41 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Sep 2025 15:24:43 +0200 Subject: [PATCH 11/21] Revert js changes --- .../dashboards/widgetBuilder/components/datasetSelector.tsx | 6 +++--- .../widgetBuilder/components/widgetBuilderSlideout.tsx | 6 +++--- .../widgetBuilder/hooks/useSegmentSpanWidgetState.tsx | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/static/app/views/dashboards/widgetBuilder/components/datasetSelector.tsx b/static/app/views/dashboards/widgetBuilder/components/datasetSelector.tsx index c6a973a0c77e97..edbb2c74a68836 100644 --- a/static/app/views/dashboards/widgetBuilder/components/datasetSelector.tsx +++ b/static/app/views/dashboards/widgetBuilder/components/datasetSelector.tsx @@ -21,7 +21,7 @@ import {useWidgetBuilderContext} from 'sentry/views/dashboards/widgetBuilder/con import {useCacheBuilderState} from 'sentry/views/dashboards/widgetBuilder/hooks/useCacheBuilderState'; import useDashboardWidgetSource from 'sentry/views/dashboards/widgetBuilder/hooks/useDashboardWidgetSource'; import useIsEditingWidget from 'sentry/views/dashboards/widgetBuilder/hooks/useIsEditingWidget'; -import {useSpanEventWidgetState} from 'sentry/views/dashboards/widgetBuilder/hooks/useSpanEventWidgetState'; +import {useSegmentSpanWidgetState} from 'sentry/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState'; import {isLogsEnabled} from 'sentry/views/explore/logs/isLogsEnabled'; function WidgetBuilderDatasetSelector() { @@ -31,7 +31,7 @@ function WidgetBuilderDatasetSelector() { const source = useDashboardWidgetSource(); const isEditing = useIsEditingWidget(); const 
{cacheBuilderState, restoreOrSetBuilderState} = useCacheBuilderState(); - const {setSpanEventBuilderState} = useSpanEventWidgetState(); + const {setSegmentSpanBuilderState} = useSegmentSpanWidgetState(); const disabledChoices: RadioGroupProps['disabledChoices'] = []; const datasetChoices: Array> = []; @@ -56,7 +56,7 @@ function WidgetBuilderDatasetSelector() { }} onClick={() => { cacheBuilderState(state.dataset ?? WidgetType.ERRORS); - setSpanEventBuilderState(); + setSegmentSpanBuilderState(); }} > {t('spans')} diff --git a/static/app/views/dashboards/widgetBuilder/components/widgetBuilderSlideout.tsx b/static/app/views/dashboards/widgetBuilder/components/widgetBuilderSlideout.tsx index 6f67dfa1be18a3..cae3e04b965f29 100644 --- a/static/app/views/dashboards/widgetBuilder/components/widgetBuilderSlideout.tsx +++ b/static/app/views/dashboards/widgetBuilder/components/widgetBuilderSlideout.tsx @@ -48,7 +48,7 @@ import {useCacheBuilderState} from 'sentry/views/dashboards/widgetBuilder/hooks/ import useDashboardWidgetSource from 'sentry/views/dashboards/widgetBuilder/hooks/useDashboardWidgetSource'; import {useDisableTransactionWidget} from 'sentry/views/dashboards/widgetBuilder/hooks/useDisableTransactionWidget'; import useIsEditingWidget from 'sentry/views/dashboards/widgetBuilder/hooks/useIsEditingWidget'; -import {useSpanEventWidgetState} from 'sentry/views/dashboards/widgetBuilder/hooks/useSpanEventWidgetState'; +import {useSegmentSpanWidgetState} from 'sentry/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState'; import {convertBuilderStateToWidget} from 'sentry/views/dashboards/widgetBuilder/utils/convertBuilderStateToWidget'; import {convertWidgetToBuilderStateParams} from 'sentry/views/dashboards/widgetBuilder/utils/convertWidgetToBuilderStateParams'; import {getTopNConvertedDefaultWidgets} from 'sentry/views/dashboards/widgetLibrary/data'; @@ -92,7 +92,7 @@ function WidgetBuilderSlideout({ const isEditing = useIsEditingWidget(); const source = 
useDashboardWidgetSource(); const {cacheBuilderState} = useCacheBuilderState(); - const {setSpanEventBuilderState} = useSpanEventWidgetState(); + const {setSegmentSpanBuilderState} = useSegmentSpanWidgetState(); const disableTransactionWidget = useDisableTransactionWidget(); const isTransactionsWidget = state.dataset === WidgetType.TRANSACTIONS; const [showTransactionsDeprecationAlert, setShowTransactionsDeprecationAlert] = @@ -260,7 +260,7 @@ function WidgetBuilderSlideout({ }} onClick={() => { cacheBuilderState(state.dataset ?? WidgetType.ERRORS); - setSpanEventBuilderState(); + setSegmentSpanBuilderState(); }} > {t('spans')} diff --git a/static/app/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState.tsx b/static/app/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState.tsx index 86fdc15ed1fa1f..bb94304f23810e 100644 --- a/static/app/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState.tsx +++ b/static/app/views/dashboards/widgetBuilder/hooks/useSegmentSpanWidgetState.tsx @@ -5,10 +5,10 @@ import {useWidgetBuilderContext} from 'sentry/views/dashboards/widgetBuilder/con import {BuilderStateAction} from 'sentry/views/dashboards/widgetBuilder/hooks/useWidgetBuilderState'; import {convertBuilderStateToStateQueryParams} from 'sentry/views/dashboards/widgetBuilder/utils/convertBuilderStateToStateQueryParams'; -export function useSpanEventWidgetState() { +export function useSegmentSpanWidgetState() { const {dispatch, state} = useWidgetBuilderContext(); - const setSpanEventBuilderState = useCallback(() => { + const setSegmentSpanBuilderState = useCallback(() => { const nextDataset = WidgetType.SPANS; const stateParams = convertBuilderStateToStateQueryParams(state); dispatch({ @@ -22,6 +22,6 @@ export function useSpanEventWidgetState() { }, [dispatch, state]); return { - setSpanEventBuilderState, + setSegmentSpanBuilderState, }; } From e15c5ee083ea95e7f2150e7cba71deae3d119092 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 30 Sep 2025 
15:28:29 +0200 Subject: [PATCH 12/21] Apply suggestion from @jjbayer --- src/sentry/insights/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/insights/__init__.py b/src/sentry/insights/__init__.py index ebe0b0aadc3c04..634fe190cf1c43 100644 --- a/src/sentry/insights/__init__.py +++ b/src/sentry/insights/__init__.py @@ -27,7 +27,7 @@ def from_span_v1(cls, span: dict[str, Any]) -> "FilterSpan": @classmethod def from_span_attributes(cls, attributes: dict[str, Any]) -> "FilterSpan": - """Get relevant fields from `span.data`.""" + """Get relevant fields from `span.attributes`.""" return cls( op=(attributes.get("sentry.op") or {}).get("value"), category=(attributes.get("sentry.category") or {}).get("value"), From 1c81672e0f10c8dc893ea144735972a025826827 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 1 Oct 2025 10:18:55 +0200 Subject: [PATCH 13/21] fix --- src/sentry/spans/buffer.py | 10 ++++++++-- .../spans/consumers/process_segments/enrichment.py | 6 ++++-- tests/sentry/spans/test_buffer.py | 6 ++++-- 3 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/sentry/spans/buffer.py b/src/sentry/spans/buffer.py index 80d72933eed0b1..d6048b1853fe92 100644 --- a/src/sentry/spans/buffer.py +++ b/src/sentry/spans/buffer.py @@ -432,10 +432,16 @@ def flush_segments(self, now: int) -> dict[SegmentKey, FlushedSegment]: span = orjson.loads(payload) if not attribute_value(span, "sentry.segment.id"): - span.setdefault("attributes", {})["sentry.segment.id"] = segment_span_id + span.setdefault("attributes", {})["sentry.segment.id"] = { + "type": "string", + "value": segment_span_id, + } is_segment = segment_span_id == span["span_id"] - span.setdefault("attributes", {})["sentry.is_segment"] = is_segment + span.setdefault("attributes", {})["sentry.is_segment"] = { + "type": "boolean", + "value": is_segment, + } if is_segment: has_root_span = True diff --git a/src/sentry/spans/consumers/process_segments/enrichment.py 
b/src/sentry/spans/consumers/process_segments/enrichment.py index 83544862c37257..a0009fd86bcc03 100644 --- a/src/sentry/spans/consumers/process_segments/enrichment.py +++ b/src/sentry/spans/consumers/process_segments/enrichment.py @@ -87,14 +87,16 @@ def _attributes(self, span: SpanEvent) -> dict[str, Any]: segment_attrs = self._segment_span.get("attributes", {}) shared_attrs = {k: v for k, v in segment_attrs.items() if k in SHARED_SENTRY_ATTRIBUTES} - is_mobile = attribute_value(span, "sentry.mobile") == "true" + is_mobile = attribute_value(self._segment_span, "sentry.mobile") == "true" mobile_start_type = _get_mobile_start_type(self._segment_span) if is_mobile: # NOTE: Like in Relay's implementation, shared tags are added at the # very end. This does not have access to the shared tag value. We # keep behavior consistent, although this should be revisited. - if attributes.get("sentry.thread.name") == MOBILE_MAIN_THREAD_NAME: + if (attributes.get("sentry.thread.name") or {}).get( + "value" + ) == MOBILE_MAIN_THREAD_NAME: attributes["sentry.main_thread"] = {"type": "string", "value": "true"} if not attributes.get("sentry.app_start_type") and mobile_start_type: attributes["sentry.app_start_type"] = { diff --git a/tests/sentry/spans/test_buffer.py b/tests/sentry/spans/test_buffer.py index 91f76f66711b6b..e55463733a6d92 100644 --- a/tests/sentry/spans/test_buffer.py +++ b/tests/sentry/spans/test_buffer.py @@ -46,8 +46,10 @@ def _output_segment(span_id: bytes, segment_id: bytes, is_segment: bool) -> Outp return OutputSpan( payload={ "span_id": span_id.decode("ascii"), - "segment_id": segment_id.decode("ascii"), - "is_segment": is_segment, + "attributes": { + "sentry.segment.id": {"type": "string", "value": segment_id.decode("ascii")}, + "sentry.is_segment": {"type": "boolean", "value": is_segment}, + }, } ) From 41c2ae9aa0bfdc80cf8067516333b69e068bf35a Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 1 Oct 2025 11:18:50 +0200 Subject: [PATCH 14/21] kafka version 
and sentry.name --- pyproject.toml | 2 +- src/sentry/spans/consumers/process_segments/convert.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 565ff1b31d76fd..07e1e316bce692 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dependencies = [ # [end] jsonschema format validators "sentry-arroyo>=2.25.5", "sentry-forked-email-reply-parser>=0.5.12.post1", - "sentry-kafka-schemas>=2.1.3", + "sentry-kafka-schemas>=2.1.4", "sentry-ophio>=1.1.3", "sentry-protos>=0.4.0", "sentry-redis-tools>=0.5.0", diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index cce4b3e4c89d3a..703ee517357fc0 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -18,6 +18,7 @@ "is_remote": "sentry.is_remote", "is_segment": "sentry.is_segment", "kind": "sentry.kind", + "name": "sentry.name", "origin": "sentry.origin", "parent_span_id": "sentry.parent_span_id", "received": "sentry.received", From 75baa25c638c150c9cdf4421e6b66d12dfff75bb Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Wed, 1 Oct 2025 11:37:47 +0200 Subject: [PATCH 15/21] mock segment --- src/sentry/spans/consumers/process_segments/shim.py | 1 + src/sentry/spans/consumers/process_segments/types.py | 1 + 2 files changed, 2 insertions(+) diff --git a/src/sentry/spans/consumers/process_segments/shim.py b/src/sentry/spans/consumers/process_segments/shim.py index d64e46e78bd8ba..113f303d7344fd 100644 --- a/src/sentry/spans/consumers/process_segments/shim.py +++ b/src/sentry/spans/consumers/process_segments/shim.py @@ -33,6 +33,7 @@ def make_compatible(span: SpanEvent) -> CompatibleSpan: "sentry_tags": _sentry_tags(span.get("attributes") or {}), "op": get_span_op(span), "exclusive_time": attribute_value(span, "sentry.exclusive_time_ms"), + "is_segment": bool(attribute_value(span, "sentry.is_segment")), } return ret 
diff --git a/src/sentry/spans/consumers/process_segments/types.py b/src/sentry/spans/consumers/process_segments/types.py index f622f57c3f5170..9673fab2cfb96d 100644 --- a/src/sentry/spans/consumers/process_segments/types.py +++ b/src/sentry/spans/consumers/process_segments/types.py @@ -27,6 +27,7 @@ class CompatibleSpan(SpanEvent, total=True): exclusive_time: float op: str sentry_tags: dict[str, str] + is_segment: bool # Added by `SpanGroupingResults.write_to_spans` in `_enrich_spans` hash: NotRequired[str] From 8867dfb2c89a7f336663f5762e36a8517dc89bcf Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Thu, 2 Oct 2025 04:32:37 +0000 Subject: [PATCH 16/21] :snowflake: re-freeze requirements --- uv.lock | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/uv.lock b/uv.lock index 3d6b38fb8acf91..bbb9586ed32d98 100644 --- a/uv.lock +++ b/uv.lock @@ -2069,11 +2069,11 @@ requires-dist = [ { name = "rfc3986-validator", specifier = ">=0.1.1" }, { name = "sentry-arroyo", specifier = ">=2.25.5" }, { name = "sentry-forked-email-reply-parser", specifier = ">=0.5.12.post1" }, - { name = "sentry-kafka-schemas", specifier = ">=2.1.3" }, + { name = "sentry-kafka-schemas", specifier = ">=2.1.4" }, { name = "sentry-ophio", specifier = ">=1.1.3" }, { name = "sentry-protos", specifier = ">=0.4.0" }, { name = "sentry-redis-tools", specifier = ">=0.5.0" }, - { name = "sentry-relay", specifier = ">=0.9.15" }, + { name = "sentry-relay", specifier = ">=0.9.16" }, { name = "sentry-sdk", extras = ["http2"], specifier = ">=2.35.1" }, { name = "sentry-usage-accountant", specifier = ">=0.0.10" }, { name = "setuptools", specifier = ">=70.0.0" }, @@ -2240,7 +2240,7 @@ wheels = [ [[package]] name = "sentry-kafka-schemas" -version = "2.1.3" +version = "2.1.4" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "fastjsonschema", marker = "sys_platform == 'darwin' or 
sys_platform == 'linux'" }, @@ -2251,7 +2251,7 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_kafka_schemas-2.1.3-py2.py3-none-any.whl", hash = "sha256:bf294c727d66fef81d24602600495933dccdefa625430f7938f99b9a252e5fbb" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_kafka_schemas-2.1.4-py2.py3-none-any.whl", hash = "sha256:37de4a4f046fb89f959696356c1fa4cad4b487d6a462468ef1aeb55afcacd856" }, ] [[package]] @@ -2291,16 +2291,16 @@ wheels = [ [[package]] name = "sentry-relay" -version = "0.9.15" +version = "0.9.16" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "milksnake", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.15-py2.py3-none-macosx_13_0_x86_64.whl", hash = "sha256:034cd4ea3549fad77bd1743e9327af85ca1d6daf8289cbf2f46263921587dfc5" }, - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.15-py2.py3-none-macosx_14_0_arm64.whl", hash = "sha256:a736413c89784f48f589b8ec0c5611c90d3e4b479c7b6200956a720a8f51f64d" }, - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.15-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:3bd2959e8496a7bddfd72a4722ba268768a09e8d1493d3e541318cc23cb89bc3" }, - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.15-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:b3cdf49c23cebd6fd3f821cb4f099abc269e38a387ebaed29b8bf721487291cc" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.16-py2.py3-none-macosx_13_0_x86_64.whl", hash = "sha256:141b0b5ffdfec5194a1bc7851e0f4d9304a090589ed3c181bac6b85b9c7db142" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.16-py2.py3-none-macosx_14_0_arm64.whl", hash = 
"sha256:59e6ff9cd52b6b9976c866adb33a1b4ffc9b1e545816010169601d76953c3cb7" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.16-py2.py3-none-manylinux_2_28_aarch64.whl", hash = "sha256:005c3dc3a97c49fda0b481a08664d02e8e9ade1cac99c816febca3f836a1856c" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_relay-0.9.16-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:255c5f40c7b480df42e4899e0ddaf4fccd1edfd3450b78f237cbe3e473e7b4ff" }, ] [[package]] From 8a9a1435da0255c782128483fbbb7b4c2b90c424 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 2 Oct 2025 09:32:41 +0200 Subject: [PATCH 17/21] fix tests --- .../spans/consumers/process_segments/types.py | 2 +- src/sentry/spans/grouping/strategy/base.py | 2 +- .../performance_issues/span_builder.py | 20 +++++++++++++++++++ .../spans/consumers/process/test_consumer.py | 4 ++-- .../process_segments/test_convert.py | 1 + tests/sentry/spans/grouping/test_strategy.py | 13 ++++++++---- 6 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/sentry/spans/consumers/process_segments/types.py b/src/sentry/spans/consumers/process_segments/types.py index 9673fab2cfb96d..9d1e5898a9b30c 100644 --- a/src/sentry/spans/consumers/process_segments/types.py +++ b/src/sentry/spans/consumers/process_segments/types.py @@ -3,7 +3,7 @@ from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent from sentry_kafka_schemas.schema_types.ingest_spans_v1 import ( - _FileColonFullStopIngestSpansFullStopV1FullStopSchemaFullStopJsonNumberSignDefinitionsAttributevalue as AttributeValue, + _FileColonIngestSpansFullStopV1FullStopSchemaFullStopJsonNumberSignDefinitionsAttributevalue as AttributeValue, ) Attributes = dict[str, AttributeValue] diff --git a/src/sentry/spans/grouping/strategy/base.py b/src/sentry/spans/grouping/strategy/base.py index cf1ae185faef56..3e6549fd2c8359 100644 --- a/src/sentry/spans/grouping/strategy/base.py +++ b/src/sentry/spans/grouping/strategy/base.py @@ -57,7 +57,7 @@ def 
get_standalone_span_group(self, span: Span) -> str: # compatibility with transaction events, but fall back to default # fingerprinting if the span doesn't have a transaction. if ( - span.get("is_segment") + attribute_value(span, "sentry.is_segment") and (transaction := attribute_value(span, "sentry.transaction")) is not None ): result = Hash() diff --git a/src/sentry/testutils/performance_issues/span_builder.py b/src/sentry/testutils/performance_issues/span_builder.py index 5e0ff051ad262f..8dc1c57ef179d5 100644 --- a/src/sentry/testutils/performance_issues/span_builder.py +++ b/src/sentry/testutils/performance_issues/span_builder.py @@ -72,3 +72,23 @@ def build(self) -> Span: if self.hash is not None: span["hash"] = self.hash return span + + def build_v2(self) -> Span: + """Return a sp""" + span: Span = { + "trace_id": self.trace_id, + "parent_span_id": self.parent_span_id, + "span_id": self.span_id, + "start_timestamp": self.start_timestamp, + "timestamp": self.timestamp, + "same_process_as_parent": self.same_process_as_parent, + "attributes": { + "sentry.is_segment": {"value": self.is_segment}, + "sentry.op": {"value": self.op}, + "sentry.description": {"value": self.description}, + **{k: {"value": v} for (k, v) in (self.tags or {}).items()}, + **{k: {"value": v} for (k, v) in (self.data or {}).items()}, + }, + } + + return span diff --git a/tests/sentry/spans/consumers/process/test_consumer.py b/tests/sentry/spans/consumers/process/test_consumer.py index 95beb16e82edf8..b0487fb31cc651 100644 --- a/tests/sentry/spans/consumers/process/test_consumer.py +++ b/tests/sentry/spans/consumers/process/test_consumer.py @@ -77,8 +77,8 @@ def add_commit(offsets, force=False): "spans": [ { "attributes": { - "sentry.is_segment": True, - "sentry.segment.id": "aaaaaaaaaaaaaaaa", + "sentry.is_segment": {"type": "boolean", "value": True}, + "sentry.segment.id": {"type": "string", "value": "aaaaaaaaaaaaaaaa"}, }, "project_id": 12, "span_id": "aaaaaaaaaaaaaaaa", diff --git 
a/tests/sentry/spans/consumers/process_segments/test_convert.py b/tests/sentry/spans/consumers/process_segments/test_convert.py index d048429cff28ed..4d575f27f73b70 100644 --- a/tests/sentry/spans/consumers/process_segments/test_convert.py +++ b/tests/sentry/spans/consumers/process_segments/test_convert.py @@ -120,6 +120,7 @@ def test_convert_span_to_item() -> None: "sentry.environment": AnyValue(string_value="development"), "sentry.is_remote": AnyValue(bool_value=True), "sentry.is_segment": AnyValue(bool_value=True), + "sentry.name": AnyValue(string_value="endpoint"), "sentry.normalized_description": AnyValue(string_value="normalized_description"), "sentry.op": AnyValue(string_value="http.server"), "sentry.origin": AnyValue(string_value="auto.http.django"), diff --git a/tests/sentry/spans/grouping/test_strategy.py b/tests/sentry/spans/grouping/test_strategy.py index 74565818493baf..4d398ce6d908f6 100644 --- a/tests/sentry/spans/grouping/test_strategy.py +++ b/tests/sentry/spans/grouping/test_strategy.py @@ -588,11 +588,16 @@ def test_default_2022_10_27_strategy(spans: list[Span], expected: Mapping[str, l def test_standalone_spans_compat() -> None: - spans = [ + spans_v1 = [ SpanBuilder().with_span_id("b" * 16).with_description("b" * 16).build(), SpanBuilder().with_span_id("c" * 16).with_description("c" * 16).build(), SpanBuilder().with_span_id("d" * 16).with_description("d" * 16).build(), ] + spans_v2 = [ + SpanBuilder().with_span_id("b" * 16).with_description("b" * 16).build_v2(), + SpanBuilder().with_span_id("c" * 16).with_description("c" * 16).build_v2(), + SpanBuilder().with_span_id("d" * 16).with_description("d" * 16).build_v2(), + ] event = { "transaction": "transaction name", @@ -601,15 +606,15 @@ def test_standalone_spans_compat() -> None: "span_id": "a" * 16, }, }, - "spans": spans, + "spans": spans_v1, } - standalone_spans = spans + [ + standalone_spans = spans_v2 + [ SpanBuilder() .with_span_id("a" * 16) .segment() .with_data({"sentry.transaction": 
"transaction name"}) - .build() + .build_v2() ] cfg = CONFIGURATIONS[DEFAULT_CONFIG_ID] From 2f07ad46020c5e9b193706a3fe1e2db2f0c7ac8a Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 2 Oct 2025 12:15:47 +0200 Subject: [PATCH 18/21] schema and type hints --- pyproject.toml | 2 +- .../spans/consumers/process_segments/convert.py | 14 ++++++++------ .../consumers/process_segments/enrichment.py | 15 +++++++++------ .../spans/consumers/process_segments/types.py | 10 +++++++--- 4 files changed, 25 insertions(+), 16 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 07e1e316bce692..9067970274ba26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -77,7 +77,7 @@ dependencies = [ # [end] jsonschema format validators "sentry-arroyo>=2.25.5", "sentry-forked-email-reply-parser>=0.5.12.post1", - "sentry-kafka-schemas>=2.1.4", + "sentry-kafka-schemas>=2.1.6", "sentry-ophio>=1.1.3", "sentry-protos>=0.4.0", "sentry-redis-tools>=0.5.0", diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index 703ee517357fc0..6d9be27fe30f88 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -37,8 +37,10 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: client_sample_rate = 1.0 server_sample_rate = 1.0 - for k, field_value in (span.get("attributes") or {}).items(): - if (value := field_value.get("value")) is None: + for k, attribute in (span.get("attributes") or {}).items(): + if attribute is None: + continue + if (value := attribute.get("value")) is None: continue try: # NOTE: This ignores the `type` field of the attribute itself @@ -58,9 +60,9 @@ def convert_span_to_item(span: CompatibleSpan) -> TraceItem: pass for field_name, attribute_name in FIELD_TO_ATTRIBUTE.items(): - field_value = span.get(field_name) # type:ignore[assignment] - if field_value is not None: - attributes[attribute_name] = 
_anyvalue(field_value) + attribute = span.get(field_name) # type:ignore[assignment] + if attribute is not None: + attributes[attribute_name] = _anyvalue(attribute) # Rename some attributes from their sentry-conventions name to what the product currently expects. # Eventually this should all be handled by deprecation policies in sentry-conventions. @@ -143,7 +145,7 @@ def _sanitize_span_link(link: SpanLink) -> SpanLink: # attributes count. Respect that count, if it's present. It should always be # an integer. try: - dropped_attributes_count = int(attributes["sentry.dropped_attributes_count"]["value"]) # type: ignore[arg-type] + dropped_attributes_count = int(attributes["sentry.dropped_attributes_count"]["value"]) # type: ignore[arg-type,index] except (KeyError, ValueError, TypeError): dropped_attributes_count = 0 diff --git a/src/sentry/spans/consumers/process_segments/enrichment.py b/src/sentry/spans/consumers/process_segments/enrichment.py index a0009fd86bcc03..874a614f44ad1a 100644 --- a/src/sentry/spans/consumers/process_segments/enrichment.py +++ b/src/sentry/spans/consumers/process_segments/enrichment.py @@ -4,7 +4,7 @@ from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent -from sentry.spans.consumers.process_segments.types import Attributes, attribute_value, get_span_op +from sentry.spans.consumers.process_segments.types import attribute_value, get_span_op # Keys of shared sentry attributes that are shared across all spans in a segment. This list # is taken from `extract_shared_tags` in Relay. 
@@ -79,7 +79,12 @@ def __init__(self, spans: list[SpanEvent]) -> None: self._span_map.setdefault(parent_span_id, []).append(interval) def _attributes(self, span: SpanEvent) -> dict[str, Any]: - attributes: Attributes = {**span.get("attributes", {})} + attributes: dict[str, Any] = {**(span.get("attributes") or {})} + + def get_value(key: str) -> Any: + attr: dict[str, Any] = attributes.get(key) or {} + return attr.get("value") + if self._segment_span is not None: # Assume that Relay has extracted the shared tags into `data` on the # root span. Once `sentry_tags` is removed, the logic from @@ -94,11 +99,9 @@ def _attributes(self, span: SpanEvent) -> dict[str, Any]: # NOTE: Like in Relay's implementation, shared tags are added at the # very end. This does not have access to the shared tag value. We # keep behavior consistent, although this should be revisited. - if (attributes.get("sentry.thread.name") or {}).get( - "value" - ) == MOBILE_MAIN_THREAD_NAME: + if get_value("sentry.thread.name") == MOBILE_MAIN_THREAD_NAME: attributes["sentry.main_thread"] = {"type": "string", "value": "true"} - if not attributes.get("sentry.app_start_type") and mobile_start_type: + if not get_value("sentry.app_start_type") and mobile_start_type: attributes["sentry.app_start_type"] = { "type": "string", "value": mobile_start_type, diff --git a/src/sentry/spans/consumers/process_segments/types.py b/src/sentry/spans/consumers/process_segments/types.py index 9d1e5898a9b30c..629e00869e62d8 100644 --- a/src/sentry/spans/consumers/process_segments/types.py +++ b/src/sentry/spans/consumers/process_segments/types.py @@ -1,12 +1,16 @@ from collections.abc import Mapping from typing import Any, NotRequired -from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent from sentry_kafka_schemas.schema_types.ingest_spans_v1 import ( - _FileColonIngestSpansFullStopV1FullStopSchemaFullStopJsonNumberSignDefinitionsAttributevalue as AttributeValue, + SpanEvent, + 
_FileColonIngestSpansFullStopV1FullStopSchemaFullStopJsonNumberSignDefinitionsAttributevalueObject, ) -Attributes = dict[str, AttributeValue] +Attributes = dict[ + str, + None + | _FileColonIngestSpansFullStopV1FullStopSchemaFullStopJsonNumberSignDefinitionsAttributevalueObject, +] # The default span.op to assume if it is missing on the span. This should be From 43138d27a8afc246c16912c618d38338b3b8e48e Mon Sep 17 00:00:00 2001 From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com> Date: Thu, 2 Oct 2025 10:16:44 +0000 Subject: [PATCH 19/21] :snowflake: re-freeze requirements --- uv.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index bbb9586ed32d98..13c7984cdabe15 100644 --- a/uv.lock +++ b/uv.lock @@ -2069,7 +2069,7 @@ requires-dist = [ { name = "rfc3986-validator", specifier = ">=0.1.1" }, { name = "sentry-arroyo", specifier = ">=2.25.5" }, { name = "sentry-forked-email-reply-parser", specifier = ">=0.5.12.post1" }, - { name = "sentry-kafka-schemas", specifier = ">=2.1.4" }, + { name = "sentry-kafka-schemas", specifier = ">=2.1.6" }, { name = "sentry-ophio", specifier = ">=1.1.3" }, { name = "sentry-protos", specifier = ">=0.4.0" }, { name = "sentry-redis-tools", specifier = ">=0.5.0" }, @@ -2240,7 +2240,7 @@ wheels = [ [[package]] name = "sentry-kafka-schemas" -version = "2.1.4" +version = "2.1.6" source = { registry = "https://pypi.devinfra.sentry.io/simple" } dependencies = [ { name = "fastjsonschema", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, @@ -2251,7 +2251,7 @@ dependencies = [ { name = "typing-extensions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, ] wheels = [ - { url = "https://pypi.devinfra.sentry.io/wheels/sentry_kafka_schemas-2.1.4-py2.py3-none-any.whl", hash = "sha256:37de4a4f046fb89f959696356c1fa4cad4b487d6a462468ef1aeb55afcacd856" }, + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_kafka_schemas-2.1.6-py2.py3-none-any.whl", hash 
= "sha256:385e43b268c81a822fd88fc797f6143d79e3b4c9de86ce67a974b07ddbdcb25f" }, ] [[package]] From cdc8be8d0b62a17eb801bdf7124703763f9d0083 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 2 Oct 2025 13:07:22 +0200 Subject: [PATCH 20/21] typing --- .../consumers/process_segments/convert.py | 2 - .../process_segments/test_convert.py | 2 +- .../process_segments/test_enrichment.py | 58 ++++++++++--------- .../consumers/process_segments/test_shim.py | 16 +++-- 4 files changed, 42 insertions(+), 36 deletions(-) diff --git a/src/sentry/spans/consumers/process_segments/convert.py b/src/sentry/spans/consumers/process_segments/convert.py index 6d9be27fe30f88..9d4b94e960c30f 100644 --- a/src/sentry/spans/consumers/process_segments/convert.py +++ b/src/sentry/spans/consumers/process_segments/convert.py @@ -16,10 +16,8 @@ "event_id": "sentry.event_id", "hash": "sentry.hash", "is_remote": "sentry.is_remote", - "is_segment": "sentry.is_segment", "kind": "sentry.kind", "name": "sentry.name", - "origin": "sentry.origin", "parent_span_id": "sentry.parent_span_id", "received": "sentry.received", "start_timestamp": "sentry.start_timestamp_precise", diff --git a/tests/sentry/spans/consumers/process_segments/test_convert.py b/tests/sentry/spans/consumers/process_segments/test_convert.py index 4d575f27f73b70..679e0b8bbcc38e 100644 --- a/tests/sentry/spans/consumers/process_segments/test_convert.py +++ b/tests/sentry/spans/consumers/process_segments/test_convert.py @@ -79,7 +79,7 @@ def test_convert_span_to_item() -> None: - item = convert_span_to_item(SPAN_KAFKA_MESSAGE) + item = convert_span_to_item(cast(CompatibleSpan, SPAN_KAFKA_MESSAGE)) assert item.organization_id == 1 assert item.project_id == 1 diff --git a/tests/sentry/spans/consumers/process_segments/test_enrichment.py b/tests/sentry/spans/consumers/process_segments/test_enrichment.py index 99cc07a5922bc2..f9bdcba29fab35 100644 --- a/tests/sentry/spans/consumers/process_segments/test_enrichment.py +++ 
b/tests/sentry/spans/consumers/process_segments/test_enrichment.py @@ -1,11 +1,10 @@ +from typing import cast + from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent -from sentry.spans.consumers.process_segments.enrichment import ( - TreeEnricher, - attribute_value, - compute_breakdowns, -) +from sentry.spans.consumers.process_segments.enrichment import TreeEnricher, compute_breakdowns from sentry.spans.consumers.process_segments.shim import make_compatible +from sentry.spans.consumers.process_segments.types import CompatibleSpan, attribute_value from tests.sentry.spans.consumers.process import build_mock_span # Tests ported from Relay @@ -425,10 +424,10 @@ def test_write_tags_for_performance_issue_detection(): segment_span, ] - _, spans = TreeEnricher.enrich_spans(spans) - spans = [make_compatible(span) for span in spans] + _, enriched_spans = TreeEnricher.enrich_spans(spans) + compatible_spans: list[CompatibleSpan] = [make_compatible(span) for span in enriched_spans] - child_span, segment_span = spans + child_span, segment_span = compatible_spans assert segment_span["sentry_tags"] == { "sdk.name": "sentry.php.laravel", @@ -448,24 +447,27 @@ def test_write_tags_for_performance_issue_detection(): def _mock_performance_issue_span(is_segment, attributes, **fields) -> SpanEvent: - return { - "duration_ms": 107, - "parent_span_id": None, - "profile_id": "dbae2b82559649a1a34a2878134a007b", - "project_id": 1, - "organization_id": 1, - "received": 1707953019.044972, - "retention_days": 90, - "segment_id": "a49b42af9fb69da0", - "attributes": { - **attributes, - "sentry.is_segment": {"type": "boolean", "value": is_segment}, - "sentry.description": {"type": "string", "value": "OrganizationNPlusOne"}, + return cast( + SpanEvent, + { + "duration_ms": 107, + "parent_span_id": None, + "profile_id": "dbae2b82559649a1a34a2878134a007b", + "project_id": 1, + "organization_id": 1, + "received": 1707953019.044972, + "retention_days": 90, + "segment_id": 
"a49b42af9fb69da0", + "attributes": { + **attributes, + "sentry.is_segment": {"type": "boolean", "value": is_segment}, + "sentry.description": {"type": "string", "value": "OrganizationNPlusOne"}, + }, + "span_id": "a49b42af9fb69da0", + "start_timestamp_ms": 1707953018865, + "start_timestamp": 1707953018.865, + "end_timestamp": 1707953018.972, + "trace_id": "94576097f3a64b68b85a59c7d4e3ee2a", + **fields, }, - "span_id": "a49b42af9fb69da0", - "start_timestamp_ms": 1707953018865, - "start_timestamp": 1707953018.865, - "end_timestamp": 1707953018.972, - "trace_id": "94576097f3a64b68b85a59c7d4e3ee2a", - **fields, - } + ) diff --git a/tests/sentry/spans/consumers/process_segments/test_shim.py b/tests/sentry/spans/consumers/process_segments/test_shim.py index c6f521ca384209..77404ab0fe0a73 100644 --- a/tests/sentry/spans/consumers/process_segments/test_shim.py +++ b/tests/sentry/spans/consumers/process_segments/test_shim.py @@ -1,16 +1,22 @@ +from typing import cast + +from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent + from sentry.spans.consumers.process_segments.shim import make_compatible +from sentry.spans.consumers.process_segments.types import Attributes from tests.sentry.spans.consumers.process_segments.test_convert import SPAN_KAFKA_MESSAGE def test_make_compatible(): message = {**SPAN_KAFKA_MESSAGE} - message["attributes"] = { - "sentry.exclusive_time_ms": {"type": "float", "value": 100.0}, - **message["attributes"], + attributes: Attributes = { + "sentry.exclusive_time_ms": {"type": "double", "value": 100.0}, + **message["attributes"], # type:ignore[dict-item] } - compatible = make_compatible(message) + message["attributes"] = attributes + compatible = make_compatible(cast(SpanEvent, message)) assert compatible["exclusive_time"] == 100.0 - assert compatible["op"] == message["attributes"]["sentry.op"]["value"] + assert compatible["op"] == message["attributes"]["sentry.op"]["value"] # type: ignore[index] # Pre-existing tags got overwritten: 
assert compatible["sentry_tags"] == { From 5f8a8953114e4b00999ad94d9583b31644bfe347 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Thu, 2 Oct 2025 13:27:49 +0200 Subject: [PATCH 21/21] typing --- src/sentry/testutils/performance_issues/span_builder.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/sentry/testutils/performance_issues/span_builder.py b/src/sentry/testutils/performance_issues/span_builder.py index 8dc1c57ef179d5..b731e8a06a5d06 100644 --- a/src/sentry/testutils/performance_issues/span_builder.py +++ b/src/sentry/testutils/performance_issues/span_builder.py @@ -73,9 +73,9 @@ def build(self) -> Span: span["hash"] = self.hash return span - def build_v2(self) -> Span: - """Return a sp""" - span: Span = { + def build_v2(self) -> dict[str, Any]: + """Return a Span V2""" + return { "trace_id": self.trace_id, "parent_span_id": self.parent_span_id, "span_id": self.span_id, @@ -90,5 +90,3 @@ def build_v2(self) -> Span: **{k: {"value": v} for (k, v) in (self.data or {}).items()}, }, } - - return span