Skip to content

Commit f90c230

Browse files
feat(cbrs): Time Window Routing (#7418)
This PR implements a routing strategy for routing to TIER_1 and instead of downsampling storage tiers, shrinking time windows. # Design decisions ## Window Sizing Algorithm This is the easiest thing to change in this PR (and likely it will change). At the moment, it looks up the amount of outcomes for the requested time range and shrinks the time window down assuming that the distribution of datapoints is uniform across time (which is not true). Most recent datapoints are prioritized first ## Pagination Only the TraceItemTable endpoint makes use of the recommendations of this routing strategy, the endpoint and routing strategy interact across queries. This is to facilitate a simple client side UX where all the client has to do is pass the `page_token` across their requests and not worry about anything else. Here's a diagram explaining the flow: ``` ┌─────────────────┐ ┌────────────────┐ │ │ │ │ │ │ │ │ │ │ │ Routing │ │ Client ├─────────────►│ Strategy ┼───────────────────┐ │ │ page_token │ │ │ │ │ │ │ │ │ │ │ │ Narrows│time window └─────────────────┘ └────────────────┘ For Endpoint ▲ │ │ │ │ ┌────────────────┐ │ │ │ │ │ │ │ │ │ │ │ TraceItemTable │◄─────────────────┘ └─────────────────────────┼ Endpoint │ Encodes │ │ Time Window └────────────────┘ In Page Token ``` In order to facilitate pagination, the TraceItemTable endpoint now queries for `limit + 1` rows in order to know if there are more items in this current window or if we can move on to the next one # What's missing 1. This functionality can be tested more rigorously, I deliberately did not spend too much time on it because I know it will change and the priority is to get something out there to try 2. We could probably have more observability into what the strategy is doing and understanding our success metrics better. These will be added as we understand the problem more --------- Co-authored-by: getsantry[bot] <66042841+getsantry[bot]@users.noreply.github.com>
1 parent 0330c37 commit f90c230

File tree

15 files changed

+847
-147
lines changed

15 files changed

+847
-147
lines changed

.vscode/settings.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
"mypy-type-checker.args": [
7070
"--strict"
7171
],
72-
"makefile.configureOnOpen": false
72+
"makefile.configureOnOpen": false,
73+
"cursorpyright.analysis.autoImportCompletions": true
7374

7475
}

snuba/web/rpc/common/common.py

Lines changed: 22 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datetime import datetime, timedelta
1+
from datetime import datetime, timedelta, timezone
22
from typing import Callable, TypeVar, cast
33

44
from google.protobuf.message import Message as ProtobufMessage
@@ -56,9 +56,9 @@ def truncate_request_meta_to_day(meta: RequestMeta) -> None:
5656
start_timestamp = start_timestamp.replace(
5757
hour=0, minute=0, second=0, microsecond=0
5858
) - timedelta(days=1)
59-
end_timestamp = end_timestamp.replace(
60-
hour=0, minute=0, second=0, microsecond=0
61-
) + timedelta(days=1)
59+
end_timestamp = end_timestamp.replace(hour=0, minute=0, second=0, microsecond=0) + timedelta(
60+
days=1
61+
)
6262

6363
meta.start_timestamp.seconds = int(start_timestamp.timestamp())
6464
meta.end_timestamp.seconds = int(end_timestamp.timestamp())
@@ -142,51 +142,32 @@ def trace_item_filters_to_expression(
142142
if len(filters) == 0:
143143
return literal(True)
144144
elif len(filters) == 1:
145-
return trace_item_filters_to_expression(
146-
filters[0], attribute_key_to_expression
147-
)
145+
return trace_item_filters_to_expression(filters[0], attribute_key_to_expression)
148146
return and_cond(
149-
*(
150-
trace_item_filters_to_expression(x, attribute_key_to_expression)
151-
for x in filters
152-
)
147+
*(trace_item_filters_to_expression(x, attribute_key_to_expression) for x in filters)
153148
)
154149

155150
if item_filter.HasField("or_filter"):
156151
filters = item_filter.or_filter.filters
157152
if len(filters) == 0:
158-
raise BadSnubaRPCRequestException(
159-
"Invalid trace item filter, empty 'or' clause"
160-
)
153+
raise BadSnubaRPCRequestException("Invalid trace item filter, empty 'or' clause")
161154
elif len(filters) == 1:
162-
return trace_item_filters_to_expression(
163-
filters[0], attribute_key_to_expression
164-
)
155+
return trace_item_filters_to_expression(filters[0], attribute_key_to_expression)
165156
return or_cond(
166-
*(
167-
trace_item_filters_to_expression(x, attribute_key_to_expression)
168-
for x in filters
169-
)
157+
*(trace_item_filters_to_expression(x, attribute_key_to_expression) for x in filters)
170158
)
171159

172160
if item_filter.HasField("not_filter"):
173161
filters = item_filter.not_filter.filters
174162
if len(filters) == 0:
175-
raise BadSnubaRPCRequestException(
176-
"Invalid trace item filter, empty 'not' clause"
177-
)
163+
raise BadSnubaRPCRequestException("Invalid trace item filter, empty 'not' clause")
178164
elif len(filters) == 1:
179165
return not_cond(
180-
trace_item_filters_to_expression(
181-
filters[0], attribute_key_to_expression
182-
)
166+
trace_item_filters_to_expression(filters[0], attribute_key_to_expression)
183167
)
184168
return not_cond(
185169
and_cond(
186-
*(
187-
trace_item_filters_to_expression(x, attribute_key_to_expression)
188-
for x in filters
189-
)
170+
*(trace_item_filters_to_expression(x, attribute_key_to_expression) for x in filters)
190171
)
191172
)
192173

@@ -198,9 +179,7 @@ def trace_item_filters_to_expression(
198179

199180
value_type = v.WhichOneof("value")
200181
if value_type is None:
201-
raise BadSnubaRPCRequestException(
202-
"comparison does not have a right hand side"
203-
)
182+
raise BadSnubaRPCRequestException("comparison does not have a right hand side")
204183

205184
if v.is_null:
206185
v_expression: Expression = literal(None)
@@ -246,9 +225,7 @@ def trace_item_filters_to_expression(
246225
)
247226
# we redefine the way equals works for nulls
248227
# now null=null is true
249-
expr_with_null = or_cond(
250-
expr, and_cond(f.isNull(k_expression), f.isNull(v_expression))
251-
)
228+
expr_with_null = or_cond(expr, and_cond(f.isNull(k_expression), f.isNull(v_expression)))
252229
return expr_with_null
253230
if op == ComparisonFilter.OP_NOT_EQUALS:
254231
_check_non_string_values_cannot_ignore_case(item_filter.comparison_filter)
@@ -259,9 +236,7 @@ def trace_item_filters_to_expression(
259236
)
260237
# we redefine the way not equals works for nulls
261238
# now null!=null is true
262-
expr_with_null = or_cond(
263-
expr, f.xor(f.isNull(k_expression), f.isNull(v_expression))
264-
)
239+
expr_with_null = or_cond(expr, f.xor(f.isNull(k_expression), f.isNull(v_expression)))
265240
return expr_with_null
266241
if op == ComparisonFilter.OP_LIKE:
267242
if k.type != AttributeKey.Type.TYPE_STRING:
@@ -354,11 +329,15 @@ def timestamp_in_range_condition(start_ts: int, end_ts: int) -> Expression:
354329
return and_cond(
355330
f.less(
356331
column("timestamp"),
357-
f.toDateTime(end_ts),
332+
f.toDateTime(
333+
datetime.fromtimestamp(end_ts, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
334+
),
358335
),
359336
f.greaterOrEquals(
360337
column("timestamp"),
361-
f.toDateTime(start_ts),
338+
f.toDateTime(
339+
datetime.fromtimestamp(start_ts, tz=timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
340+
),
362341
),
363342
)
364343

@@ -372,9 +351,7 @@ def base_conditions_and(meta: RequestMeta, *other_exprs: Expression) -> Expressi
372351
"""
373352
return and_cond(
374353
project_id_and_org_conditions(meta),
375-
timestamp_in_range_condition(
376-
meta.start_timestamp.seconds, meta.end_timestamp.seconds
377-
),
354+
timestamp_in_range_condition(meta.start_timestamp.seconds, meta.end_timestamp.seconds),
378355
*other_exprs,
379356
)
380357

snuba/web/rpc/common/pagination.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
"""
2+
This file contains functionality to encode and decode custom page tokens
3+
"""
4+
5+
from google.protobuf.timestamp_pb2 import Timestamp
6+
from sentry_protos.snuba.v1.request_common_pb2 import PageToken
7+
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeKey, AttributeValue
8+
from sentry_protos.snuba.v1.trace_item_filter_pb2 import (
9+
AndFilter,
10+
ComparisonFilter,
11+
TraceItemFilter,
12+
)
13+
14+
15+
class FlexibleTimeWindowPage:
16+
_START_TIMESTAMP_KEY = "sentry.start_timestamp"
17+
_END_TIMESTAMP_KEY = "sentry.end_timestamp"
18+
_OFFSET_KEY = "sentry.offset"
19+
20+
def __init__(
21+
self,
22+
start_timestamp: Timestamp | None,
23+
end_timestamp: Timestamp | None,
24+
offset: int | None = None,
25+
):
26+
self.start_timestamp = start_timestamp
27+
self.end_timestamp = end_timestamp
28+
self.offset = offset
29+
30+
def encode(self) -> PageToken:
31+
if self.start_timestamp is not None and self.end_timestamp is not None:
32+
return PageToken(
33+
filter_offset=TraceItemFilter(
34+
and_filter=AndFilter(
35+
filters=[
36+
TraceItemFilter(
37+
comparison_filter=ComparisonFilter(
38+
key=AttributeKey(name=self._START_TIMESTAMP_KEY),
39+
op=ComparisonFilter.OP_GREATER_THAN_OR_EQUALS,
40+
value=AttributeValue(val_int=self.start_timestamp.seconds),
41+
)
42+
),
43+
TraceItemFilter(
44+
comparison_filter=ComparisonFilter(
45+
key=AttributeKey(name=self._END_TIMESTAMP_KEY),
46+
op=ComparisonFilter.OP_LESS_THAN,
47+
value=AttributeValue(val_int=self.end_timestamp.seconds),
48+
)
49+
),
50+
TraceItemFilter(
51+
comparison_filter=ComparisonFilter(
52+
key=AttributeKey(name=self._OFFSET_KEY),
53+
op=ComparisonFilter.OP_EQUALS,
54+
value=AttributeValue(
55+
val_int=self.offset if self.offset is not None else 0
56+
),
57+
)
58+
),
59+
]
60+
)
61+
)
62+
)
63+
else:
64+
return PageToken(offset=self.offset if self.offset is not None else 0)
65+
66+
@classmethod
67+
def decode(cls, page_token: PageToken) -> "FlexibleTimeWindowPage":
68+
start_timestamp = None
69+
end_timestamp = None
70+
offset = None
71+
if page_token.filter_offset.HasField("and_filter"):
72+
for filter in page_token.filter_offset.and_filter.filters:
73+
if (
74+
filter.HasField("comparison_filter")
75+
and filter.comparison_filter.key.name == cls._START_TIMESTAMP_KEY
76+
):
77+
start_timestamp = Timestamp(seconds=filter.comparison_filter.value.val_int)
78+
if (
79+
filter.HasField("comparison_filter")
80+
and filter.comparison_filter.key.name == cls._END_TIMESTAMP_KEY
81+
):
82+
end_timestamp = Timestamp(seconds=filter.comparison_filter.value.val_int)
83+
if (
84+
filter.HasField("comparison_filter")
85+
and filter.comparison_filter.key.name == cls._OFFSET_KEY
86+
):
87+
offset = filter.comparison_filter.value.val_int
88+
elif page_token.HasField("offset"):
89+
offset = page_token.offset
90+
return cls(start_timestamp, end_timestamp, offset)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
2+
3+
4+
class OutcomeCategory:
5+
SPAN_INDEXED = 16
6+
LOG_ITEM = 23
7+
8+
9+
class Outcome:
10+
ACCEPTED = 0
11+
12+
13+
ITEM_TYPE_TO_OUTCOME_CATEGORY = {
14+
TraceItemType.TRACE_ITEM_TYPE_UNSPECIFIED: OutcomeCategory.SPAN_INDEXED,
15+
TraceItemType.TRACE_ITEM_TYPE_SPAN: OutcomeCategory.SPAN_INDEXED,
16+
TraceItemType.TRACE_ITEM_TYPE_LOG: OutcomeCategory.LOG_ITEM,
17+
}

snuba/web/rpc/storage_routing/routing_strategies/outcomes_based.py

Lines changed: 8 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
import sentry_sdk
66
from google.protobuf.json_format import MessageToDict
7-
from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta, TraceItemType
7+
from sentry_protos.snuba.v1.request_common_pb2 import RequestMeta
88

99
from snuba import state
1010
from snuba.attribution.appid import AppID
@@ -28,29 +28,17 @@
2828
treeify_or_and_conditions,
2929
)
3030
from snuba.web.rpc.storage_routing.common import extract_message_meta
31+
from snuba.web.rpc.storage_routing.routing_strategies.common import (
32+
ITEM_TYPE_TO_OUTCOME_CATEGORY,
33+
Outcome,
34+
)
3135
from snuba.web.rpc.storage_routing.routing_strategies.storage_routing import (
3236
BaseRoutingStrategy,
3337
RoutingContext,
3438
RoutingDecision,
3539
)
3640

3741

38-
# TODO import these from sentry-relay
39-
class OutcomeCategory:
40-
SPAN_INDEXED = 16
41-
LOG_ITEM = 23
42-
43-
44-
class Outcome:
45-
ACCEPTED = 0
46-
47-
48-
_ITEM_TYPE_TO_OUTCOME = {
49-
TraceItemType.TRACE_ITEM_TYPE_SPAN: OutcomeCategory.SPAN_INDEXED,
50-
TraceItemType.TRACE_ITEM_TYPE_LOG: OutcomeCategory.LOG_ITEM,
51-
}
52-
53-
5442
def project_id_and_org_conditions(meta: RequestMeta) -> Expression:
5543
return and_cond(
5644
in_cond(
@@ -111,11 +99,7 @@ def get_ingested_items_for_timerange(self, routing_context: RoutingContext) -> i
11199
),
112100
f.equals(column("outcome"), Outcome.ACCEPTED),
113101
f.equals(
114-
column("category"),
115-
_ITEM_TYPE_TO_OUTCOME.get(
116-
in_msg_meta.trace_item_type,
117-
OutcomeCategory.SPAN_INDEXED,
118-
),
102+
column("category"), ITEM_TYPE_TO_OUTCOME_CATEGORY[in_msg_meta.trace_item_type]
119103
),
120104
),
121105
)
@@ -186,8 +170,8 @@ def _get_routing_decision(self, routing_context: RoutingContext) -> RoutingDecis
186170
# for GetTraces, there is no type specified so we assume spans because
187171
# that is necessary for traces anyways
188172
# if the type is specified and we don't know its outcome, route to Tier_1
189-
in_msg_meta.trace_item_type != TraceItemType.TRACE_ITEM_TYPE_UNSPECIFIED
190-
and in_msg_meta.trace_item_type not in _ITEM_TYPE_TO_OUTCOME
173+
in_msg_meta.trace_item_type
174+
not in ITEM_TYPE_TO_OUTCOME_CATEGORY
191175
):
192176
return routing_decision
193177
# if we're querying a short enough timeframe, don't bother estimating, route to tier 1 and call it a day

0 commit comments

Comments
 (0)