Skip to content

Commit 8259996

Browse files
committed
add more spans
Signed-off-by: sallyom <[email protected]>
1 parent 619a81e commit 8259996

File tree

1 file changed

+67
-6
lines changed

1 file changed

+67
-6
lines changed

pkg/kvcache/indexer.go

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -124,47 +124,107 @@ func (k *Indexer) KVBlockIndex() kvblock.Index {
124124
func (k *Indexer) GetPodScores(ctx context.Context, prompt, modelName string,
125125
podIdentifiers []string,
126126
) (map[string]int, error) {
127-
tracer := otel.GetTracerProvider().Tracer("llm-d-epp")
128-
ctx, span := tracer.Start(ctx, "kv-cache-manager.GetPodScores")
127+
tracer := otel.GetTracerProvider().Tracer("llm-d-kv-cache-manager")
128+
ctx, span := tracer.Start(ctx, "llm_d.kv_cache_manager.GetPodScores")
129129
defer span.End()
130130

131131
span.SetAttributes(
132-
attribute.String("component", "llm-d-kv-cache-manager"),
133-
attribute.String("operation", "get_pod_scores"),
134132
attribute.String("gen_ai.request.model", modelName),
135-
attribute.Int("llm_d.kv_cache.pod_count", len(podIdentifiers)),
133+
attribute.Int("llm_d.kv_cache_manager.pod_count", len(podIdentifiers)),
136134
)
137135

138136
traceLogger := klog.FromContext(ctx).V(logging.TRACE).WithName("kvcache.GetPodScores")
139137

140138
// 1. tokenize prompt
139+
// the tokenization step runs inside its own "find_tokens" span; an empty token list returns nil scores early
140+
_, tokenSpan := tracer.Start(ctx, "llm_d.kv_cache_manager.find_tokens")
141+
tokenSpan.SetAttributes(
142+
attribute.String("gen_ai.request.model", modelName),
143+
)
141144
tokens := k.tokenizersPool.Tokenize(prompt, modelName)
145+
if len(tokens) == 0 {
146+
tokenSpan.SetAttributes(
147+
attribute.Int("llm_d.kv_cache_manager.tokens_found", 0),
148+
attribute.String("operation.outcome", "success"),
149+
)
150+
tokenSpan.End()
151+
//nolint:nilnil // no need to return an error
152+
return nil, nil
153+
}
154+
tokenSpan.SetAttributes(
155+
attribute.Int("llm_d.kv_cache_manager.tokens_found", len(tokens)),
156+
attribute.String("operation.outcome", "success"),
157+
)
158+
tokenSpan.End()
142159

143160
// 2. get block keys
161+
_, blockSpan := tracer.Start(ctx, "llm_d.kv_cache_manager.tokens_to_block_keys")
162+
blockSpan.SetAttributes(
163+
attribute.String("gen_ai.request.model", modelName),
164+
attribute.Int("llm_d.kv_cache_manager.input_tokens", len(tokens)),
165+
)
144166
blockKeys := k.tokensProcessor.TokensToKVBlockKeys(tokens, modelName)
145167
if len(blockKeys) == 0 {
168+
blockSpan.SetAttributes(
169+
attribute.Int("llm_d.kv_cache_manager.block_keys_generated", 0),
170+
attribute.String("operation.outcome", "success"),
171+
)
172+
blockSpan.End()
146173
traceLogger.Info("no block keys found, returning empty scores")
147174
//nolint:nilnil // no need to return an error
148175
return nil, nil
149176
}
177+
blockSpan.SetAttributes(
178+
attribute.Int("llm_d.kv_cache_manager.block_keys_generated", len(blockKeys)),
179+
attribute.String("operation.outcome", "success"),
180+
)
181+
blockSpan.End()
150182

151183
traceLogger.Info("found tokens", "tokens", tokens, "block-keys", blockKeys)
152184

153185
// 3. query kvblock indexer for pods
186+
_, lookupSpan := tracer.Start(ctx, "llm_d.kv_cache_manager.lookup_pods")
187+
lookupSpan.SetAttributes(
188+
attribute.String("gen_ai.request.model", modelName),
189+
attribute.Int("llm_d.kv_cache_manager.block_keys_count", len(blockKeys)),
190+
)
154191
keyToPods, err := k.kvBlockIndex.Lookup(ctx, blockKeys, sets.New(podIdentifiers...))
155192
if err != nil {
193+
lookupSpan.RecordError(err)
194+
lookupSpan.SetAttributes(attribute.String("operation.outcome", "error"))
195+
lookupSpan.End()
156196
span.RecordError(err)
197+
span.SetAttributes(attribute.String("operation.outcome", "error"))
157198
return nil, fmt.Errorf("failed to query kvblock indexer: %w", err)
158199
}
200+
lookupSpan.SetAttributes(
201+
attribute.Int("llm_d.kv_cache_manager.lookup_results", len(keyToPods)),
202+
attribute.String("operation.outcome", "success"),
203+
)
204+
lookupSpan.End()
159205
traceLogger.Info("found block keys", "block-keys", blockKeys,
160206
"pods", podsPerKeyPrintHelper(keyToPods))
161207

162208
// 4. score pods
209+
_, scoreSpan := tracer.Start(ctx, "llm_d.kv_cache_manager.score_pods")
210+
scoreSpan.SetAttributes(
211+
attribute.String("gen_ai.request.model", modelName),
212+
attribute.Int("llm_d.kv_cache_manager.block_keys_count", len(blockKeys)),
213+
)
163214
podScores, err := k.kvBlockScorer.Score(blockKeys, keyToPods)
164215
if err != nil {
216+
scoreSpan.RecordError(err)
217+
scoreSpan.SetAttributes(attribute.String("operation.outcome", "error"))
218+
scoreSpan.End()
165219
span.RecordError(err)
220+
span.SetAttributes(attribute.String("operation.outcome", "error"))
166221
return nil, fmt.Errorf("failed to query kvblock scorer: %w", err)
167222
}
223+
scoreSpan.SetAttributes(
224+
attribute.Int("llm_d.kv_cache_manager.scored_pods", len(podScores)),
225+
attribute.String("operation.outcome", "success"),
226+
)
227+
scoreSpan.End()
168228
traceLogger.Info("found pod scores", "pod-scores", podScores)
169229

170230
// Calculate hit ratio for observability
@@ -180,7 +240,8 @@ func (k *Indexer) GetPodScores(ctx context.Context, prompt, modelName string,
180240
}
181241

182242
span.SetAttributes(
183-
attribute.Float64("llm_d.kv_cache.hit_ratio", hitRatio),
243+
attribute.Float64("llm_d.kv_cache_manager.hit_ratio", hitRatio),
244+
attribute.String("operation.outcome", "success"),
184245
)
185246

186247
return podScores, nil

0 commit comments

Comments
 (0)