Skip to content

Commit 39b36ba

Browse files
committed
add more spans
Signed-off-by: sallyom <[email protected]>
1 parent d18deff commit 39b36ba

File tree

1 file changed

+62
-6
lines changed

1 file changed

+62
-6
lines changed

pkg/kvcache/indexer.go

Lines changed: 62 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -119,41 +119,96 @@ func (k *Indexer) KVBlockIndex() kvblock.Index {
119119
func (k *Indexer) GetPodScores(ctx context.Context, prompt, modelName string,
120120
podIdentifiers []string,
121121
) (map[string]int, error) {
122-
tracer := otel.GetTracerProvider().Tracer("llm-d-epp")
123-
ctx, span := tracer.Start(ctx, "kv-cache-manager.GetPodScores")
122+
tracer := otel.GetTracerProvider().Tracer("llm-d-kv-cache-manager")
123+
ctx, span := tracer.Start(ctx, "llm_d.kv_cache_manager.GetPodScores")
124124
defer span.End()
125125

126126
span.SetAttributes(
127-
attribute.String("component", "llm-d-kv-cache-manager"),
128-
attribute.String("operation", "get_pod_scores"),
129127
attribute.String("gen_ai.request.model", modelName),
130-
attribute.Int("llm_d.kv_cache.pod_count", len(podIdentifiers)),
128+
attribute.Int("llm_d.kv_cache_manager.pod_count", len(podIdentifiers)),
131129
)
132130

133131
traceLogger := klog.FromContext(ctx).V(logging.TRACE).WithName("kvcache.GetPodScores")
134132

135133
// 1. tokenize prompt
134+
// 1. get available tokens of longest prefix
135+
_, tokenSpan := tracer.Start(ctx, "llm_d.kv_cache_manager.find_tokens")
136+
tokenSpan.SetAttributes(
137+
attribute.String("gen_ai.request.model", modelName),
138+
)
136139
tokens := k.tokenizersPool.Tokenize(prompt, modelName)
140+
if len(tokens) == 0 {
141+
tokenSpan.SetAttributes(
142+
attribute.Int("llm_d.kv_cache_manager.tokens_found", 0),
143+
attribute.String("operation.outcome", "success"),
144+
)
145+
tokenSpan.End()
146+
//nolint:nilnil // no need to return an error
147+
return nil, nil
148+
}
149+
tokenSpan.SetAttributes(
150+
attribute.Int("llm_d.kv_cache_manager.tokens_found", len(tokens)),
151+
attribute.String("operation.outcome", "success"),
152+
)
153+
tokenSpan.End()
137154

138155
// 2. get block keys
156+
_, blockSpan := tracer.Start(ctx, "llm_d.kv_cache_manager.tokens_to_block_keys")
157+
blockSpan.SetAttributes(
158+
attribute.String("gen_ai.request.model", modelName),
159+
attribute.Int("llm_d.kv_cache_manager.input_tokens", len(tokens)),
160+
)
139161
blockKeys := k.tokensProcessor.TokensToKVBlockKeys(tokens, modelName)
162+
blockSpan.SetAttributes(
163+
attribute.Int("llm_d.kv_cache_manager.block_keys_generated", len(blockKeys)),
164+
attribute.String("operation.outcome", "success"),
165+
)
166+
blockSpan.End()
140167
traceLogger.Info("found tokens", "tokens", tokens, "block-keys", blockKeys)
141168

142169
// 3. query kvblock indexer for pods
170+
_, lookupSpan := tracer.Start(ctx, "llm_d.kv_cache_manager.lookup_pods")
171+
lookupSpan.SetAttributes(
172+
attribute.String("gen_ai.request.model", modelName),
173+
attribute.Int("llm_d.kv_cache_manager.block_keys_count", len(blockKeys)),
174+
)
143175
keyToPods, err := k.kvBlockIndex.Lookup(ctx, blockKeys, sets.New(podIdentifiers...))
144176
if err != nil {
177+
lookupSpan.RecordError(err)
178+
lookupSpan.SetAttributes(attribute.String("operation.outcome", "error"))
179+
lookupSpan.End()
145180
span.RecordError(err)
181+
span.SetAttributes(attribute.String("operation.outcome", "error"))
146182
return nil, fmt.Errorf("failed to query kvblock indexer: %w", err)
147183
}
184+
lookupSpan.SetAttributes(
185+
attribute.Int("llm_d.kv_cache_manager.lookup_results", len(keyToPods)),
186+
attribute.String("operation.outcome", "success"),
187+
)
188+
lookupSpan.End()
148189
traceLogger.Info("found block keys", "block-keys", blockKeys,
149190
"pods", podsPerKeyPrintHelper(keyToPods))
150191

151192
// 4. score pods
193+
_, scoreSpan := tracer.Start(ctx, "llm_d.kv_cache_manager.score_pods")
194+
scoreSpan.SetAttributes(
195+
attribute.String("gen_ai.request.model", modelName),
196+
attribute.Int("llm_d.kv_cache_manager.block_keys_count", len(blockKeys)),
197+
)
152198
podScores, err := k.kvBlockScorer.Score(blockKeys, keyToPods)
153199
if err != nil {
200+
scoreSpan.RecordError(err)
201+
scoreSpan.SetAttributes(attribute.String("operation.outcome", "error"))
202+
scoreSpan.End()
154203
span.RecordError(err)
204+
span.SetAttributes(attribute.String("operation.outcome", "error"))
155205
return nil, fmt.Errorf("failed to query kvblock scorer: %w", err)
156206
}
207+
scoreSpan.SetAttributes(
208+
attribute.Int("llm_d.kv_cache_manager.scored_pods", len(podScores)),
209+
attribute.String("operation.outcome", "success"),
210+
)
211+
scoreSpan.End()
157212
traceLogger.Info("found pod scores", "pod-scores", podScores)
158213

159214
// Calculate hit ratio for observability
@@ -169,7 +224,8 @@ func (k *Indexer) GetPodScores(ctx context.Context, prompt, modelName string,
169224
}
170225

171226
span.SetAttributes(
172-
attribute.Float64("llm_d.kv_cache.hit_ratio", hitRatio),
227+
attribute.Float64("llm_d.kv_cache_manager.hit_ratio", hitRatio),
228+
attribute.String("operation.outcome", "success"),
173229
)
174230

175231
return podScores, nil

0 commit comments

Comments
 (0)