Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,36 @@ require (
github.com/redis/go-redis/v9 v9.7.3
github.com/stretchr/testify v1.10.0
github.com/vmihailenco/msgpack/v5 v5.4.1
go.opentelemetry.io/otel v1.33.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0
go.opentelemetry.io/otel/sdk v1.33.0
go.opentelemetry.io/otel/trace v1.33.0
go.uber.org/multierr v1.11.0
golang.org/x/net v0.38.0
golang.org/x/sync v0.12.0
google.golang.org/grpc v1.68.1
google.golang.org/protobuf v1.36.5
k8s.io/apimachinery v0.33.0
k8s.io/client-go v0.33.0
k8s.io/klog/v2 v2.130.1
sigs.k8s.io/controller-runtime v0.21.0
)

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/swag v0.23.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/google/gnostic-models v0.6.9 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.7 // indirect
Expand All @@ -53,17 +61,21 @@ require (
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
github.com/x448/float16 v0.8.4 // indirect
github.com/yuin/gopher-lua v1.1.1 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 // indirect
go.opentelemetry.io/otel/metric v1.33.0 // indirect
go.opentelemetry.io/proto/otlp v1.4.0 // indirect
golang.org/x/oauth2 v0.27.0 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/term v0.30.0 // indirect
golang.org/x/text v0.23.0 // indirect
golang.org/x/time v0.9.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/api v0.33.0 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
Expand Down
29 changes: 29 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs=
github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c=
github.com/bsm/gomega v1.27.10 h1:yeMWxP2pV2fG3FgAODIY8EiRE3dy0aeFYt4l7wh6yKA=
github.com/bsm/gomega v1.27.10/go.mod h1:JyEr/xRbxbtgWNi8tIEVPUYZ5Dzef52k01W3YH0H+O0=
github.com/cenkalti/backoff/v4 v4.3.0 h1:MyRJ/UdXutAwSAT+s3wNd7MfTIcy71VQueUuFK343L8=
github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
Expand All @@ -27,8 +29,11 @@ github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxER
github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E=
github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs=
github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ=
github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY=
Expand All @@ -53,6 +58,8 @@ github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db h1:097atOisP2aRj7vFgY
github.com/google/pprof v0.0.0-20241029153458-d1b30febd7db/go.mod h1:vavhavw2zAxS5dIdcRluK6cSGGPlZynqzFM8NdvU144=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 h1:TmHmbvxPmaegwhDubVz0lICL0J5Ka2vwTzhoePEXsGE=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0/go.mod h1:qztMSjm835F2bXf+5HKAPIS5qsmQDqZna/PgVt4rWtI=
github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k=
github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM=
github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY=
Expand All @@ -61,13 +68,17 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr
github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo=
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0=
github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
Expand Down Expand Up @@ -123,6 +134,22 @@ github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M=
github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/otel v1.33.0 h1:/FerN9bax5LoK51X/sI0SVYrjSE0/yUL7DpxW4K3FWw=
go.opentelemetry.io/otel v1.33.0/go.mod h1:SUUkR6csvUQl+yjReHu5uM3EtVV7MBm5FHKRlNx4I8I=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0 h1:Vh5HayB/0HHfOQA7Ctx69E/Y/DcQSMPpKANYVMQ7fBA=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.33.0/go.mod h1:cpgtDBaqD/6ok/UG0jT15/uKjAY8mRA53diogHBg3UI=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0 h1:5pojmb1U1AogINhN3SurB+zm/nIcusopeBNp42f45QM=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.33.0/go.mod h1:57gTHJSE5S1tqg+EKsLPlTWhpHMsWlVmer+LA926XiA=
go.opentelemetry.io/otel/metric v1.33.0 h1:r+JOocAyeRVXD8lZpjdQjzMadVZp2M4WmQ+5WtEnklQ=
go.opentelemetry.io/otel/metric v1.33.0/go.mod h1:L9+Fyctbp6HFTddIxClbQkjtubW6O9QS3Ann/M82u6M=
go.opentelemetry.io/otel/sdk v1.33.0 h1:iax7M131HuAm9QkZotNHEfstof92xM+N8sr3uHXc2IM=
go.opentelemetry.io/otel/sdk v1.33.0/go.mod h1:A1Q5oi7/9XaMlIWzPSxLRWOI8nG3FnzHJNbiENQuihM=
go.opentelemetry.io/otel/trace v1.33.0 h1:cCJuF7LRjUFso9LPnEAHJDB2pqzp+hbO8eu1qqW2d/s=
go.opentelemetry.io/otel/trace v1.33.0/go.mod h1:uIcdVUZMpTAmz0tI1z04GoVSezK37CbGV4fr1f2nBck=
go.opentelemetry.io/proto/otlp v1.4.0 h1:TA9WRvW6zMwP+Ssb6fLoUIuirti1gGbP28GcKG1jgeg=
go.opentelemetry.io/proto/otlp v1.4.0/go.mod h1:PPBWZIP98o2ElSqI35IHfu7hIhSwvc5N38Jw8pXuGFY=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
Expand Down Expand Up @@ -168,6 +195,8 @@ golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576 h1:CkkIfIt50+lT6NHAVoRYEyAvQGFM7xEwXUUywFvEb3Q=
google.golang.org/genproto/googleapis/api v0.0.0-20241209162323-e6fa225c2576/go.mod h1:1R3kvZ1dtP3+4p4d3G8uJ8rFk/fWlScl38vanWACI08=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576 h1:8ZmaLZE4XWrtU3MyClkYqqtl6Oegr3235h7jxsDyqCY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241209162323-e6fa225c2576/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU=
google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0=
Expand Down
141 changes: 137 additions & 4 deletions pkg/kvcache/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,15 @@
"context"
"fmt"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
"k8s.io/apimachinery/pkg/util/sets"
"sigs.k8s.io/controller-runtime/pkg/log"

"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
preprocessing "github.com/llm-d/llm-d-kv-cache-manager/pkg/preprocessing/chat_completions"
"github.com/llm-d/llm-d-kv-cache-manager/pkg/telemetry"
"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization/prefixstore"
"github.com/llm-d/llm-d-kv-cache-manager/pkg/utils/logging"
Expand Down Expand Up @@ -132,6 +136,20 @@
func (k *Indexer) GetPodScores(ctx context.Context, renderReq *preprocessing.RenderJinjaTemplateRequest, prompt, modelName string,
podIdentifiers []string,
) (map[string]float64, error) {
// Start tracing span for main operation
tracer := telemetry.Tracer()
ctx, span := tracer.Start(ctx, "llm_d.kv_cache_manager.get_scores",
trace.WithSpanKind(trace.SpanKindServer),
)
defer span.End()

// Set initial attributes
span.SetAttributes(
attribute.String("gen_ai.request.model", modelName),
attribute.Int("llm_d.kv_cache_manager.pod_count", len(podIdentifiers)),
attribute.StringSlice("llm_d.kv_cache_manager.considered_pods", podIdentifiers),
)

traceLogger := log.FromContext(ctx).V(logging.TRACE).WithName("kvcache.GetPodScores")

// 1. tokenize prompt
Expand All @@ -141,30 +159,145 @@
blockKeys := k.tokensProcessor.TokensToKVBlockKeys(tokens, modelName)
if len(blockKeys) == 0 {
traceLogger.Info("no block keys found, returning empty scores")
span.SetAttributes(attribute.Int("llm_d.kv_cache_manager.block_keys.count", 0))
span.SetStatus(codes.Ok, "")
//nolint:nilnil // no need to return an error
return nil, nil
}

span.SetAttributes(attribute.Int("llm_d.kv_cache_manager.block_keys.count", len(blockKeys)))
traceLogger.Info("found tokens", "tokens", tokens, "block-keys", blockKeys)

// 3. query kvblock indexer for pods
keyToPods, err := k.kvBlockIndex.Lookup(ctx, blockKeys, sets.New(podIdentifiers...))
// 3. query kvblock indexer for pods (with child span)
keyToPods, err := k.lookupWithSpan(ctx, blockKeys, sets.New(podIdentifiers...))
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return nil, fmt.Errorf("failed to query kvblock indexer: %w", err)
}
traceLogger.Info("found block keys", "block-keys", blockKeys,
"pods", podsPerKeyPrintHelper(keyToPods))

// 4. score pods
podScores, err := k.kvBlockScorer.Score(blockKeys, keyToPods)
// Calculate total blocks available
totalBlocksAvailable := 0
for _, pods := range keyToPods {
totalBlocksAvailable += len(pods)
}
span.SetAttributes(attribute.Int("llm_d.kv_cache_manager.total_blocks_available", totalBlocksAvailable))

// 4. score pods (with child span)
podScores, err := k.scoreWithSpan(ctx, blockKeys, keyToPods)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return nil, fmt.Errorf("failed to query kvblock scorer: %w", err)
}
traceLogger.Info("found pod scores", "pod-scores", podScores)

// Calculate hit ratio (pods with non-zero scores / total pods)
podsWithHits := 0
podsWithHitsList := []string{}
for pod, score := range podScores {
if score > 0 {
podsWithHits++
podsWithHitsList = append(podsWithHitsList, pod)
}
}
hitRatio := 0.0
if len(podIdentifiers) > 0 {
hitRatio = float64(podsWithHits) / float64(len(podIdentifiers))
}
span.SetAttributes(
attribute.Float64("llm_d.kv_cache_manager.hit_ratio", hitRatio),
attribute.Int("llm_d.kv_cache_manager.pods_with_hits", podsWithHits),
attribute.StringSlice("llm_d.kv_cache_manager.pods_with_hits_list", podsWithHitsList),
)

span.SetStatus(codes.Ok, "")
return podScores, nil
}

// lookupWithSpan wraps kvBlockIndex.Lookup with a tracing span

Check failure on line 220 in pkg/kvcache/indexer.go

View workflow job for this annotation

GitHub Actions / lint-and-test

Comment should end in a period (godot)
func (k *Indexer) lookupWithSpan(ctx context.Context, blockKeys []kvblock.Key, podSet sets.Set[string]) (map[kvblock.Key][]kvblock.PodEntry, error) {

Check failure on line 221 in pkg/kvcache/indexer.go

View workflow job for this annotation

GitHub Actions / lint-and-test

The line is 149 characters long, which exceeds the maximum of 130 characters. (lll)
tracer := telemetry.Tracer()
ctx, span := tracer.Start(ctx, "llm_d.kv_cache_manager.storage.lookup",
trace.WithSpanKind(trace.SpanKindInternal),
)
defer span.End()

span.SetAttributes(
attribute.Int("llm_d.kv_cache_manager.lookup.block_count", len(blockKeys)),
attribute.Int("llm_d.kv_cache_manager.lookup.pod_filter_count", podSet.Len()),
)

result, err := k.kvBlockIndex.Lookup(ctx, blockKeys, podSet)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return nil, err
}

// Calculate cache hit metrics
blocksFound := 0
for _, pods := range result {
if len(pods) > 0 {
blocksFound++
}
}
cacheHit := blocksFound > 0

span.SetAttributes(
attribute.Bool("llm_d.kv_cache_manager.lookup.cache_hit", cacheHit),
attribute.Int("llm_d.kv_cache_manager.lookup.blocks_found", blocksFound),
)

span.SetStatus(codes.Ok, "")
return result, nil
}

// scoreWithSpan wraps kvBlockScorer.Score with a tracing span

Check failure on line 258 in pkg/kvcache/indexer.go

View workflow job for this annotation

GitHub Actions / lint-and-test

Comment should end in a period (godot)
func (k *Indexer) scoreWithSpan(ctx context.Context, keys []kvblock.Key, keyToPods map[kvblock.Key][]kvblock.PodEntry) (map[string]float64, error) {

Check failure on line 259 in pkg/kvcache/indexer.go

View workflow job for this annotation

GitHub Actions / lint-and-test

The line is 148 characters long, which exceeds the maximum of 130 characters. (lll)
tracer := telemetry.Tracer()
ctx, span := tracer.Start(ctx, "llm_d.kv_cache_manager.scorer.compute",

Check failure on line 261 in pkg/kvcache/indexer.go

View workflow job for this annotation

GitHub Actions / lint-and-test

SA4006: this value of ctx is never used (staticcheck)

Check failure on line 261 in pkg/kvcache/indexer.go

View workflow job for this annotation

GitHub Actions / lint-and-test

ineffectual assignment to ctx (ineffassign)
trace.WithSpanKind(trace.SpanKindInternal),
)
defer span.End()

span.SetAttributes(
attribute.String("llm_d.kv_cache_manager.scorer.algorithm", string(k.kvBlockScorer.Strategy())),
attribute.Int("llm_d.kv_cache_manager.scorer.key_count", len(keys)),
)

scores, err := k.kvBlockScorer.Score(keys, keyToPods)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
return nil, err
}

// Calculate score distribution
if len(scores) > 0 {
maxScore := 0.0
totalScore := 0.0
for _, score := range scores {
if score > maxScore {
maxScore = score
}
totalScore += score
}
avgScore := totalScore / float64(len(scores))

span.SetAttributes(
attribute.Float64("llm_d.kv_cache_manager.score.max", maxScore),
attribute.Float64("llm_d.kv_cache_manager.score.avg", avgScore),
attribute.Int("llm_d.kv_cache_manager.scorer.pods_scored", len(scores)),
)
}

span.SetStatus(codes.Ok, "")
return scores, nil
}

// podsPerKeyPrintHelper formats a map of keys to pod entries for printing.
func podsPerKeyPrintHelper(ks map[kvblock.Key][]kvblock.PodEntry) string {
flattened := ""
Expand Down
Loading
Loading