Skip to content

Commit 14f34cd

Browse files
cryo-zdrootfs
andauthored
fix: keep memory cache metrics accurate (#372)
* fix: keep memory cache metrics accurate Signed-off-by: cryo <[email protected]> * test: add test for metrics fix for UpdateWithResponse Signed-off-by: cryo <[email protected]> --------- Signed-off-by: cryo <[email protected]> Co-authored-by: Huamin Chen <[email protected]>
1 parent ee7ca36 commit 14f34cd

File tree

3 files changed

+54
-14
lines changed

3 files changed

+54
-14
lines changed

src/semantic-router/go.mod

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ require (
5959
github.com/json-iterator/go v1.1.12 // indirect
6060
github.com/kr/pretty v0.3.1 // indirect
6161
github.com/kr/text v0.2.0 // indirect
62+
github.com/kylelemons/godebug v1.1.0 // indirect
6263
github.com/milvus-io/milvus-proto/go-api/v2 v2.4.10-0.20240819025435-512e3b98866a // indirect
6364
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
6465
github.com/modern-go/reflect2 v1.0.2 // indirect

src/semantic-router/pkg/cache/cache_test.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,13 @@ import (
55
"path/filepath"
66
"strings"
77
"testing"
8+
"time"
89

910
candle_binding "github.com/vllm-project/semantic-router/candle-binding"
1011
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/cache"
12+
"github.com/vllm-project/semantic-router/src/semantic-router/pkg/metrics"
13+
14+
"github.com/prometheus/client_golang/prometheus/testutil"
1115

1216
. "github.com/onsi/ginkgo/v2"
1317
. "github.com/onsi/gomega"
@@ -501,6 +505,32 @@ development:
501505
Expect(response).To(Equal([]byte("response")))
502506
})
503507

508+
It("should update cache entries metric when cleanup occurs during UpdateWithResponse", func() {
509+
// Reset gauge defensively so the assertion stands alone even if other specs fail early
510+
metrics.UpdateCacheEntries("memory", 0)
511+
512+
Expect(inMemoryCache.Close()).NotTo(HaveOccurred())
513+
inMemoryCache = cache.NewInMemoryCache(cache.InMemoryCacheOptions{
514+
Enabled: true,
515+
SimilarityThreshold: 0.8,
516+
MaxEntries: 100,
517+
TTLSeconds: 1,
518+
})
519+
520+
err := inMemoryCache.AddPendingRequest("expired-request-id", "test-model", "stale query", []byte("request"))
521+
Expect(err).NotTo(HaveOccurred())
522+
Expect(testutil.ToFloat64(metrics.CacheEntriesTotal.WithLabelValues("memory"))).To(Equal(float64(1)))
523+
524+
// Wait for TTL to expire before triggering the update path
525+
time.Sleep(2 * time.Second)
526+
527+
err = inMemoryCache.UpdateWithResponse("expired-request-id", []byte("response"))
528+
Expect(err).To(HaveOccurred())
529+
Expect(err.Error()).To(ContainSubstring("no pending request"))
530+
531+
Expect(testutil.ToFloat64(metrics.CacheEntriesTotal.WithLabelValues("memory"))).To(BeZero())
532+
})
533+
504534
It("should respect similarity threshold", func() {
505535
// Add entry with a very high similarity threshold
506536
highThresholdOptions := cache.InMemoryCacheOptions{

src/semantic-router/pkg/cache/inmemory_cache.go

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,10 @@ func (c *InMemoryCache) Close() error {
324324

325325
// Clear all entries to free memory
326326
c.entries = nil
327+
328+
// Zero cache entries metrics
329+
metrics.UpdateCacheEntries("memory", 0)
330+
327331
return nil
328332
}
329333

@@ -355,7 +359,7 @@ func (c *InMemoryCache) GetStats() CacheStats {
355359
return stats
356360
}
357361

358-
// cleanupExpiredEntries removes entries that have exceeded their TTL
362+
// cleanupExpiredEntries removes entries that have exceeded their TTL and updates the cache entry count metric to keep metrics in sync.
359363
// Caller must hold a write lock
360364
func (c *InMemoryCache) cleanupExpiredEntries() {
361365
if c.ttlSeconds <= 0 {
@@ -372,20 +376,25 @@ func (c *InMemoryCache) cleanupExpiredEntries() {
372376
}
373377
}
374378

375-
if len(validEntries) < len(c.entries) {
376-
expiredCount := len(c.entries) - len(validEntries)
377-
observability.Debugf("InMemoryCache: TTL cleanup removed %d expired entries (remaining: %d)",
378-
expiredCount, len(validEntries))
379-
observability.LogEvent("cache_cleanup", map[string]interface{}{
380-
"backend": "memory",
381-
"expired_count": expiredCount,
382-
"remaining_count": len(validEntries),
383-
"ttl_seconds": c.ttlSeconds,
384-
})
385-
c.entries = validEntries
386-
cleanupTime := time.Now()
387-
c.lastCleanupTime = &cleanupTime
379+
if len(validEntries) == len(c.entries) {
380+
return
388381
}
382+
383+
expiredCount := len(c.entries) - len(validEntries)
384+
observability.Debugf("InMemoryCache: TTL cleanup removed %d expired entries (remaining: %d)",
385+
expiredCount, len(validEntries))
386+
observability.LogEvent("cache_cleanup", map[string]interface{}{
387+
"backend": "memory",
388+
"expired_count": expiredCount,
389+
"remaining_count": len(validEntries),
390+
"ttl_seconds": c.ttlSeconds,
391+
})
392+
c.entries = validEntries
393+
cleanupTime := time.Now()
394+
c.lastCleanupTime = &cleanupTime
395+
396+
// Update metrics after cleanup
397+
metrics.UpdateCacheEntries("memory", len(c.entries))
389398
}
390399

391400
// cleanupExpiredEntriesReadOnly identifies expired entries without modifying the cache

0 commit comments

Comments
 (0)