From 82a85df32865064b80992793c7320884a4ab7b33 Mon Sep 17 00:00:00 2001
From: Sage Ahrac
Date: Mon, 25 Aug 2025 14:09:59 +0300
Subject: [PATCH 1/3] Extract reusable block hasher

Signed-off-by: Sage Ahrac
---
 pkg/tokenization/prefixstore/block_hasher.go | 79 ++++++++++++++++++++
 1 file changed, 79 insertions(+)
 create mode 100644 pkg/tokenization/prefixstore/block_hasher.go

diff --git a/pkg/tokenization/prefixstore/block_hasher.go b/pkg/tokenization/prefixstore/block_hasher.go
new file mode 100644
index 00000000..147bece2
--- /dev/null
+++ b/pkg/tokenization/prefixstore/block_hasher.go
@@ -0,0 +1,79 @@
+/*
+Copyright 2025 The llm-d Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package prefixstore
+
+import (
+	"encoding/binary"
+	"fmt"
+
+	"github.com/cespare/xxhash/v2"
+)
+
+// BlockHasher handles the computation of block hashes for chunked text data.
+// It maintains state for sequential hash computation where each block's hash
+// depends on the previous block's hash.
+type BlockHasher struct {
+	digest       *xxhash.Digest
+	previousHash uint64
+	blockSize    int
+}
+
+// NewBlockHasher creates a new BlockHasher with the specified block size.
+func NewBlockHasher(blockSize int) *BlockHasher {
+	return &BlockHasher{
+		digest:       xxhash.New(),
+		previousHash: 0,
+		blockSize:    blockSize,
+	}
+}
+
+// Reset resets the hasher state for a new sequence of blocks.
+func (h *BlockHasher) Reset() {
+	h.digest.Reset()
+	h.previousHash = 0
+}
+
+// ComputeBlockHash computes the hash for a block of text data.
+// The hash depends on both the current block content and the previous block's hash.
+func (h *BlockHasher) ComputeBlockHash(data []byte) (uint64, error) {
+	h.digest.Reset()
+
+	// Include previous hash to create a chain
+	if err := binary.Write(h.digest, binary.LittleEndian, h.previousHash); err != nil {
+		return 0, fmt.Errorf("failed to write previous hash: %w", err)
+	}
+
+	// Include current block data
+	if _, err := h.digest.Write(data); err != nil {
+		return 0, fmt.Errorf("failed to write block data: %w", err)
+	}
+
+	blockHash := h.digest.Sum64()
+	h.previousHash = blockHash
+
+	return blockHash, nil
+}
+
+// GetPreviousHash returns the hash of the most recently computed block.
+func (h *BlockHasher) GetPreviousHash() uint64 {
+	return h.previousHash
+}
+
+// GetBlockSize returns the configured block size.
+func (h *BlockHasher) GetBlockSize() int {
+	return h.blockSize
+}
From 243d6854dae2ef2e6d2ecf1b83f24a15243f92a7 Mon Sep 17 00:00:00 2001
From: Sage Ahrac
Date: Mon, 25 Aug 2025 14:15:37 +0300
Subject: [PATCH 2/3] Update LRU prefix store to use BlockHasher

Signed-off-by: Sage Ahrac
---
 pkg/tokenization/prefixstore/lru_store.go | 30 ++++++-----------------
 1 file changed, 7 insertions(+), 23 deletions(-)

diff --git a/pkg/tokenization/prefixstore/lru_store.go b/pkg/tokenization/prefixstore/lru_store.go
index b4cf8b36..42ef3e17 100644
--- a/pkg/tokenization/prefixstore/lru_store.go
+++ b/pkg/tokenization/prefixstore/lru_store.go
@@ -17,11 +17,9 @@ limitations under the License.
 package prefixstore
 
 import (
-	"encoding/binary"
 	"fmt"
 	"sync"
 
-	"github.com/cespare/xxhash/v2"
 	"github.com/daulet/tokenizers"
 	lru "github.com/hashicorp/golang-lru/v2"
 )
@@ -109,8 +107,7 @@ func (c *LRUTokenStore) AddTokenization(modelName string, prompt string, tokens
 	promptBytes := []byte(prompt)
 	tokenIdxIterator := 0
 
-	previousHash := uint64(0)
-	digest := xxhash.New()
+	hasher := NewBlockHasher(c.blockSize)
 
 	// Chunk the text into blocks and populate the cache
 	for start := 0; start < len(promptBytes); start += c.blockSize {
@@ -120,17 +117,11 @@ func (c *LRUTokenStore) AddTokenization(modelName string, prompt string, tokens
 		}
 
 		// Compute the hash for the current block
-		digest.Reset()
-		if err := binary.Write(digest, binary.LittleEndian, previousHash); err != nil {
-			return fmt.Errorf("failed to add token: %w", err)
-		}
-		if _, err := digest.Write(promptBytes[start:end]); err != nil {
-			return fmt.Errorf("failed to add token: %w", err)
+		blockHash, err := hasher.ComputeBlockHash(promptBytes[start:end])
+		if err != nil {
+			return fmt.Errorf("failed to compute block hash: %w", err)
 		}
 
-		blockHash := digest.Sum64()
-		previousHash = blockHash
-
 		// Only add tokens with [_, high] offset associated with the chunk range.
 		// If a token's [low, _] index is less than the start, it is OK as long as
 		// the above condition is satisfied.
@@ -165,8 +156,7 @@ func (c *LRUTokenStore) FindLongestContainedTokens(prompt, modelName string) []u
 	containedTokens := []uint32{}
 	promptBytes := []byte(prompt)
 
-	previousHash := uint64(0)
-	digest := xxhash.New()
+	hasher := NewBlockHasher(c.blockSize)
 
 	// Chunk the text into blocks and populate the cache
 	for i := 0; i < len(promptBytes); i += c.blockSize {
@@ -176,17 +166,11 @@ func (c *LRUTokenStore) FindLongestContainedTokens(prompt, modelName string) []u
 		}
 
 		// Compute the hash for the current block
-		digest.Reset()
-		if err := binary.Write(digest, binary.LittleEndian, previousHash); err != nil {
-			break
-		}
-		if _, err := digest.Write(promptBytes[i:end]); err != nil {
+		blockHash, err := hasher.ComputeBlockHash(promptBytes[i:end])
+		if err != nil {
 			break
 		}
 
-		blockHash := digest.Sum64()
-		previousHash = blockHash
-
 		block, ok := cache.Get(blockHash)
 		if !ok {
 			break // early-stop
From b0fc5de978224c05e77b16619cc6b5fe78028bcf Mon Sep 17 00:00:00 2001
From: Sage Ahrac
Date: Mon, 25 Aug 2025 19:59:44 +0300
Subject: [PATCH 3/3] Detach block size from hasher

Signed-off-by: Sage Ahrac
---
 pkg/tokenization/prefixstore/block_hasher.go | 11 ++---------
 pkg/tokenization/prefixstore/lru_store.go    |  4 ++--
 2 files changed, 4 insertions(+), 11 deletions(-)

diff --git a/pkg/tokenization/prefixstore/block_hasher.go b/pkg/tokenization/prefixstore/block_hasher.go
index 147bece2..ff01b478 100644
--- a/pkg/tokenization/prefixstore/block_hasher.go
+++ b/pkg/tokenization/prefixstore/block_hasher.go
@@ -29,15 +29,13 @@ import (
 type BlockHasher struct {
 	digest       *xxhash.Digest
 	previousHash uint64
-	blockSize    int
 }
 
-// NewBlockHasher creates a new BlockHasher with the specified block size.
-func NewBlockHasher(blockSize int) *BlockHasher {
+// NewBlockHasher creates a new BlockHasher.
+func NewBlockHasher() *BlockHasher {
 	return &BlockHasher{
 		digest:       xxhash.New(),
 		previousHash: 0,
-		blockSize:    blockSize,
 	}
 }
 
@@ -72,8 +70,3 @@ // GetPreviousHash returns the hash of the most recently computed block.
 func (h *BlockHasher) GetPreviousHash() uint64 {
 	return h.previousHash
 }
-
-// GetBlockSize returns the configured block size.
-func (h *BlockHasher) GetBlockSize() int { - return h.blockSize -} diff --git a/pkg/tokenization/prefixstore/lru_store.go b/pkg/tokenization/prefixstore/lru_store.go index 42ef3e17..ae30601a 100644 --- a/pkg/tokenization/prefixstore/lru_store.go +++ b/pkg/tokenization/prefixstore/lru_store.go @@ -107,7 +107,7 @@ func (c *LRUTokenStore) AddTokenization(modelName string, prompt string, tokens promptBytes := []byte(prompt) tokenIdxIterator := 0 - hasher := NewBlockHasher(c.blockSize) + hasher := NewBlockHasher() // Chunk the text into blocks and populate the cache for start := 0; start < len(promptBytes); start += c.blockSize { @@ -156,7 +156,7 @@ func (c *LRUTokenStore) FindLongestContainedTokens(prompt, modelName string) []u containedTokens := []uint32{} promptBytes := []byte(prompt) - hasher := NewBlockHasher(c.blockSize) + hasher := NewBlockHasher() // Chunk the text into blocks and populate the cache for i := 0; i < len(promptBytes); i += c.blockSize {
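
For reviewers: below is a minimal, illustrative sketch (not part of the series) of how the refactored BlockHasher is driven after PATCH 3. It lives in package prefixstore, so no module path is assumed; the test name, block size, and input bytes are made up for this example. It also exercises the Reset behavior: clearing previousHash is what lets a second pass reproduce the first.

package prefixstore

import "testing"

// TestBlockHasherChaining chunks an input the same way lru_store.go does
// and checks that Reset restarts the hash chain from scratch.
func TestBlockHasherChaining(t *testing.T) {
	const blockSize = 4 // arbitrary for the example
	data := []byte("hello, chained world")

	hashBlocks := func(h *BlockHasher) []uint64 {
		var hashes []uint64
		for start := 0; start < len(data); start += blockSize {
			end := start + blockSize
			if end > len(data) {
				end = len(data)
			}
			hash, err := h.ComputeBlockHash(data[start:end])
			if err != nil {
				t.Fatalf("ComputeBlockHash: %v", err)
			}
			hashes = append(hashes, hash)
		}
		return hashes
	}

	h := NewBlockHasher()
	first := hashBlocks(h)

	// After Reset the chain starts over, so the hash sequence must repeat.
	h.Reset()
	second := hashBlocks(h)

	for i := range first {
		if first[i] != second[i] {
			t.Errorf("block %d: got %x after Reset, want %x", i, second[i], first[i])
		}
	}

	// GetPreviousHash reports the hash of the last block processed.
	if got := h.GetPreviousHash(); got != second[len(second)-1] {
		t.Errorf("GetPreviousHash: got %x, want %x", got, second[len(second)-1])
	}
}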