Skip to content

Commit 0242a6e

Browse files
committed
rebase
Signed-off-by: junq <[email protected]>
2 parents 018b022 + 504bb7f commit 0242a6e

File tree

3,698 files changed

+23624
-12149
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

3,698 files changed

+23624
-12149
lines changed

benchmarks/cpp/disaggServerBenchmark.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -542,7 +542,8 @@ texec::Request makeExecutorContextRequest(Sample const& sample, SizeType32 const
542542
std::nullopt, // kvCacheRetentionConfig
543543
std::nullopt, // logitsPostProcessorName
544544
std::nullopt, // logitsPostProcessor
545-
encoderInputTokenIds.has_value() ? encoderInputTokenIds : std::nullopt);
545+
encoderInputTokenIds.has_value() ? encoderInputTokenIds : std::nullopt,
546+
std::nullopt); // cacheSaltID
546547
request.setRequestType(tensorrt_llm::executor::RequestType::REQUEST_TYPE_CONTEXT_ONLY);
547548
return request;
548549
}

benchmarks/cpp/gptManagerBenchmark.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -837,7 +837,8 @@ texec::Request makeExecutorRequest(Sample const& sample, SizeType32 const& beamW
837837
std::nullopt, // kvCacheRetentionConfig
838838
std::nullopt, // logitsPostProcessorName
839839
std::nullopt, // logitsPostProcessor
840-
encoderInputTokenIds.has_value() ? encoderInputTokenIds : std::nullopt);
840+
encoderInputTokenIds.has_value() ? encoderInputTokenIds : std::nullopt,
841+
std::nullopt); // cacheSaltID
841842
}
842843

843844
void benchmarkExecutor(std::optional<std::filesystem::path> const& decoderEngineDir,

cpp/include/tensorrt_llm/batch_manager/kvCacheManager.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ using UniqueToken = tensorrt_llm::runtime::UniqueToken;
6969
using VecUniqueTokens = tensorrt_llm::runtime::VecUniqueTokens;
7070
using LoraTaskIdType = tensorrt_llm::runtime::LoraTaskIdType;
7171
using BlocksPerWindow = std::map<SizeType32, std::tuple<SizeType32, SizeType32>>;
72+
using CacheSaltIDType = tensorrt_llm::runtime::CacheSaltIDType;
7273

7374
// Type alias for multimodal hash key (hash array + start offset)
7475
using MmKey = std::pair<std::array<uint8_t, 32>, SizeType32>;
@@ -115,6 +116,7 @@ struct BlockKey
115116
// Extra keys for multimodal data (similar to VLLM's approach)
116117
// Each extra key is a pair of (mm_hash, start_offset_in_block)
117118
std::vector<MmKey> extraKeys;
119+
std::optional<CacheSaltIDType> cacheSaltID = std::nullopt;
118120

119121
BlockKey() = default;
120122

@@ -129,24 +131,25 @@ struct BlockKey
129131
}
130132

131133
explicit BlockKey(bool usesExtraIds, std::optional<LoraTaskIdType> loraTaskId, VecUniqueTokens uniqueTokens,
132-
std::vector<MmKey> extraKeys = {})
134+
std::vector<MmKey> extraKeys = {}, std::optional<CacheSaltIDType> cacheSaltID = std::nullopt)
133135
: usesExtraIds{usesExtraIds}
134136
, loraTaskId{loraTaskId}
135137
, uniqueTokens{std::move(uniqueTokens)}
136138
, extraKeys{std::move(extraKeys)}
139+
, cacheSaltID{cacheSaltID}
137140
{
138141
}
139142

140143
bool operator==(BlockKey const& other) const noexcept
141144
{
142145
return (usesExtraIds == other.usesExtraIds && loraTaskId == other.loraTaskId
143-
&& uniqueTokens == other.uniqueTokens && extraKeys == other.extraKeys);
146+
&& uniqueTokens == other.uniqueTokens && extraKeys == other.extraKeys && cacheSaltID == other.cacheSaltID);
144147
}
145148

146149
int partialMatch(BlockKey const& other) const noexcept
147150
{
148151
SizeType32 numMatched{0};
149-
if (loraTaskId == other.loraTaskId && extraKeys == other.extraKeys)
152+
if (loraTaskId == other.loraTaskId && extraKeys == other.extraKeys && cacheSaltID == other.cacheSaltID)
150153
{
151154
auto [matchEnd, otherMatchEnd] = std::mismatch(
152155
uniqueTokens.begin(), uniqueTokens.end(), other.uniqueTokens.begin(), other.uniqueTokens.end());

cpp/include/tensorrt_llm/batch_manager/llmRequest.h

Lines changed: 26 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,8 @@ class GenericLlmRequest
100100
RequestIdType, TensorPtr&, BeamTokens const&, TStream const&, std::optional<RequestIdType>)>;
101101
using RequestPtr = std::shared_ptr<GenericLlmRequest>;
102102
using MillisecondsType = std::chrono::milliseconds;
103+
using CacheSaltIDType = runtime::CacheSaltIDType;
103104

104-
// 49 parameters, 56 items in initialization list
105105
GenericLlmRequest(RequestIdType requestId, SizeType32 maxNewTokens, std::shared_ptr<VecTokens> const& inputTokens,
106106
runtime::SamplingConfig const& samplingConfig, bool isStreaming, std::optional<SizeType32> endId = std::nullopt,
107107
std::optional<SizeType32> padId = std::nullopt, std::optional<TensorPtr> embeddingBias = std::nullopt,
@@ -137,7 +137,8 @@ class GenericLlmRequest
137137
std::optional<executor::GuidedDecodingParams> guidedDecodingParams = std::nullopt,
138138
std::optional<SizeType32> languageAdapterUid = std::nullopt,
139139
std::optional<MillisecondsType> allottedTimeMs = std::nullopt,
140-
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt)
140+
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt,
141+
std::optional<CacheSaltIDType> cacheSaltID = std::nullopt)
141142
: mRequestId(requestId)
142143
, mPromptLen(inputTokens->size())
143144
, mMaxNewTokens(maxNewTokens)
@@ -194,6 +195,7 @@ class GenericLlmRequest
194195
, mGuidedDecodingParams(std::move(guidedDecodingParams))
195196
, mLanguageAdapterUid(languageAdapterUid)
196197
, mAllottedTimeMs(allottedTimeMs)
198+
, mCacheSaltID(cacheSaltID)
197199
{
198200
if (mEncoderTokens.has_value() || encoderInputFeatures.has_value())
199201
{
@@ -203,7 +205,6 @@ class GenericLlmRequest
203205
initialize(*inputTokens, returnLogProbs);
204206
}
205207

206-
// 32 parameters, 39 items in initialization list
207208
GenericLlmRequest(RequestIdType requestId, SizeType32 maxNewTokens, VecTokens const& inputTokens,
208209
runtime::SamplingConfig const& samplingConfig, bool isStreaming, std::optional<SizeType32> endId = std::nullopt,
209210
std::optional<SizeType32> padId = std::nullopt, std::optional<TensorPtr> embeddingBias = std::nullopt,
@@ -221,7 +222,8 @@ class GenericLlmRequest
221222
bool returnEncoderOutput = false, std::optional<RequestIdType> clientId = std::nullopt,
222223
executor::PriorityType priority = executor::Request::kDefaultPriority, SizeType32 numReturnSequences = 1,
223224
std::optional<SizeType32> languageAdapterUid = std::nullopt,
224-
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt)
225+
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt,
226+
std::optional<CacheSaltIDType> cacheSaltID = std::nullopt)
225227
: mRequestId(requestId)
226228
, mPromptLen(inputTokens.size())
227229
, mMaxNewTokens(maxNewTokens)
@@ -261,6 +263,7 @@ class GenericLlmRequest
261263
, mContextPhaseParams(contextPhaseParams)
262264
, mNumReturnSequences(numReturnSequences)
263265
, mLanguageAdapterUid(languageAdapterUid)
266+
, mCacheSaltID(cacheSaltID)
264267
{
265268
if (mEncoderTokens.has_value())
266269
{
@@ -269,7 +272,6 @@ class GenericLlmRequest
269272
initialize(inputTokens, returnLogProbs);
270273
}
271274

272-
// 29 items in initialization list
273275
GenericLlmRequest(RequestIdType requestId, executor::Request const& req)
274276
: mRequestId(requestId)
275277
, mPromptLen(req.getInputTokenIds().size())
@@ -300,6 +302,7 @@ class GenericLlmRequest
300302
, mGuidedDecodingParams(req.getGuidedDecodingParams())
301303
, mLanguageAdapterUid(req.getLanguageAdapterUid())
302304
, mAllottedTimeMs(req.getAllottedTimeMs())
305+
, mCacheSaltID(req.getCacheSaltID())
303306
{
304307
if (req.getRequestType() == executor::RequestType::REQUEST_TYPE_GENERATION_ONLY)
305308
{
@@ -1764,6 +1767,11 @@ class GenericLlmRequest
17641767
return mLanguageAdapterUid;
17651768
}
17661769

1770+
[[nodiscard]] std::optional<CacheSaltIDType> getCacheSaltID() const
1771+
{
1772+
return mCacheSaltID;
1773+
}
1774+
17671775
std::vector<SizeType32> getLanguageAdapterRouting(
17681776
SizeType32 const reqNumLanguages, SizeType32 const inputLength) const
17691777
{
@@ -2042,6 +2050,9 @@ class GenericLlmRequest
20422050

20432051
bool mUseDraftModel{false};
20442052

2053+
// Cache salt id for each request.
2054+
std::optional<CacheSaltIDType> mCacheSaltID{std::nullopt};
2055+
20452056
private:
20462057
void initialize(VecTokens const& inputTokens, bool outputLogProbs)
20472058
{
@@ -2222,7 +2233,8 @@ class LlmRequest : public GenericLlmRequest<runtime::ITensor::SharedPtr>
22222233
std::optional<executor::GuidedDecodingParams> guidedDecodingParams = std::nullopt,
22232234
std::optional<SizeType32> languageAdapterUid = std::nullopt,
22242235
std::optional<MillisecondsType> allottedTimeMs = std::nullopt,
2225-
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt)
2236+
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt,
2237+
std::optional<CacheSaltIDType> cacheSaltID = std::nullopt)
22262238
: Base(requestId, maxNewTokens, std::move(inputTokens), samplingConfig, isStreaming, endId, padId,
22272239
std::move(embeddingBias), std::move(badWordsList), std::move(stopWordsList), std::move(positionIds),
22282240
std::move(promptEmbeddingTable), promptVocabSize, std::move(multimodalHashes),
@@ -2234,7 +2246,8 @@ class LlmRequest : public GenericLlmRequest<runtime::ITensor::SharedPtr>
22342246
std::move(encoderInputTokens), returnEncoderOutput, clientId, priority, std::move(encoderInputFeatures),
22352247
std::move(encoderOutputLength), std::move(crossAttentionMask), llmRequestType,
22362248
std::move(inputTokenExtraIds), numReturnSequences, std::move(eagleConfig), std::move(skipCrossAttnBlocks),
2237-
returnPerfMetrics, std::move(guidedDecodingParams), languageAdapterUid, allottedTimeMs, contextPhaseParams)
2249+
returnPerfMetrics, std::move(guidedDecodingParams), languageAdapterUid, allottedTimeMs, contextPhaseParams,
2250+
cacheSaltID)
22382251
{
22392252
}
22402253

@@ -2272,7 +2285,8 @@ class LlmRequest : public GenericLlmRequest<runtime::ITensor::SharedPtr>
22722285
std::optional<executor::GuidedDecodingParams> guidedDecodingParams = std::nullopt,
22732286
std::optional<SizeType32> languageAdapterUid = std::nullopt,
22742287
std::optional<MillisecondsType> allottedTimeMs = std::nullopt,
2275-
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt)
2288+
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt,
2289+
std::optional<CacheSaltIDType> cacheSaltID = std::nullopt)
22762290
: Base(requestId, maxNewTokens, std::make_shared<std::vector<TokenIdType>>(std::move(inputTokens)),
22772291
samplingConfig, isStreaming, endId, padId, std::move(embeddingBias), std::move(badWordsList),
22782292
std::move(stopWordsList),
@@ -2302,7 +2316,7 @@ class LlmRequest : public GenericLlmRequest<runtime::ITensor::SharedPtr>
23022316
inputTokenExtraIds ? std::make_optional(std::make_shared<VecTokenExtraIds>(std::move(*inputTokenExtraIds)))
23032317
: std::optional<std::shared_ptr<VecTokenExtraIds>>(std::nullopt),
23042318
numReturnSequences, std::move(eagleConfig), skipCrossAttnBlocks, returnPerfMetrics,
2305-
std::move(guidedDecodingParams), languageAdapterUid, allottedTimeMs, contextPhaseParams)
2319+
std::move(guidedDecodingParams), languageAdapterUid, allottedTimeMs, contextPhaseParams, cacheSaltID)
23062320
{
23072321
}
23082322

@@ -2324,14 +2338,15 @@ class LlmRequest : public GenericLlmRequest<runtime::ITensor::SharedPtr>
23242338
bool returnEncoderOutput = false, std::optional<RequestIdType> clientId = std::nullopt,
23252339
executor::PriorityType priority = executor::Request::kDefaultPriority, SizeType32 numReturnSequences = 1,
23262340
std::optional<SizeType32> languageAdapterUid = std::nullopt,
2327-
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt)
2341+
std::optional<executor::ContextPhaseParams> const& contextPhaseParams = std::nullopt,
2342+
std::optional<CacheSaltIDType> cacheSaltID = std::nullopt)
23282343
: Base(requestId, maxNewTokens, inputTokens, samplingConfig, isStreaming, endId, padId,
23292344
std::move(embeddingBias), std::move(badWordsList), std::move(stopWordsList), std::move(positionIds),
23302345
std::move(promptEmbeddingTable), promptVocabSize, loraTaskId, std::move(loraWeights), std::move(loraConfig),
23312346
lookaheadConfig, returnLogProbs, returnContextLogits, returnGenerationLogits, std::move(draftTokens),
23322347
std::move(draftLogits), excludeInputFromOutput, std::move(logitsPostProcessor),
23332348
applyLogitsPostProcessorBatched, std::move(encoderInputTokens), returnEncoderOutput, clientId, priority,
2334-
numReturnSequences, languageAdapterUid, contextPhaseParams)
2349+
numReturnSequences, languageAdapterUid, contextPhaseParams, cacheSaltID)
23352350
{
23362351
}
23372352

cpp/include/tensorrt_llm/executor/executor.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -670,7 +670,7 @@ class Request
670670
/// @param allottedTimeMs The allotted time in milliseconds after which the request is cancelled with a timedOut
671671
/// finish reason. The request may exceed this time slightly, but at most by 1 forward pass (in pipeline parallelism
672672
/// that may involve multiple micro-batches). A request can be timed-out before ever being scheduled.
673-
// 34 parameters
673+
/// @param cacheSaltID Salt ID for KV cache blocks, limiting KV cache reuse to requests with the same cache salt ID.
674674
Request(VecTokens inputTokenIds, SizeType32 maxTokens, bool streaming = false,
675675
SamplingConfig const& samplingConfig = SamplingConfig(), OutputConfig const& outputConfig = OutputConfig(),
676676
std::optional<SizeType32> const& endId = std::nullopt, std::optional<SizeType32> const& padId = std::nullopt,
@@ -697,7 +697,8 @@ class Request
697697
std::optional<EagleConfig> eagleConfig = std::nullopt, std::optional<Tensor> skipCrossAttnBlocks = std::nullopt,
698698
std::optional<GuidedDecodingParams> guidedDecodingParams = std::nullopt,
699699
std::optional<SizeType32> languageAdapterUid = std::nullopt,
700-
std::optional<MillisecondsType> allottedTimeMs = std::nullopt);
700+
std::optional<MillisecondsType> allottedTimeMs = std::nullopt,
701+
std::optional<CacheSaltIDType> cacheSaltID = std::nullopt);
701702

702703
/// @brief This logits postprocessor name will dispatch to the batched logits postprocessor
703704
static auto constexpr kBatchedPostProcessorName = "batched";
@@ -745,6 +746,7 @@ class Request
745746
[[nodiscard]] std::optional<GuidedDecodingParams> getGuidedDecodingParams() const;
746747
[[nodiscard]] std::optional<SizeType32> getLanguageAdapterUid() const;
747748
[[nodiscard]] std::optional<MillisecondsType> getAllottedTimeMs() const;
749+
[[nodiscard]] std::optional<CacheSaltIDType> getCacheSaltID() const;
748750
[[nodiscard]] std::optional<std::vector<std::string>> getAdditionalOutputNames() const;
749751

750752
void setStreaming(bool streaming);
@@ -780,6 +782,7 @@ class Request
780782
void setGuidedDecodingParams(GuidedDecodingParams const& guidedDecodingParams);
781783
void setLanguageAdapterUid(SizeType32 languageAdapterUid);
782784
void setAllottedTimeMs(MillisecondsType allottedTimeMs);
785+
void setCacheSaltID(CacheSaltIDType cacheSaltID);
783786

784787
private:
785788
friend class Serialization;

cpp/include/tensorrt_llm/executor/types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ using RandomSeedType = std::uint64_t;
5858
using VecLogProbs = std::vector<FloatType>;
5959
using StreamPtr = std::shared_ptr<tensorrt_llm::runtime::CudaStream>;
6060
using MillisecondsType = std::chrono::milliseconds;
61+
using CacheSaltIDType = std::uint64_t;
6162
using LogitsPostProcessor
6263
= std::function<void(IdType, Tensor&, BeamTokens const&, StreamPtr const&, std::optional<IdType>)>;
6364
using LogitsPostProcessorMap = std::unordered_map<std::string, LogitsPostProcessor>;

cpp/include/tensorrt_llm/runtime/common.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ using TokenIdType = std::int32_t;
4444
using LoraTaskIdType = std::uint64_t;
4545
using TokenExtraIdType = std::uint64_t;
4646
using VecTokenExtraIds = std::vector<TokenExtraIdType>;
47+
using CacheSaltIDType = std::uint64_t;
4748

4849
struct UniqueToken
4950
{

cpp/tensorrt_llm/batch_manager/dataTransceiver.cpp

Lines changed: 40 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,34 @@ class DataResponder::Impl
204204
}
205205
}
206206

207+
void sendResponse(std::vector<size_t> const& blockHashes, std::map<RequestIdType, Response>::iterator it)
208+
{
209+
auto reqId = mCurrentRequest.value();
210+
auto count = --mRemainSendCount[reqId];
211+
TLLM_CHECK(count >= 0);
212+
if (count == 0)
213+
{
214+
mRemainSendCount.erase(reqId);
215+
216+
// TODO(zhengd): pass the hashes directly instead of update llmRequest
217+
auto llmRequest = it->second.mRequest;
218+
llmRequest->setRequestedBlockHashes(std::move(blockHashes));
219+
220+
if (common::getEnvParallelCacheSend())
221+
{
222+
// TODO: Use a thread pool and check for thread safety.
223+
std::thread(&DataResponder::Impl::sendAndRemoveResponse, this, it->first, std::move(it->second))
224+
.detach();
225+
}
226+
else
227+
{
228+
DataResponder::Impl::sendAndRemoveResponse(it->first, std::move(it->second));
229+
}
230+
removeResponse(it);
231+
}
232+
mCurrentRequest = std::nullopt;
233+
}
234+
207235
void response() noexcept
208236
{
209237
try
@@ -237,40 +265,22 @@ class DataResponder::Impl
237265
auto it = getCurrentResponse();
238266
if (it != mReadyResponses.end())
239267
{
240-
auto reqId = mCurrentRequest.value();
241-
auto count = --mRemainSendCount[reqId];
242-
TLLM_CHECK(count >= 0);
243-
if (count == 0)
268+
sendResponse(blockHashes, it);
269+
}
270+
else
271+
{
272+
auto it = getCurrentResponse();
273+
while (it == mReadyResponses.end())
244274
{
245-
mRemainSendCount.erase(reqId);
246-
247-
// TODO(zhengd): pass the hashes directly instead of update llmRequest
248-
auto llmRequest = it->second.mRequest;
249-
llmRequest->setRequestedBlockHashes(std::move(blockHashes));
250-
251-
if (common::getEnvParallelCacheSend())
252-
{
253-
// TODO: Use a thread pool and check for thread safety.
254-
std::thread(
255-
&DataResponder::Impl::sendAndRemoveResponse, this, it->first, std::move(it->second))
256-
.detach();
257-
}
258-
else
275+
std::unique_lock lk(mCondMutex);
276+
mResponderCv.wait(lk, [this]() { return (mAnyReady || mTerminate); });
277+
if (mTerminate)
259278
{
260-
DataResponder::Impl::sendAndRemoveResponse(it->first, std::move(it->second));
279+
break;
261280
}
262-
removeResponse(it);
281+
it = getCurrentResponse();
263282
}
264-
mCurrentRequest = std::nullopt;
265-
}
266-
else
267-
{
268-
TLLM_CHECK_WITH_INFO(!mCurrentRequest.has_value(),
269-
"This executor does not have a prepared KV cache for request ID: %zu, and the "
270-
"mReadyResponses size is: %zu. mpi rank :%d ",
271-
mCurrentRequest.value(), mReadyResponses.size(), mpi::MpiComm::world().getRank());
272-
std::unique_lock lk(mCondMutex);
273-
mResponderCv.wait(lk, [this]() { return (mAnyReady || mTerminate); });
283+
sendResponse(blockHashes, it);
274284
}
275285
}
276286
}

0 commit comments

Comments
 (0)