Skip to content

Commit 73f363e

Browse files
committed
WIP
1 parent a5aebdd commit 73f363e

File tree

2 files changed

+56
-0
lines changed

2 files changed

+56
-0
lines changed

src/cpp/src/continuous_batching/cache_eviction.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,7 @@ namespace ov::genai {
406406
std::priority_queue<ScoreAndBlockIdx> score_block_queue;
407407
double total_sum = 0.0;
408408
for (size_t i = 0; i < evictable_area_block_scores.size(); i++) {
409+
total_sum += evictable_area_block_scores[i];
409410
score_block_queue.push({evictable_area_block_scores[i], i});
410411
}
411412

tests/cpp/cache_eviction.cpp

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -978,6 +978,61 @@ INSTANTIATE_TEST_SUITE_P(VariousSetsOfLowScoreBlocks, CacheEvictionLowScoreBlock
978978
return info.param.test_id;
979979
});
980980

981+
// One parameterized case for the Adaptive-RKV "low score and similarity" eviction test.
// Member order matters: the test cases below are brace-initialized positionally.
struct CacheEvictionAdaptiveRKVLowScoreAndSimilarityTestStruct {
982+
std::string test_id;  // used as the generated test name suffix
983+
size_t tokens_over_max_cache_size;  // added to get_max_cache_size_after_eviction() to size the mock scores
984+
ov::genai::AdaptiveRKVConfig adaptive_rkv_config;  // NOTE(review): not read by the test body visible in this file — confirm intended use
985+
std::vector<float> evictable_area_token_scores;  // per-token scores written into every layer starting at token index 2
986+
std::vector<float> evictable_area_token_similarity;  // replicated for every layer and passed to register_token_similarity()
987+
std::set<size_t> ref_evicted_blocks;  // expected result of evict_logical_blocks() for each layer
988+
};
989+
990+
// Value-parameterized fixture type; each instance receives one CacheEvictionAdaptiveRKVLowScoreAndSimilarityTestStruct via GetParam().
using CacheEvictionAdaptiveRKVLowScoreAndSimilarityParameterizedTest = ::testing::TestWithParam<CacheEvictionAdaptiveRKVLowScoreAndSimilarityTestStruct>;
991+
992+
// clang-format off
993+
// Test cases for the Adaptive-RKV low-score-and-similarity eviction test; fields are listed in struct member order.
const std::vector<CacheEvictionAdaptiveRKVLowScoreAndSimilarityTestStruct> ADAPTIVE_RKV_LOW_SCORE_AND_SIMILARITY_EVICTION_TEST_CASES = {
994+
// Expecting `max_cache_size - start_area - evictable_area` to equal 3 blocks, with a block size of 2
// NOTE(review): the test body builds its config with (2, 2, 6, ...) — confirm this expectation still holds for that config.
995+
// Multiple blocks in the evictable area
// NOTE(review): the original wording started with "same, but ...", yet this is the only case in the list.
996+
{
997+
"three_blocks",
998+
// NOTE(review): with the block size of 2 stated above, 2 * 4 tokens would be 4 blocks of overflow, not 2 — confirm the arithmetic.
2 * 4 + 2, // 2 blocks worth of overflow + 2 tokens, amounting to 3 blocks to be evicted
999+
ov::genai::AdaptiveRKVConfig(0.9, 1),
1000+
// Per-token scores: 11 values, while the similarity list below has 12.
// NOTE(review): tokens not covered here keep whatever fill_scores(..., 1.0) wrote in the test body — confirm the missing 12th value is intentional.
{999.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0},
1001+
// Per-token similarity: 12 values.
{10.0, 11.0, 0.5, 0.1, 18.0, 19.0, 0.2, 0.4, 23.1, 24.2, 19.8, 18.7},
1002+
// Expected evicted logical block indices.
// NOTE(review): two blocks are listed although the comment above says 3 blocks are to be evicted — confirm.
{2, 4}
1003+
}
1004+
};
1005+
// clang-format on
1006+
1007+
// Builds a CacheEvictionAlgorithm in ADAPTIVE_RKV aggregation mode, registers mock per-layer token
// scores and token similarity taken from the test case, runs one eviction pass, and expects every
// layer to evict exactly the reference set of logical blocks.
// NOTE(review): test_struct.adaptive_rkv_config is never read in this body — confirm whether it
// should be forwarded to the CacheEvictionConfig.
TEST_P(CacheEvictionAdaptiveRKVLowScoreAndSimilarityParameterizedTest, EvictsLowestScoredBlocksAndKeepsDiverse) {
1008+
// Bind by const reference: the parameter holds several vectors and is only read here.
const auto& test_struct = GetParam();
1009+
size_t num_decoder_layers = DEFAULT_NUM_DECODER_LAYERS;
1010+
auto algo = ov::genai::CacheEvictionAlgorithm(ov::genai::CacheEvictionConfig(2, 2, 6, ov::genai::AggregationMode::ADAPTIVE_RKV, /* apply_rotation = */ false, /* snapkv_window_size = */ 0), 2, num_decoder_layers, /* max_pool_window_size = */ 1);
1011+
1012+
// Mock scores cover the post-eviction cache size plus the requested overflow.
auto scores = get_mock_scores(num_decoder_layers, algo.get_max_cache_size_after_eviction() + test_struct.tokens_over_max_cache_size);
1013+
for (size_t layer_idx = 0; layer_idx < num_decoder_layers; layer_idx++) {
1014+
auto& scores_per_layer = scores[layer_idx];
1015+
// Baseline value for every token; the test-case scores below overwrite part of it.
fill_scores(scores_per_layer, 0, scores_per_layer.get_size(), 1.0);
1016+
for (size_t evictable_area_tok_idx = 0; evictable_area_tok_idx < test_struct.evictable_area_token_scores.size(); evictable_area_tok_idx++) {
1017+
// Offset 2 skips the leading tokens — presumably the start area size from the config above; confirm.
scores_per_layer.data<float>()[2 + evictable_area_tok_idx] = test_struct.evictable_area_token_scores[evictable_area_tok_idx];
1018+
}
1019+
}
1020+
algo.register_new_token_scores(scores);
1021+
// Same similarity values for every layer; sized with the same layer count used for the algorithm and the scores.
auto similarity = std::vector<std::vector<float>>(num_decoder_layers, test_struct.evictable_area_token_similarity);
1022+
algo.register_token_similarity(get_layer_scores_from_2d_vector(similarity));
1023+
1024+
auto test_evicted_blocks = algo.evict_logical_blocks();
1025+
const auto& ref_evicted_blocks = test_struct.ref_evicted_blocks;
1026+
for (size_t layer_idx = 0; layer_idx < num_decoder_layers; layer_idx++) {
1027+
EXPECT_EQ(test_evicted_blocks[layer_idx], ref_evicted_blocks);
1028+
}
1029+
}
1030+
1031+
// Instantiates the parameterized test once per entry of ADAPTIVE_RKV_LOW_SCORE_AND_SIMILARITY_EVICTION_TEST_CASES;
// the name generator returns each case's test_id.
INSTANTIATE_TEST_SUITE_P(VariousSetsOfLowScoreAndDiverseBlocks, CacheEvictionAdaptiveRKVLowScoreAndSimilarityParameterizedTest,
1032+
::testing::ValuesIn(ADAPTIVE_RKV_LOW_SCORE_AND_SIMILARITY_EVICTION_TEST_CASES),
1033+
[](const testing::TestParamInfo<CacheEvictionAdaptiveRKVLowScoreAndSimilarityParameterizedTest::ParamType>& info) {
1034+
return info.param.test_id;
1035+
});
9811036

9821037
static constexpr size_t BLOCKS_TO_EVICT = 3; // 3 blocks to evict
9831038
struct NormalizationSettingTestStruct {

0 commit comments

Comments
 (0)