@@ -978,6 +978,61 @@ INSTANTIATE_TEST_SUITE_P(VariousSetsOfLowScoreBlocks, CacheEvictionLowScoreBlock
978
978
return info.param .test_id ;
979
979
});
980
980
981
+ struct CacheEvictionAdaptiveRKVLowScoreAndSimilarityTestStruct {
982
+ std::string test_id;
983
+ size_t tokens_over_max_cache_size;
984
+ ov::genai::AdaptiveRKVConfig adaptive_rkv_config;
985
+ std::vector<float > evictable_area_token_scores;
986
+ std::vector<float > evictable_area_token_similarity;
987
+ std::set<size_t > ref_evicted_blocks;
988
+ };
989
+
990
+ using CacheEvictionAdaptiveRKVLowScoreAndSimilarityParameterizedTest = ::testing::TestWithParam<CacheEvictionAdaptiveRKVLowScoreAndSimilarityTestStruct>;
991
+
992
+ // clang-format off
993
+ const std::vector<CacheEvictionAdaptiveRKVLowScoreAndSimilarityTestStruct> ADAPTIVE_RKV_LOW_SCORE_AND_SIMILARITY_EVICTION_TEST_CASES = {
994
+ // Expecting `max_cache_size - start_area - evictable_area equal` to 3 blocks, block size of 2
995
+ // same, but with multiple blocks in evictable area
996
+ {
997
+ " three_blocks" ,
998
+ 2 * 4 + 2 , // 2 blocks worth of overflow + 2 tokens, amounting to 3 blocks to be evicted
999
+ ov::genai::AdaptiveRKVConfig (0.9 , 1 ),
1000
+ {999.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 , 0.0 },
1001
+ {10.0 , 11.0 , 0.5 , 0.1 , 18.0 , 19.0 , 0.2 , 0.4 , 23.1 , 24.2 , 19.8 , 18.7 },
1002
+ {2 , 4 }
1003
+ }
1004
+ };
1005
+ // clang-format on
1006
+
1007
+ TEST_P (CacheEvictionAdaptiveRKVLowScoreAndSimilarityParameterizedTest, EvictsLowestScoredBlocksAndKeepsDiverse) {
1008
+ auto test_struct = GetParam ();
1009
+ size_t num_decoder_layers = DEFAULT_NUM_DECODER_LAYERS;
1010
+ auto algo = ov::genai::CacheEvictionAlgorithm (ov::genai::CacheEvictionConfig (2 , 2 , 6 , ov::genai::AggregationMode::ADAPTIVE_RKV, /* apply_rotation = */ false , /* snapkv_window_size = */ 0 ), 2 , num_decoder_layers, /* max_pool_window_size = */ 1 );
1011
+
1012
+ auto scores = get_mock_scores (num_decoder_layers, algo.get_max_cache_size_after_eviction () + test_struct.tokens_over_max_cache_size );
1013
+ for (size_t layer_idx = 0 ; layer_idx < num_decoder_layers; layer_idx++) {
1014
+ auto & scores_per_layer = scores[layer_idx];
1015
+ fill_scores (scores_per_layer, 0 , scores_per_layer.get_size (), 1.0 );
1016
+ for (size_t evictable_area_tok_idx = 0 ; evictable_area_tok_idx < test_struct.evictable_area_token_scores .size (); evictable_area_tok_idx++) {
1017
+ scores_per_layer.data <float >()[2 + evictable_area_tok_idx] = test_struct.evictable_area_token_scores [evictable_area_tok_idx];
1018
+ }
1019
+ }
1020
+ algo.register_new_token_scores (scores);
1021
+ auto similarity = std::vector<std::vector<float >>(DEFAULT_NUM_DECODER_LAYERS, test_struct.evictable_area_token_similarity );
1022
+ algo.register_token_similarity (get_layer_scores_from_2d_vector (similarity));
1023
+
1024
+ auto test_evicted_blocks = algo.evict_logical_blocks ();
1025
+ auto ref_evicted_blocks = test_struct.ref_evicted_blocks ;
1026
+ for (size_t layer_idx = 0 ; layer_idx < num_decoder_layers; layer_idx++) {
1027
+ EXPECT_EQ (test_evicted_blocks[layer_idx], ref_evicted_blocks);
1028
+ }
1029
+ }
1030
+
1031
+ INSTANTIATE_TEST_SUITE_P (VariousSetsOfLowScoreAndDiverseBlocks, CacheEvictionAdaptiveRKVLowScoreAndSimilarityParameterizedTest,
1032
+ ::testing::ValuesIn (ADAPTIVE_RKV_LOW_SCORE_AND_SIMILARITY_EVICTION_TEST_CASES),
1033
+ [](const testing::TestParamInfo<CacheEvictionAdaptiveRKVLowScoreAndSimilarityParameterizedTest::ParamType>& info) {
1034
+ return info.param .test_id ;
1035
+ });
981
1036
982
1037
static constexpr size_t BLOCKS_TO_EVICT = 3 ; // 3 blocks to evict
983
1038
struct NormalizationSettingTestStruct {
0 commit comments