Skip to content

Commit 92f191d

Browse files
committed
move test to precommit scope
1 parent e890901 commit 92f191d

File tree

1 file changed

+17
-12
lines changed

1 file changed

+17
-12
lines changed

tests/python_tests/test_kv_cache_eviction.py

Lines changed: 17 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -268,36 +268,41 @@ def test_optimized_generation_longbench(test_struct):
268268
assert avg_optimization_ratio >= test_struct.avg_cache_usage_optimization_ratio
269269

270270

271-
MILEBENCH_CACHE_EVICTION_CONFIG = CacheEvictionConfig(start_size=32, recent_size=64, max_cache_size=352, aggregation_mode=AggregationMode.SUM)
272-
273-
@pytest.mark.nightly
274-
@pytest.mark.parametrize("device", ["CPU", "GPU"])
271+
@pytest.mark.precommit
275272
@pytest.mark.parametrize(
276273
("test_struct", "download_test_content"), [
277-
(BenchmarkTestData("ALFRED", 0.011, 1.440, 1.574), "MileBench_part0.tar.gz"),
278-
(BenchmarkTestData("MMCoQA", 0.032, 1.843, 1.620), "MileBench_part2.tar.gz"),
279-
(BenchmarkTestData("WikiVQA", 0.032, 1.412, 1.527), "MileBench_part5.tar.gz"),
274+
(BenchmarkTestData("ALFRED", 0.006, 2.10, 2.33), "MileBench_part0.tar.gz"),
275+
(BenchmarkTestData("MMCoQA", 0.001, 1.91, 1.73), "MileBench_part2.tar.gz"),
276+
(BenchmarkTestData("WikiVQA", 0.001, 1.41, 1.47), "MileBench_part5.tar.gz"),
280277
],
281278
indirect=["download_test_content"]
282279
)
283-
def test_optimized_generation_milebench(device, test_struct, download_test_content):
284-
seqs_per_request = 32
285-
num_kv_blocks = 1000 if device == "CPU" else 500
280+
def test_optimized_generation_milebench(test_struct, download_test_content):
281+
seqs_per_request = 16
282+
device = "CPU"
283+
num_kv_blocks = 500
286284
model_id = "Qwen/Qwen2-VL-2B-Instruct"
287285
_, _, models_path = _download_and_convert_model(model_id, OVModelForVisualCausalLM)
288286
scheduler_config = get_scheduler_config(num_kv_blocks)
289287

290288
scheduler_config_opt = get_scheduler_config(num_kv_blocks)
291289
scheduler_config_opt.use_cache_eviction = True
292290
if scheduler_config_opt.use_cache_eviction:
293-
scheduler_config_opt.cache_eviction_config = MILEBENCH_CACHE_EVICTION_CONFIG
291+
eviction_config = CacheEvictionConfig(
292+
start_size=32,
293+
recent_size=64,
294+
max_cache_size=224,
295+
aggregation_mode=AggregationMode.SUM,
296+
snapkv_window_size=8,
297+
)
298+
scheduler_config_opt.cache_eviction_config = eviction_config
294299

295300
model_cb_noopt = ContinuousBatchingPipeline(models_path, scheduler_config, device, properties=get_default_llm_properties())
296301
model_cb_opt = ContinuousBatchingPipeline(models_path, scheduler_config_opt, device, properties=get_default_llm_properties())
297302

298303
generation_config = GenerationConfig() # expecting default greedy sampling
299304
generation_config.num_return_sequences = 1
300-
generation_config.max_new_tokens = 512
305+
generation_config.max_new_tokens = 64 # change to 512 for full evaluation
301306
generation_config.do_sample = False
302307

303308
subset = test_struct.subset

0 commit comments

Comments
 (0)