We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent d9a7806 commit 50a8290 · Copy full SHA for 50a8290
src/plugins/intel_gpu/src/graph/impls/cm/paged_attention_gen.cpp
@@ -174,7 +174,11 @@ size_t get_partition_size() {
174
// k_partition_blok_num = 1;
175
// const size_t k_partition_blok_num = 16;
176
// return k_partition_blok_num * PA_KV_CACHE_BLOCK_SIZE; // 128
177
- return 256;
+ if (PA_KV_CACHE_BLOCK_SIZE < 128) {
178
+ return 128;
179
+ } else {
180
+ return PA_KV_CACHE_BLOCK_SIZE;
181
+ }
182
}
183
184
size_t get_partition_num(const size_t kv_len) {
0 commit comments