
Commit 7e1e954

davidsnam-intel authored and IamRam3 committed
[GPU] Fix performance degradation in flux.1-schnell (openvinotoolkit#32612)
### Details:
- Performance degradation in `flux.1-schnell` caused by [PR#32386](openvinotoolkit#32386).
- Resolved by not selecting the oneDNN implementation when the concatenation's feature dimension isn't block-aligned.
1 parent b637562 commit 7e1e954

File tree: 3 files changed, +65 −25 lines

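The fix hinges on a divisibility rule: blocked GPU layouts such as `b_fs_yx_fsv16` store the feature axis in fixed-size blocks (16 here), and a feature count that is not a multiple of the block size leaves zero-padded slots in the last block, which oneDNN's concatenation cannot consume. A minimal sketch of that arithmetic, using the 18-feature shape from the new unit test (standalone example, not plugin code):

```cpp
#include <cstdio>

int main() {
    // b_fs_yx_fsv16 stores features in blocks of 16; a feature count of 18
    // (the shape used by the new unit test) needs 2 blocks with 14 padded slots.
    const int feature = 18, block = 16;
    const int padded = (feature + block - 1) / block * block;  // round up to a block multiple
    std::printf("aligned: %s, blocks: %d, padded slots: %d\n",
                feature % block == 0 ? "yes" : "no", padded / block, padded - feature);
    return 0;
}
```

Because `18 % 16 != 0`, such a concatenation now stays on the OpenCL implementation instead of oneDNN.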

src/plugins/intel_gpu/src/graph/graph_optimizer/basic_memory_dependencies.cpp

Lines changed: 0 additions & 25 deletions
```diff
@@ -59,31 +59,6 @@ void basic_memory_dependencies::run(program& p) {
                 }
             }
         }
-
-        // onednn concatenation doesn't support non-zero padding which can occur for unaligned feature.
-        if (node->is_type<concatenation>()) {
-            auto is_feature_aligned = [](const cldnn::layout& l) {
-                if (!format::is_blocked(l.format)) {
-                    return true;
-                }
-
-                const auto& order = format::internal_order(l.format);
-                int f_bs = 1;
-                for (const auto& [dim, bs] : format::block_sizes(l.format)) {
-                    if (dim < order.size() && order[dim] == 'f') {
-                        f_bs = bs;
-                    }
-                }
-                return l.feature() % f_bs == 0;
-            };
-
-            if (node->is_dynamic() || (!node->is_dynamic() && !is_feature_aligned(node->get_output_layout()))) {
-                node->can_share_buffer(false);
-                for (auto& dep : node->get_dependencies()) {
-                    dep.first->can_share_buffer(false);
-                }
-            }
-        }
     }
 
     // Note we iterate over processing order, it means if primitve has processing num greater than any of outputs,
```
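This removal is what restores the lost performance: the old workaround turned off buffer sharing for every dynamic concatenation and all of its dependencies, regardless of whether the feature axis was actually unaligned, so memory reuse was pessimized across the graph. A simplified model of the over-broad condition (hypothetical `Node` type, not the plugin's):

```cpp
#include <iostream>

struct Node {
    bool is_dynamic;
    int feature;                   // feature count, known only for static shapes
    bool can_share_buffer = true;  // buffer-reuse flag, as in the removed code
};

// Old heuristic: disable sharing when the node is dynamic OR statically unaligned.
// The `is_dynamic` arm fires for every dynamic concat, aligned or not.
void old_workaround(Node& n, int feature_block_size) {
    if (n.is_dynamic || n.feature % feature_block_size != 0)
        n.can_share_buffer = false;
}

int main() {
    Node dynamic_but_aligned{true, 32};
    old_workaround(dynamic_but_aligned, 16);
    // Sharing is disabled even though 32 % 16 == 0 -> unnecessary memory pressure.
    std::cout << std::boolalpha << dynamic_but_aligned.can_share_buffer << "\n";  // false
    return 0;
}
```

The alignment constraint itself is not dropped; it moves into oneDNN implementation selection in the next file.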

src/plugins/intel_gpu/src/graph/impls/onednn/concatenation_onednn.hpp

Lines changed: 30 additions & 0 deletions
```diff
@@ -54,6 +54,36 @@ struct ConcatenationImplementationManager : public ImplementationManager {
         if (out_layout.data_padding)
             return false;
 
+        auto is_feature_aligned = [](const layout& l) {
+            if (!format::is_blocked(l.format))
+                return true;
+
+            const auto& order = format::internal_order(l.format);
+            const size_t feature_dim_idx = order.find('f');
+            if (feature_dim_idx == std::string::npos)
+                return true;
+
+            auto feature_dim = l.get_partial_shape()[feature_dim_idx];
+            if (feature_dim.is_dynamic())
+                return false;
+
+            const auto& block_sizes = format::block_sizes(l.format);
+            auto block_it = std::find_if(block_sizes.begin(), block_sizes.end(), [&](const auto& block) {
+                return block.first == feature_dim_idx;
+            });
+
+            if (block_it == block_sizes.end())
+                return true;
+
+            const int feature_block_size = block_it->second;
+            return feature_dim.get_length() % feature_block_size == 0;
+        };
+
+        // onednn concatenation doesn't support non-zero padding which can occur for unaligned feature.
+        if (!is_feature_aligned(out_layout)) {
+            return false;
+        }
+
         const auto& concat_node = node.as<concatenation>();
         auto concat_axis = concat_node.get_primitive()->axis;
```
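The check now lives in the implementation manager's validation path, so unaligned cases are simply never offered to oneDNN: validation returns `false` and selection falls back to the OpenCL kernel, without touching buffer-sharing decisions. A self-contained sketch of the new predicate, with hypothetical stand-ins for the plugin's `format`/`layout` machinery:

```cpp
#include <algorithm>
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Stand-ins for the plugin's types (assumptions for illustration): `order` is
// the internal dim order (assumed "bfyx" for b_fs_yx_fsv16) and `block_sizes`
// lists (dim index, block size) pairs -- {{1, 16}} for fsv16.
bool is_feature_aligned(const std::string& order,
                        const std::vector<std::pair<size_t, int>>& block_sizes,
                        const std::vector<long>& shape) {
    const size_t feature_dim_idx = order.find('f');
    if (feature_dim_idx == std::string::npos)
        return true;  // no feature axis at all

    auto block_it = std::find_if(block_sizes.begin(), block_sizes.end(),
                                 [&](const auto& b) { return b.first == feature_dim_idx; });
    if (block_it == block_sizes.end())
        return true;  // feature axis is not blocked, nothing to align

    return shape[feature_dim_idx] % block_it->second == 0;
}

int main() {
    // The unit test's layout: b_fs_yx_fsv16 with shape {1, 18, 2, 2}.
    std::printf("18 features, fsv16 -> %s\n",
                is_feature_aligned("bfyx", {{1, 16}}, {1, 18, 2, 2}) ? "aligned" : "unaligned");
    std::printf("32 features, fsv16 -> %s\n",
                is_feature_aligned("bfyx", {{1, 16}}, {1, 32, 2, 2}) ? "aligned" : "unaligned");
    return 0;
}
```

Note that the real check also rejects dynamic feature dimensions (`feature_dim.is_dynamic()` returns `false`); that branch is omitted here since the sketch uses concrete shapes.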

src/plugins/intel_gpu/tests/unit/test_cases/concatenation_gpu_test.cpp

Lines changed: 35 additions & 0 deletions
```diff
@@ -1666,6 +1666,41 @@ TEST(concat_gpu_onednn, basic_input_types) {
     }
 }
 
+TEST(concat_gpu_onednn, impl_selection_unaligned_feature_axis) {
+    auto& engine = get_test_engine();
+    if (!engine.get_device_info().supports_immad)
+        return;
+
+    layout in_layout = { data_types::f16, format::b_fs_yx_fsv16, { 1, 18, 2, 2 } };
+    auto input0 = engine.allocate_memory(in_layout);
+    auto input1 = engine.allocate_memory(in_layout);
+
+    topology topology(
+        input_layout("input0", in_layout),
+        input_layout("input1", in_layout),
+        concatenation("concat",
+                      { input_info("input0"), input_info("input1") },
+                      1,
+                      data_types::f16)
+    );
+
+    ExecutionConfig config = get_test_default_config(engine);
+    config.set_property(ov::intel_gpu::optimize_data(true));
+
+    network network(engine, topology, config);
+    network.set_input_data("input0", input0);
+    network.set_input_data("input1", input1);
+
+    auto concat_inst = network.get_primitive("concat");
+    auto impl = concat_inst->get_impl();
+    ASSERT_TRUE(impl != nullptr);
+    ASSERT_TRUE(impl->m_manager != nullptr);
+    EXPECT_EQ(impl->m_manager->get_impl_type(), impl_types::ocl);
+    EXPECT_FALSE(impl->is_onednn());
+
+    ASSERT_NO_THROW(network.execute());
+}
+
 TEST(concat_gpu_onednn, b_fs_yx_fsv16_input_types) {
     auto& engine = get_test_engine();
     if (!engine.get_device_info().supports_immad)
```