Commit dfbd5ea

[CPU] Get rid of 'withBias' flag in FullyConnected node
Not necessary anymore: the bias memory argument is always present, and its descriptor is either empty or not.
1 parent 5493015 commit dfbd5ea
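
The idiom used throughout the diff is to derive bias presence from the always-present ARG_BIAS memory argument instead of carrying a separate flag. A minimal sketch of the before/after (variable names follow the surrounding code in the diff):

    // Before: bias presence was cached as a dedicated attribute.
    const bool withBias = fcAttrs.withBias;

    // After: the ARG_BIAS argument always exists; its descriptor is empty
    // when no bias was provided, so presence is derived on the fly.
    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();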

8 files changed: +30 -29 lines changed


src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp

Lines changed: 3 additions & 2 deletions

@@ -760,12 +760,14 @@ DnnlShapeAgnosticDataPtr DnnlConvolutionPrimitive::createShapeAgnosticData(const
     OPENVINO_ASSERT(!cacheWeightsWithUndefData,
                     "dnnl convolution weights caching for dynamic shapes is not implemented");
 
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+
     ConvAttrs attrs{{1},
                     {0},
                     {0},
                     {0},
                     AutoPaddingType::None,
-                    fcAttrs.withBias,
+                    hasBias,
                     fcAttrs.weightsNonTransposed,
                     false,
                     false,

@@ -881,7 +883,6 @@ DnnlMemoryDescPtr DnnlConvolutionPrimitive::makeTransposedWeightDescriptor(const
                                                                            const DnnlMemoryDescPtr& dstDesc,
                                                                            const ConvAttrs& attrs) {
     FCAttrs fcAttrs{};
-    fcAttrs.withBias = attrs.withBias;
     fcAttrs.weightsNonTransposed = attrs.weightsNonTransposed;
 
     return DnnlFCPrimitive::makeTransposedWeightDescriptor(srcDesc, dstDesc, fcAttrs);

src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp

Lines changed: 0 additions & 3 deletions

@@ -14,9 +14,6 @@ namespace ov::intel_cpu {
 
 // @todo require explicit initialization of all the attributes?
 struct FCAttrs {
-    // @todo probably we don't want with bias flag, since this information is already
-    // a part of src memory descs
-    bool withBias = false;
     bool weightsNonTransposed = false;
     bool sparseWeights = false;
     uint64_t dynamicQuantizationGroupSize = 0;

src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp

Lines changed: 3 additions & 4 deletions

@@ -278,8 +278,9 @@ const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
                        const std::shared_ptr<DnnlShapeAgnosticData>& shareAgnosticData) const {
 
                 const bool fcSemantic = true;
+                const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
                 ConvAttrs convAttrs{{1}, {0}, {0}, {0},
-                                    AutoPaddingType::None, attrs.withBias, attrs.weightsNonTransposed,
+                                    AutoPaddingType::None, hasBias, attrs.weightsNonTransposed,
                                     false, false, fcSemantic, false, ZeroPointsType::None, {}, attrs.postOps};
 
                 auto primitive =

@@ -366,9 +367,7 @@ const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
             VERIFY(noSparseDecompression(config), UNSUPPORTED_SPARSE_WEIGHTS);
             VERIFY(all_of(f32, srcType(config), dstType(config)), UNSUPPORTED_SRC_PRECISIONS);
             VERIFY(any_of(weiType(config), f32, i8), UNSUPPORTED_WEI_PRECISIONS);
-            if (config.attrs.withBias) {
-                VERIFY(biaType(config) == f32, UNSUPPORTED_SRC_PRECISIONS);
-            }
+            VERIFY(implication(hasBias(config), biaType(config) == f32), UNSUPPORTED_SRC_PRECISIONS);
             VERIFY(weiRank(config) == 2U, UNSUPPORTED_WEI_RANK);
             VERIFY(MatMulKleidiAIExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
 
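
The implication(hasBias(config), biaType(config) == f32) check reads as logical implication: the bias precision is only constrained when a bias is actually present. A minimal equivalent of such a helper (the real utility lives elsewhere in the plugin and may differ in name or signature):

    // Logical implication: true when the premise is false,
    // otherwise the conclusion decides the result.
    constexpr bool implication(bool premise, bool conclusion) {
        return !premise || conclusion;
    }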

src/plugins/intel_cpu/src/nodes/executors/implementation_utils.hpp

Lines changed: 5 additions & 0 deletions

@@ -33,6 +33,11 @@ ov::element::Type memoryDescType(const Config& config) {
     return config.descs.at(idx)->getPrecision();
 }
 
+template <typename Config>
+bool hasBias(const Config& config) {
+    return !config.descs.at(ARG_BIAS)->empty();
+}
+
 template <typename Config>
 ov::element::Type srcType(const Config& config) {
     return memoryDescType<Config, ARG_SRC>(config);
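
Unlike the runtime checks on MemoryArgs used in the other files, this helper inspects the bias descriptor stored in the executor config, so it can be used inside supports() predicates before any memory is bound. A hypothetical usage fragment (not part of the commit):

    // Hypothetical supports() fragment: only validate the bias precision
    // when a bias descriptor is actually present in the config.
    if (hasBias(config) && biaType(config) != ov::element::f32) {
        return false;  // this executor handles f32 bias only
    }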

src/plugins/intel_cpu/src/nodes/executors/kleidiai/kleidiai_mm.cpp

Lines changed: 9 additions & 12 deletions

@@ -25,6 +25,7 @@
 #include "nodes/executors/executor.hpp"
 #include "nodes/executors/fullyconnected_config.hpp"
 #include "nodes/executors/memory_arguments.hpp"
+#include "openvino/core/except.hpp"
 #include "openvino/core/parallel.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "utils/cpu_utils.hpp"

@@ -73,13 +74,16 @@ MatMulKleidiAIExecutor::MatMulKleidiAIExecutor(const FCAttrs& attrs,
     auto N = weiDims[0];
     auto K = weiDims[1];
 
-    if (memory.at(ARG_BIAS)->getDataAs<float>() == nullptr) {
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+
+    if (hasBias) {
+        biasMem = memory.at(ARG_BIAS);
+    } else {
         auto biasDesc = std::make_shared<CpuBlockedMemoryDesc>(f32, Shape({N}));
         biasMem = std::make_shared<Memory>(context->getEngine(), biasDesc);
         biasMem->nullify();
-    } else {
-        biasMem = memory.at(ARG_BIAS);
     }
+
     if (memory.at(ARG_SRC)->getPrecision() != memory.at(ARG_WEI)->getPrecision()) {
         aclfcAttrs.isConvertedWeights = true;
     }

@@ -285,15 +289,8 @@ void MatMulKleidiAIExecutor::execute(const MemoryArgs& memory) {
     }
 }
 
-void MatMulKleidiAIExecutor::moveMemToNumaNode(int numaNodeID) {
-    if (curNumaNode == numaNodeID) {
-        return;
-    }
-    curNumaNode = numaNodeID;
-    mbind_move(packedWeights, numaNodeID);
-    if (m_attrs.withBias) {
-        mbind_move(m_memoryArgs.at(ARG_BIAS), numaNodeID);
-    }
+void MatMulKleidiAIExecutor::moveMemToNumaNode([[maybe_unused]] int numaNodeID) {
+    OPENVINO_THROW_NOT_IMPLEMENTED("'moveMemToNumaNode' is not implemented by the executor");
 }
 
 } // namespace ov::intel_cpu
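
When no bias is supplied, the constructor now builds a zero-filled f32 bias of shape {N} instead of relying on a null data pointer, so downstream code can always read a valid bias buffer. A standalone sketch of that fallback (the makeZeroBias name is illustrative, not part of the commit):

    // Illustrative helper: allocate and zero an f32 bias of shape {N}
    // so callers can always hand a valid bias buffer to the kernel.
    MemoryPtr makeZeroBias(const ExecutorContext::CPtr& context, size_t N) {
        auto biasDesc = std::make_shared<CpuBlockedMemoryDesc>(ov::element::f32, Shape({N}));
        auto biasMem = std::make_shared<Memory>(context->getEngine(), biasDesc);
        biasMem->nullify();  // fill with zeros
        return biasMem;
    }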

src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp

Lines changed: 1 addition & 1 deletion

@@ -151,7 +151,7 @@ void MlasGemmExecutor::moveMemToNumaNode(int numaNodeID) {
     }
     curNumaNode = numaNodeID;
     mbind_move(packedWeights, numaNodeID);
-    if (m_attrs.withBias) {
+    if (!m_memoryArgs.at(ARG_BIAS)->getDesc().empty()) {
         mbind_move(m_memoryArgs.at(ARG_BIAS), numaNodeID);
     }
 }

src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp

Lines changed: 7 additions & 3 deletions

@@ -19,6 +19,7 @@
 #include "nodes/common/cpu_memcpy.h"
 #include "nodes/executors/executor.hpp"
 #include "nodes/executors/fullyconnected_config.hpp"
+#include "nodes/executors/implementation_utils.hpp"
 #include "nodes/executors/memory_arguments.hpp"
 #include "nodes/executors/shl/shl_utils.hpp"
 #include "openvino/core/except.hpp"

@@ -83,7 +84,7 @@ bool ShlFCExecutor::supports(const FCConfig& config) {
         return false;
     }
 
-    if (config.attrs.withBias) {
+    if (hasBias(config)) {
         const auto& biaDesc = config.descs.at(ARG_BIAS);
         if (biaDesc->getPrecision() != ov::element::f32) {
             DEBUG_LOG("ShlFCExecutor: supports only f32 bias");

@@ -104,7 +105,9 @@ bool ShlFCExecutor::supports(const FCConfig& config) {
     return true;
 }
 
-ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const MemoryArgs& memory, const ExecutorContext::CPtr& context)
+ShlFCExecutor::ShlFCExecutor([[maybe_unused]] const FCAttrs& attrs,
+                             const MemoryArgs& memory,
+                             const ExecutorContext::CPtr& context)
     : packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context)) {
     const auto& srcDesc = memory.at(ARG_SRC)->getDescPtr();
     const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr();

@@ -121,7 +124,8 @@ ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const MemoryArgs& memory, con
                     weiDesc->getShape().getStaticDims());
     dst = ShlTensor(sess, precisionToShlDataType(dstDesc->getPrecision()), getShlDataLayoutByMemoryDesc(dstDesc));
 
-    if (attrs.withBias) {
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+    if (hasBias) {
         const auto& biasDesc = memory.at(ARG_BIAS)->getDescPtr();
         bias = ShlTensor(sess,
                          precisionToShlDataType(biasDesc->getPrecision()),

src/plugins/intel_cpu/src/nodes/fullyconnected.cpp

Lines changed: 2 additions & 4 deletions

@@ -561,8 +561,6 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
 }
 
 void FullyConnected::initSupportedPrimitiveDescriptors() {
-    attrs.withBias = getOriginalInputPrecisionAtPort(BIAS) != ov::element::dynamic;
-
     attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
                                                         getOriginalInputPrecisionAtPort(DATA),
                                                         context->getConfig().fcSparseWeiDecompressionRate);

@@ -642,8 +640,8 @@ void FullyConnected::needSplitMemoryForTensorParallel() {
                                       : split_horizontal(context->getEngine(), wgt, 0, tp_cfg.w_rank, tp_cfg.w_size);
         memory[ARG_WEI] = tp_cfg.cached_splited_weight;
         // bias
-        if (attrs.withBias) {
-            auto bias = getSrcMemoryAtPort(BIAS);
+        const auto& bias = getSrcMemoryAtPort(BIAS);
+        if (!bias->getDesc().empty()) {
             auto select_bias = split_horizontal(context->getEngine(), bias, 0, tp_cfg.w_rank, tp_cfg.w_size);
             tp_cfg.cached_splited_bias = std::move(select_bias);
         } else {
