diff --git a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp index 4628a6e7686faa..af6389f302176f 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/dnnl/dnnl_convolution_primitive.cpp @@ -759,12 +759,14 @@ DnnlShapeAgnosticDataPtr DnnlConvolutionPrimitive::createShapeAgnosticData(const OPENVINO_ASSERT(!cacheWeightsWithUndefData, "dnnl convolution weights caching for dynamic shapes is not implemented"); + const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty(); + ConvAttrs attrs{{1}, {0}, {0}, {0}, AutoPaddingType::None, - fcAttrs.withBias, + hasBias, fcAttrs.weightsNonTransposed, false, false, @@ -880,7 +882,6 @@ DnnlMemoryDescPtr DnnlConvolutionPrimitive::makeTransposedWeightDescriptor(const const DnnlMemoryDescPtr& dstDesc, const ConvAttrs& attrs) { FCAttrs fcAttrs{}; - fcAttrs.withBias = attrs.withBias; fcAttrs.weightsNonTransposed = attrs.weightsNonTransposed; return DnnlFCPrimitive::makeTransposedWeightDescriptor(srcDesc, dstDesc, fcAttrs); diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp index 834c243ce0add4..0f413463f0cf65 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_config.hpp @@ -14,9 +14,6 @@ namespace ov::intel_cpu { // @todo require explicit initialization of all the attributes? 
struct FCAttrs { - // @todo probably we don't want with bias flag, since this information is already - // a part of src memory descs - bool withBias = false; bool weightsNonTransposed = false; bool sparseWeights = false; uint64_t dynamicQuantizationGroupSize = 0; diff --git a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp index ccbbc4bd7e30dd..24d0b2a7a1951e 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/fullyconnected_implementations.cpp @@ -278,8 +278,9 @@ const std::vector>& getImplementations() { const std::shared_ptr& shareAgnosticData) const { const bool fcSemantic = true; + const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty(); ConvAttrs convAttrs{{1}, {0}, {0}, {0}, - AutoPaddingType::None, attrs.withBias, attrs.weightsNonTransposed, + AutoPaddingType::None, hasBias, attrs.weightsNonTransposed, false, false, fcSemantic, false, ZeroPointsType::None, {}, attrs.postOps}; auto primitive = @@ -366,9 +367,7 @@ const std::vector>& getImplementations() { VERIFY(noSparseDecompression(config), UNSUPPORTED_SPARSE_WEIGHTS); VERIFY(all_of(f32, srcType(config), dstType(config)), UNSUPPORTED_SRC_PRECISIONS); VERIFY(any_of(weiType(config), f32, i8, i4), UNSUPPORTED_WEI_PRECISIONS); - if (config.attrs.withBias) { - VERIFY(biaType(config) == f32, UNSUPPORTED_SRC_PRECISIONS); - } + VERIFY(implication(hasBias(config), biaType(config) == f32), UNSUPPORTED_SRC_PRECISIONS); VERIFY(weiRank(config) == 2U, UNSUPPORTED_WEI_RANK); VERIFY(MatMulKleidiAIExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR); diff --git a/src/plugins/intel_cpu/src/nodes/executors/implementation_utils.hpp b/src/plugins/intel_cpu/src/nodes/executors/implementation_utils.hpp index 51d64522aa53cc..8a5863cdb6c4cd 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/implementation_utils.hpp +++ 
b/src/plugins/intel_cpu/src/nodes/executors/implementation_utils.hpp @@ -33,6 +33,11 @@ ov::element::Type memoryDescType(const Config& config) { return config.descs.at(idx)->getPrecision(); } +template +bool hasBias(const Config& config) { + return !config.descs.at(ARG_BIAS)->empty(); +} + template ov::element::Type srcType(const Config& config) { return memoryDescType(config); diff --git a/src/plugins/intel_cpu/src/nodes/executors/kleidiai/kleidiai_mm.cpp b/src/plugins/intel_cpu/src/nodes/executors/kleidiai/kleidiai_mm.cpp index d662faf80f967c..9e3ec711647206 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/kleidiai/kleidiai_mm.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/kleidiai/kleidiai_mm.cpp @@ -27,6 +27,7 @@ #include "nodes/executors/executor.hpp" #include "nodes/executors/fullyconnected_config.hpp" #include "nodes/executors/memory_arguments.hpp" +#include "openvino/core/except.hpp" #include "openvino/core/parallel.hpp" #include "openvino/core/type/element_type.hpp" #include "utils/cpu_utils.hpp" @@ -75,14 +76,16 @@ MatMulKleidiAIExecutor::MatMulKleidiAIExecutor(const FCAttrs& attrs, auto N = weiDims[0]; auto K = weiDims[1]; - bool hasBias = memory.at(ARG_BIAS)->getDataAs() != nullptr; - if (!hasBias) { + const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty(); + + if (hasBias) { + biasMem = memory.at(ARG_BIAS); + } else { auto biasDesc = std::make_shared(f32, Shape({N})); biasMem = std::make_shared(context->getEngine(), biasDesc); biasMem->nullify(); - } else { - biasMem = memory.at(ARG_BIAS); } + if (memory.at(ARG_SRC)->getPrecision() != memory.at(ARG_WEI)->getPrecision()) { aclfcAttrs.isConvertedWeights = true; } @@ -384,15 +387,8 @@ void MatMulKleidiAIExecutor::execute(const MemoryArgs& memory) { } } -void MatMulKleidiAIExecutor::moveMemToNumaNode(int numaNodeID) { - if (curNumaNode == numaNodeID) { - return; - } - curNumaNode = numaNodeID; - mbind_move(packedWeights, numaNodeID); - if (m_attrs.withBias) { - 
mbind_move(m_memoryArgs.at(ARG_BIAS), numaNodeID); - } +void MatMulKleidiAIExecutor::moveMemToNumaNode([[maybe_unused]] int numaNodeID) { + OPENVINO_THROW_NOT_IMPLEMENTED("'moveMemToNumaNode' is not implemented by the executor"); } } // namespace ov::intel_cpu diff --git a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp index 4741396d65a303..b396f7950a438d 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.cpp @@ -103,8 +103,7 @@ bool MlasGemmExecutor::supports(const FCConfig& config) { } MlasGemmExecutor::MlasGemmExecutor(const FCAttrs& attrs, const MemoryArgs& memory, const ExecutorContext::CPtr& context) - : m_attrs(attrs), - m_memoryArgs(memory), + : m_memoryArgs(memory), packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context, !attrs.weightsNonTransposed)), N(batchDim(memory.at(ARG_WEI)->getStaticDims())), @@ -151,7 +150,7 @@ void MlasGemmExecutor::moveMemToNumaNode(int numaNodeID) { } curNumaNode = numaNodeID; mbind_move(packedWeights, numaNodeID); - if (m_attrs.withBias) { + if (!m_memoryArgs.at(ARG_BIAS)->getDesc().empty()) { mbind_move(m_memoryArgs.at(ARG_BIAS), numaNodeID); } } diff --git a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.hpp b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.hpp index 8618a32cb9f6e3..6f18c750a87963 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.hpp +++ b/src/plugins/intel_cpu/src/nodes/executors/mlas/mlas_gemm.hpp @@ -32,7 +32,6 @@ class MlasGemmExecutor : public Executor { void moveMemToNumaNode(int numaNodeID) override; private: - const FCAttrs& m_attrs; const MemoryArgs& m_memoryArgs; const MemoryCPtr packedWeights; int64_t M = 0, N, K; diff --git a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp index 
f5971844c53710..6efdb762580740 100644 --- a/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/executors/shl/shl_fullyconnected.cpp @@ -19,6 +19,7 @@ #include "nodes/common/cpu_memcpy.h" #include "nodes/executors/executor.hpp" #include "nodes/executors/fullyconnected_config.hpp" +#include "nodes/executors/implementation_utils.hpp" #include "nodes/executors/memory_arguments.hpp" #include "nodes/executors/shl/shl_utils.hpp" #include "openvino/core/except.hpp" @@ -83,7 +84,7 @@ bool ShlFCExecutor::supports(const FCConfig& config) { return false; } - if (config.attrs.withBias) { + if (hasBias(config)) { const auto& biaDesc = config.descs.at(ARG_BIAS); if (biaDesc->getPrecision() != ov::element::f32) { DEBUG_LOG("ShlFCExecutor: supports only f32 bias"); return false; } } @@ -104,7 +105,9 @@ bool ShlFCExecutor::supports(const FCConfig& config) { return true; } -ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const MemoryArgs& memory, const ExecutorContext::CPtr& context) +ShlFCExecutor::ShlFCExecutor([[maybe_unused]] const FCAttrs& attrs, + const MemoryArgs& memory, + const ExecutorContext::CPtr& context) : packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context)) { const auto& srcDesc = memory.at(ARG_SRC)->getDescPtr(); const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr(); @@ -121,7 +124,8 @@ ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const MemoryArgs& memory, con weiDesc->getShape().getStaticDims()); dst = ShlTensor(sess, precisionToShlDataType(dstDesc->getPrecision()), getShlDataLayoutByMemoryDesc(dstDesc)); - if (attrs.withBias) { + const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty(); + if (hasBias) { const auto& biasDesc = memory.at(ARG_BIAS)->getDescPtr(); bias = ShlTensor(sess, precisionToShlDataType(biasDesc->getPrecision()), diff --git a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp index 4ac0de259b88fa..139ab94901516e --- 
a/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp +++ b/src/plugins/intel_cpu/src/nodes/fullyconnected.cpp @@ -561,8 +561,6 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput, } void FullyConnected::initSupportedPrimitiveDescriptors() { - attrs.withBias = getOriginalInputPrecisionAtPort(BIAS) != ov::element::dynamic; - attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(), getOriginalInputPrecisionAtPort(DATA), context->getConfig().fcSparseWeiDecompressionRate); @@ -642,8 +640,8 @@ void FullyConnected::needSplitMemoryForTensorParallel() { : split_horizontal(context->getEngine(), wgt, 0, tp_cfg.w_rank, tp_cfg.w_size); memory[ARG_WEI] = tp_cfg.cached_splited_weight; // bias - if (attrs.withBias) { - auto bias = getSrcMemoryAtPort(BIAS); + const auto& bias = getSrcMemoryAtPort(BIAS); + if (!bias->getDesc().empty()) { auto select_bias = split_horizontal(context->getEngine(), bias, 0, tp_cfg.w_rank, tp_cfg.w_size); tp_cfg.cached_splited_bias = std::move(select_bias); } else {