@@ -759,12 +759,14 @@ DnnlShapeAgnosticDataPtr DnnlConvolutionPrimitive::createShapeAgnosticData(const
     OPENVINO_ASSERT(!cacheWeightsWithUndefData,
                     "dnnl convolution weights caching for dynamic shapes is not implemented");
 
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+
     ConvAttrs attrs{{1},
                     {0},
                     {0},
                     {0},
                     AutoPaddingType::None,
-                    fcAttrs.withBias,
+                    hasBias,
                     fcAttrs.weightsNonTransposed,
                     false,
                     false,

@@ -880,7 +882,6 @@ DnnlMemoryDescPtr DnnlConvolutionPrimitive::makeTransposedWeightDescriptor(const
                                                                            const DnnlMemoryDescPtr& dstDesc,
                                                                            const ConvAttrs& attrs) {
     FCAttrs fcAttrs{};
-    fcAttrs.withBias = attrs.withBias;
    fcAttrs.weightsNonTransposed = attrs.weightsNonTransposed;
 
     return DnnlFCPrimitive::makeTransposedWeightDescriptor(srcDesc, dstDesc, fcAttrs);
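This hunk shows the pattern the whole PR applies: instead of caching a `withBias` flag in the attributes, bias presence is derived from the `ARG_BIAS` memory descriptor at the point of use. A minimal standalone sketch of that convention, assuming (as the diff does) that an absent optional input is modeled as an empty descriptor; the `MemoryDesc`/`Memory` types below are illustrative stand-ins, not the plugin's real classes:

```cpp
#include <cassert>
#include <cstddef>
#include <map>
#include <memory>
#include <vector>

enum Arg { ARG_SRC, ARG_WEI, ARG_BIAS };

// Stand-in for a memory descriptor: "empty" models an absent optional input.
struct MemoryDesc {
    std::vector<std::size_t> dims;
    bool empty() const { return dims.empty(); }
};

struct Memory {
    MemoryDesc desc;
    const MemoryDesc& getDesc() const { return desc; }
};

using MemoryArgs = std::map<Arg, std::shared_ptr<Memory>>;

// The convention the diff introduces: query the descriptor, keep no flag.
bool hasBias(const MemoryArgs& memory) {
    return !memory.at(ARG_BIAS)->getDesc().empty();
}

int main() {
    MemoryArgs args;
    args[ARG_BIAS] = std::make_shared<Memory>();  // empty desc => no bias
    assert(!hasBias(args));
    args[ARG_BIAS]->desc.dims = {128};            // non-empty => bias present
    assert(hasBias(args));
}
```

The apparent upside is that the flag can never go stale relative to the actual memory arguments; the cost is an extra descriptor lookup at each use site.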
@@ -14,9 +14,6 @@ namespace ov::intel_cpu {
 
 // @todo require explicit initialization of all the attributes?
 struct FCAttrs {
-    // @todo probably we don't want with bias flag, since this information is already
-    // a part of src memory descs
-    bool withBias = false;
     bool weightsNonTransposed = false;
     bool sparseWeights = false;
     uint64_t dynamicQuantizationGroupSize = 0;
@@ -278,8 +278,9 @@ const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
                const std::shared_ptr<DnnlShapeAgnosticData>& shareAgnosticData) const {
 
                 const bool fcSemantic = true;
+                const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
                 ConvAttrs convAttrs{{1}, {0}, {0}, {0},
-                                    AutoPaddingType::None, attrs.withBias, attrs.weightsNonTransposed,
+                                    AutoPaddingType::None, hasBias, attrs.weightsNonTransposed,
                                     false, false, fcSemantic, false, ZeroPointsType::None, {}, attrs.postOps};
 
                 auto primitive =

@@ -366,9 +367,7 @@ const std::vector<ExecutorImplementation<FCAttrs>>& getImplementations() {
             VERIFY(noSparseDecompression(config), UNSUPPORTED_SPARSE_WEIGHTS);
             VERIFY(all_of(f32, srcType(config), dstType(config)), UNSUPPORTED_SRC_PRECISIONS);
             VERIFY(any_of(weiType(config), f32, i8), UNSUPPORTED_WEI_PRECISIONS);
-            if (config.attrs.withBias) {
-                VERIFY(biaType(config) == f32, UNSUPPORTED_SRC_PRECISIONS);
-            }
+            VERIFY(implication(hasBias(config), biaType(config) == f32), UNSUPPORTED_SRC_PRECISIONS);
             VERIFY(weiRank(config) == 2U, UNSUPPORTED_WEI_RANK);
             VERIFY(MatMulKleidiAIExecutor::supports(config), UNSUPPORTED_BY_EXECUTOR);
@@ -33,6 +33,11 @@ ov::element::Type memoryDescType(const Config& config) {
     return config.descs.at(idx)->getPrecision();
 }
 
+template <typename Config>
+bool hasBias(const Config& config) {
+    return !config.descs.at(ARG_BIAS)->empty();
+}
+
 template <typename Config>
 ov::element::Type srcType(const Config& config) {
     return memoryDescType<Config, ARG_SRC>(config);
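For reference, a hedged sketch of how this new helper composes with the `implication`-based VERIFY check in the KleidiAI hunk above; `implication` mirrors the cpu-plugin utility of the same name (logically `!cause || cond`), and the `Config` below is a simplified stand-in:

```cpp
#include <cassert>

// Mirrors the cpu-plugin helper used in the VERIFY above: true unless the
// premise holds and the conclusion fails.
constexpr bool implication(bool cause, bool cond) {
    return !cause || cond;
}

struct Config {
    bool biasPresent = false;  // stand-in for !descs.at(ARG_BIAS)->empty()
    bool biasIsF32 = false;    // stand-in for biaType(config) == f32
};

// "If a bias exists, it must be f32": configs without a bias always pass.
bool biasPrecisionOk(const Config& c) {
    return implication(c.biasPresent, c.biasIsF32);
}

int main() {
    assert(biasPrecisionOk({false, false}));  // no bias: fine
    assert(biasPrecisionOk({true, true}));    // f32 bias: fine
    assert(!biasPrecisionOk({true, false}));  // non-f32 bias: rejected
}
```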
@@ -25,6 +25,7 @@
 #include "nodes/executors/executor.hpp"
 #include "nodes/executors/fullyconnected_config.hpp"
 #include "nodes/executors/memory_arguments.hpp"
+#include "openvino/core/except.hpp"
 #include "openvino/core/parallel.hpp"
 #include "openvino/core/type/element_type.hpp"
 #include "utils/cpu_utils.hpp"

@@ -73,13 +74,16 @@ MatMulKleidiAIExecutor::MatMulKleidiAIExecutor(const FCAttrs& attrs,
     auto N = weiDims[0];
     auto K = weiDims[1];
 
-    if (memory.at(ARG_BIAS)->getDataAs<float>() == nullptr) {
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+
+    if (hasBias) {
+        biasMem = memory.at(ARG_BIAS);
+    } else {
         auto biasDesc = std::make_shared<CpuBlockedMemoryDesc>(f32, Shape({N}));
         biasMem = std::make_shared<Memory>(context->getEngine(), biasDesc);
         biasMem->nullify();
-    } else {
-        biasMem = memory.at(ARG_BIAS);
     }
 
     if (memory.at(ARG_SRC)->getPrecision() != memory.at(ARG_WEI)->getPrecision()) {
         aclfcAttrs.isConvertedWeights = true;
     }

@@ -285,15 +289,8 @@ void MatMulKleidiAIExecutor::execute(const MemoryArgs& memory) {
     }
 }
 
-void MatMulKleidiAIExecutor::moveMemToNumaNode(int numaNodeID) {
-    if (curNumaNode == numaNodeID) {
-        return;
-    }
-    curNumaNode = numaNodeID;
-    mbind_move(packedWeights, numaNodeID);
-    if (m_attrs.withBias) {
-        mbind_move(m_memoryArgs.at(ARG_BIAS), numaNodeID);
-    }
+void MatMulKleidiAIExecutor::moveMemToNumaNode([[maybe_unused]] int numaNodeID) {
+    OPENVINO_THROW_NOT_IMPLEMENTED("'moveMemToNumaNode' is not implemented by the executor");
 }
 
 } // namespace ov::intel_cpu
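The constructor hunk above also illustrates the fallback used when a kernel expects a bias buffer unconditionally: if no bias is supplied, allocate an f32 buffer of shape `{N}` and zero-fill it (`nullify()`). A simplified standalone sketch of the same idea, with `std::vector`/`std::optional` standing in for the plugin's `Memory` and `CpuBlockedMemoryDesc`:

```cpp
#include <cstddef>
#include <optional>
#include <vector>

// If the caller supplied a bias, use it; otherwise materialize a zero bias of
// length N so the downstream kernel can always read a valid bias buffer.
std::vector<float> biasOrZeros(const std::optional<std::vector<float>>& bias,
                               std::size_t N) {
    if (bias.has_value()) {
        return *bias;                    // user-provided bias (assumed length N)
    }
    return std::vector<float>(N, 0.0f);  // zero-filled placeholder, like nullify()
}
```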
@@ -151,7 +151,7 @@ void MlasGemmExecutor::moveMemToNumaNode(int numaNodeID) {
     }
     curNumaNode = numaNodeID;
     mbind_move(packedWeights, numaNodeID);
-    if (m_attrs.withBias) {
+    if (!m_memoryArgs.at(ARG_BIAS)->getDesc().empty()) {
         mbind_move(m_memoryArgs.at(ARG_BIAS), numaNodeID);
     }
 }
@@ -19,6 +19,7 @@
 #include "nodes/common/cpu_memcpy.h"
 #include "nodes/executors/executor.hpp"
 #include "nodes/executors/fullyconnected_config.hpp"
+#include "nodes/executors/implementation_utils.hpp"
 #include "nodes/executors/memory_arguments.hpp"
 #include "nodes/executors/shl/shl_utils.hpp"
 #include "openvino/core/except.hpp"

@@ -83,7 +84,7 @@ bool ShlFCExecutor::supports(const FCConfig& config) {
         return false;
     }
 
-    if (config.attrs.withBias) {
+    if (hasBias(config)) {
         const auto& biaDesc = config.descs.at(ARG_BIAS);
         if (biaDesc->getPrecision() != ov::element::f32) {
             DEBUG_LOG("ShlFCExecutor: supports only f32 bias");

@@ -104,7 +105,9 @@ bool ShlFCExecutor::supports(const FCConfig& config) {
     return true;
 }
 
-ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const MemoryArgs& memory, const ExecutorContext::CPtr& context)
+ShlFCExecutor::ShlFCExecutor([[maybe_unused]] const FCAttrs& attrs,
+                             const MemoryArgs& memory,
+                             const ExecutorContext::CPtr& context)
     : packedWeights(prepareWeightMemory(memory.at(ARG_WEI), context)) {
     const auto& srcDesc = memory.at(ARG_SRC)->getDescPtr();
     const auto& weiDesc = memory.at(ARG_WEI)->getDescPtr();

@@ -121,7 +124,8 @@ ShlFCExecutor::ShlFCExecutor(const FCAttrs& attrs, const MemoryArgs& memory, con
                     weiDesc->getShape().getStaticDims());
     dst = ShlTensor(sess, precisionToShlDataType(dstDesc->getPrecision()), getShlDataLayoutByMemoryDesc(dstDesc));
 
-    if (attrs.withBias) {
+    const bool hasBias = !memory.at(ARG_BIAS)->getDesc().empty();
+    if (hasBias) {
         const auto& biasDesc = memory.at(ARG_BIAS)->getDescPtr();
         bias = ShlTensor(sess,
                          precisionToShlDataType(biasDesc->getPrecision()),
src/plugins/intel_cpu/src/nodes/fullyconnected.cpp (6 changes: 2 additions & 4 deletions)

@@ -561,8 +561,6 @@ static bool useSparseWeightsDecompression(const NodePtr& weightsInput,
 }
 
 void FullyConnected::initSupportedPrimitiveDescriptors() {
-    attrs.withBias = getOriginalInputPrecisionAtPort(BIAS) != ov::element::dynamic;
-
     attrs.sparseWeights = useSparseWeightsDecompression(getParentEdgeAt(WEIGHTS)->getParent(),
                                                         getOriginalInputPrecisionAtPort(DATA),
                                                         context->getConfig().fcSparseWeiDecompressionRate);

@@ -642,8 +640,8 @@ void FullyConnected::needSplitMemoryForTensorParallel() {
                               : split_horizontal(context->getEngine(), wgt, 0, tp_cfg.w_rank, tp_cfg.w_size);
         memory[ARG_WEI] = tp_cfg.cached_splited_weight;
         // bias
-        if (attrs.withBias) {
-            auto bias = getSrcMemoryAtPort(BIAS);
+        const auto& bias = getSrcMemoryAtPort(BIAS);
+        if (!bias->getDesc().empty()) {
             auto select_bias = split_horizontal(context->getEngine(), bias, 0, tp_cfg.w_rank, tp_cfg.w_size);
             tp_cfg.cached_splited_bias = std::move(select_bias);
         } else {
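The removed line in `initSupportedPrimitiveDescriptors` is why the flag could go away: bias presence is already implied by the node's inputs, so the tensor-parallel path above simply fetches the bias memory once and branches on descriptor emptiness. A sketch of that guard under the same convention; `splitRows` is a hypothetical stand-in for the diff's `split_horizontal`:

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for split_horizontal: keep this rank's slice of the bias.
std::vector<float> splitRows(const std::vector<float>& bias, int rank, int size) {
    const std::size_t chunk = bias.size() / static_cast<std::size_t>(size);
    const std::size_t offset = chunk * static_cast<std::size_t>(rank);
    return {bias.begin() + static_cast<std::ptrdiff_t>(offset),
            bias.begin() + static_cast<std::ptrdiff_t>(offset + chunk)};
}

// Mirrors the hunk above: split the bias across ranks only when it is present
// (an empty container models the empty descriptor of an absent bias).
std::vector<float> maybeSplitBias(const std::vector<float>& bias, int rank, int size) {
    if (bias.empty()) {
        return {};
    }
    return splitRows(bias, rank, size);
}
```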