Skip to content
Open
Show file tree
Hide file tree
Changes from 91 commits
Commits
Show all changes
173 commits
Select commit Hold shift + click to select a range
7a5d226
tbb through thread pool of onednn
sunxiaoxia2022 Apr 1, 2025
2ffe397
add tbb auto in thread pool
sunxiaoxia2022 Apr 2, 2025
63bfc9d
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Apr 2, 2025
f4372e8
fix tbb auto issue
sunxiaoxia2022 Apr 4, 2025
9aee832
thread pool static
sunxiaoxia2022 Apr 4, 2025
cc7aa3b
remove unused activate_threadpool
sunxiaoxia2022 Apr 8, 2025
131ecd3
change TBB_OPTION
sunxiaoxia2022 Apr 16, 2025
8a19a60
change TBB_OPTION to tbb_partitioner
sunxiaoxia2022 Apr 16, 2025
b417750
fix conflicts
sunxiaoxia2022 Apr 16, 2025
bc57b4e
encapsulate parallel_for and move it to cpu plugin
sunxiaoxia2022 Apr 30, 2025
f524d54
rm diff
sunxiaoxia2022 Apr 30, 2025
c4f0963
change constructor of ThreadPool
sunxiaoxia2022 May 8, 2025
b86cc66
fix conflict
sunxiaoxia2022 May 8, 2025
3572fb5
code style
sunxiaoxia2022 May 8, 2025
ba518cc
code style
sunxiaoxia2022 May 8, 2025
e24fce0
code style
sunxiaoxia2022 May 8, 2025
447e20d
add property ov::hint::TbbPartitioner
sunxiaoxia2022 May 8, 2025
368c2dc
add onednn thread pool
sunxiaoxia2022 May 9, 2025
6f5922b
add set_partitioner into ThreadPool
sunxiaoxia2022 May 12, 2025
df87855
change ov::hint::TbbPartitioner to ov::intel_cpu::TbbPartitioner
sunxiaoxia2022 May 14, 2025
e36686a
code style
sunxiaoxia2022 May 14, 2025
838e4a2
add std::move
sunxiaoxia2022 May 14, 2025
5109def
import TbbPartitioner in python
sunxiaoxia2022 May 16, 2025
d6dfd4e
update onednn
sunxiaoxia2022 May 21, 2025
399b5ac
apply tbb auto scheduling logic
wangleis May 16, 2025
ee905ec
fix typo
wangleis May 16, 2025
a412fef
fix code style issue
wangleis May 16, 2025
f93c698
update scheduling logic
wangleis May 20, 2025
18b6d57
enable blocked cores on Linux
wangleis May 22, 2025
a84c157
fix error in calling compiled_model() twice
sunxiaoxia2022 May 22, 2025
5250f83
fix ci test error
sunxiaoxia2022 May 23, 2025
657de06
fix python test issue
sunxiaoxia2022 May 23, 2025
0e92586
add workload estimation
wangleis May 29, 2025
273bdd2
update estimation
wangleis May 29, 2025
ae68c45
fix conflicts
sunxiaoxia2022 May 30, 2025
c27feb3
code style
sunxiaoxia2022 May 30, 2025
f33fc8d
Add node
wangleis Jun 4, 2025
9ae115d
fix conflicts
sunxiaoxia2022 Jun 5, 2025
86bf17b
code style
sunxiaoxia2022 Jun 5, 2025
15493d6
fix redefinition issue
sunxiaoxia2022 Jun 5, 2025
63434b4
add more indicator
wangleis Jun 6, 2025
bce5714
update indicator
wangleis Jun 6, 2025
e74ea09
fix typo
wangleis Jun 6, 2025
ea23da5
add OV_THREAD_TBB_PARTITIONER_AUTO to OV_THREAD
sunxiaoxia2022 Jun 10, 2025
faface7
Merge branch 'xiaoxia/auto_tbb_thread_pool' of https://github.com/sun…
sunxiaoxia2022 Jun 10, 2025
25380b4
update profiling method
wangleis Jun 10, 2025
5127f25
change log format
sunxiaoxia2022 Jun 11, 2025
0b4be5f
add missing OV_THREAD_TBB_PARTITIONER_AUTO
sunxiaoxia2022 Jun 11, 2025
5498b66
fix profiling data
wangleis Jun 11, 2025
600250f
update default latency algo
wangleis Jun 15, 2025
2b7d6ee
THREADING=TBB_PARTITIONER_AUTO by default for testing convenience
sunxiaoxia2022 Jun 21, 2025
6771571
fix conflicts
sunxiaoxia2022 Jun 21, 2025
5384ef6
code style
sunxiaoxia2022 Jun 22, 2025
5f3201e
change OpenVINOConfig.cmake.in
sunxiaoxia2022 Jun 23, 2025
3f2c74b
fix conflicts in dnnl_utils.cpp
sunxiaoxia2022 Jun 29, 2025
49988e9
update latency threading algo on Pcore+Ecore
wangleis Jun 30, 2025
9eaa1fc
Revert "update latency threading algo on Pcore+Ecore"
wangleis Jul 2, 2025
a8dc98b
update supported platform for auto partitioner
wangleis Jul 3, 2025
c31b7fa
fix conflict
sunxiaoxia2022 Jul 7, 2025
8d34532
Merge branch 'xiaoxia/auto_tbb_thread_pool' of https://github.com/sun…
sunxiaoxia2022 Jul 7, 2025
d69db84
fix conflicts
sunxiaoxia2022 Jul 10, 2025
d2d0899
code style
sunxiaoxia2022 Jul 10, 2025
c4387c4
clang issue
sunxiaoxia2022 Jul 11, 2025
a0e7db2
clang-tidy issue
sunxiaoxia2022 Jul 14, 2025
1ad835a
clang-tidy issue
sunxiaoxia2022 Jul 14, 2025
1a7c2bc
clang-tidy issue
sunxiaoxia2022 Jul 14, 2025
c47e825
rm usued header reference
sunxiaoxia2022 Jul 15, 2025
ee2290c
update support platform
wangleis Jul 15, 2025
76cd99a
fix code style issue
wangleis Jul 15, 2025
a391bbd
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
wangleis Jul 15, 2025
06c821f
fix ARM64 build issue
wangleis Jul 15, 2025
eda1bc6
fix ARM build issue
wangleis Jul 15, 2025
5555ac9
fix code style issue
wangleis Jul 15, 2025
fa0e0ce
fix test case issue
wangleis Jul 16, 2025
6d3d9b6
remove unused header reference
sunxiaoxia2022 Jul 16, 2025
576f97b
rm dnnl_config.h in graph.cpp
sunxiaoxia2022 Jul 16, 2025
8e8e8d1
clang-tidy issue
sunxiaoxia2022 Jul 16, 2025
a1ba007
update import/export for different model prefer threads in latency an…
wangleis Jul 16, 2025
3c32df7
fix code style issue
wangleis Jul 16, 2025
911c0cb
fix import issue
wangleis Jul 16, 2025
fca2577
clang-tidy issue
sunxiaoxia2022 Jul 16, 2025
9ab9db2
Merge branch 'xiaoxia/auto_tbb_thread_pool' of https://github.com/sun…
sunxiaoxia2022 Jul 16, 2025
93f2a24
update supported platform
wangleis Jul 16, 2025
a3c268b
add tbbPartitioner to cache
sunxiaoxia2022 Jul 17, 2025
54ad388
Merge branch 'xiaoxia/auto_tbb_thread_pool' of https://github.com/sun…
sunxiaoxia2022 Jul 17, 2025
40fbec1
fix conflicts
sunxiaoxia2022 Jul 18, 2025
c89496b
fix conflict
sunxiaoxia2022 Jul 18, 2025
579f345
parallel with partitioner=static when tbb_partitoner is default
sunxiaoxia2022 Jul 18, 2025
e103fe7
update onednn
sunxiaoxia2022 Jul 22, 2025
3bbebd1
fix smoke_ReorderTestCustomStrideWithFactor issue
sunxiaoxia2022 Jul 24, 2025
ff1c33f
fix conflict
sunxiaoxia2022 Jul 24, 2025
fad1a10
fix typo issue
sunxiaoxia2022 Jul 27, 2025
265c514
change onednn
sunxiaoxia2022 Jul 31, 2025
0a272e1
Merge commit '2739b894c21fd5451eaeffeb37317c68d70a5c78' into xiaoxia/…
sunxiaoxia2022 Jul 31, 2025
2be9657
fix conflict
sunxiaoxia2022 Jul 31, 2025
fec192e
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
wangleis Aug 2, 2025
8f677a8
fix clang-tidy issue
sunxiaoxia2022 Aug 5, 2025
5038f90
fix activate_threadpool issue
sunxiaoxia2022 Aug 5, 2025
316fa84
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Aug 6, 2025
d578ccd
update model prefer threads
wangleis Aug 12, 2025
d99d844
update supported platform
wangleis Aug 14, 2025
3eebb62
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Aug 19, 2025
94ee390
fix MemoryReleaseTest issue on ARM
sunxiaoxia2022 Aug 24, 2025
9459d07
fix clang-tidy issue
sunxiaoxia2022 Aug 25, 2025
a2c2033
clang-tidy
sunxiaoxia2022 Aug 25, 2025
58d7a0d
clang-tidy on ARM
sunxiaoxia2022 Aug 25, 2025
235f571
fix threading issue on ARM
sunxiaoxia2022 Aug 25, 2025
ca02d95
clang-tidy
sunxiaoxia2022 Aug 25, 2025
ce63e62
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Aug 26, 2025
3c490f8
THREADING=TBB instead of THREADING=TBB_PARTITIONER_AUTO by default on…
sunxiaoxia2022 Aug 26, 2025
986b49a
update scheduling log
wangleis Sep 2, 2025
a9f8b49
update scheduling log
wangleis Sep 2, 2025
9aa1fff
merge master
sunxiaoxia2022 Sep 2, 2025
629611a
Merge branch 'xiaoxia/auto_tbb_thread_pool' of https://github.com/sun…
sunxiaoxia2022 Sep 2, 2025
f98d7b3
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Sep 2, 2025
30ec07b
change TBB_PARTITIONER_AUTO to TBB_ADAPTIVE
sunxiaoxia2022 Sep 3, 2025
674c542
fix build issue
sunxiaoxia2022 Sep 3, 2025
de4dd27
change f to F
sunxiaoxia2022 Sep 3, 2025
61b507e
fix code sytle issue
wangleis Sep 3, 2025
4fe1af5
fix clang-tidy issue
sunxiaoxia2022 Sep 3, 2025
2aeda51
change DEFAULT to NONE
sunxiaoxia2022 Sep 4, 2025
31d5725
fix python test issue
sunxiaoxia2022 Sep 4, 2025
e17678e
update test scheduling logic
wangleis Sep 5, 2025
0d2b685
update scheduling logic
wangleis Sep 8, 2025
37e6716
update scheduling logic for INT8 model
wangleis Sep 9, 2025
a63487b
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Sep 11, 2025
f57ac29
fix conflicts
sunxiaoxia2022 Sep 17, 2025
01dd239
fix dnnl_get_max_threads failed when update dynamic paremeters
sunxiaoxia2022 Sep 17, 2025
6e8157f
fix clang-tidy
sunxiaoxia2022 Sep 17, 2025
226f9cd
clang-tidy
sunxiaoxia2022 Sep 18, 2025
0c354dc
move threadPool to CpuParallel
sunxiaoxia2022 Sep 21, 2025
52626aa
rm unused header
sunxiaoxia2022 Sep 21, 2025
e8c7afe
code style
sunxiaoxia2022 Sep 21, 2025
29a377c
add cpu_parallel_sum
sunxiaoxia2022 Sep 22, 2025
e83764a
code style
sunxiaoxia2022 Sep 22, 2025
f86aca9
fix a error
sunxiaoxia2022 Sep 22, 2025
7200658
fix code style
sunxiaoxia2022 Sep 22, 2025
f815e1c
code style
sunxiaoxia2022 Sep 22, 2025
a3f9d56
fix clang-tidy issue
sunxiaoxia2022 Sep 22, 2025
7e887ad
store ICpuParallel reference in ThreadPool
sunxiaoxia2022 Sep 26, 2025
b608376
remove activate_threadpool
sunxiaoxia2022 Sep 26, 2025
7da8769
code style
sunxiaoxia2022 Sep 26, 2025
0d211af
clang-tidy issue
sunxiaoxia2022 Sep 26, 2025
327276f
change reorder_node_test
sunxiaoxia2022 Sep 26, 2025
8d562ce
clang-tidy
sunxiaoxia2022 Sep 26, 2025
9266af6
fix clang-tidy
sunxiaoxia2022 Sep 28, 2025
fea37f3
clang-tidy
sunxiaoxia2022 Sep 28, 2025
c7d125e
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Sep 29, 2025
6563212
update onednn
sunxiaoxia2022 Sep 29, 2025
ca0dc26
move activate to graph init
sunxiaoxia2022 Sep 29, 2025
03fe5a7
remove icpu_parallel.hpp
sunxiaoxia2022 Sep 30, 2025
6d30b20
update onednn
sunxiaoxia2022 Sep 30, 2025
b9efce4
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Sep 30, 2025
800be30
add const
sunxiaoxia2022 Sep 30, 2025
cb6a815
clang-tidy
sunxiaoxia2022 Sep 30, 2025
39d1496
clang-tidy
sunxiaoxia2022 Sep 30, 2025
b50f3c4
clang-tidy
sunxiaoxia2022 Sep 30, 2025
4f0bb08
add [[nodiscard]]
sunxiaoxia2022 Sep 30, 2025
cd4dc3e
clang-tidy
sunxiaoxia2022 Sep 30, 2025
d943a0c
clang-tidy
sunxiaoxia2022 Sep 30, 2025
22ea461
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
wangleis Oct 9, 2025
89dbc2b
add a parameter in LinuxCpuMapCacheParserTests
sunxiaoxia2022 Oct 9, 2025
de1c2bf
move cpu_parallel_* functions to private
sunxiaoxia2022 Oct 13, 2025
90e107f
fix clang issue
sunxiaoxia2022 Oct 15, 2025
eb502bd
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Oct 16, 2025
04c7621
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Oct 16, 2025
d050b1b
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Oct 17, 2025
31da94b
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
wangleis Oct 19, 2025
50ff972
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
sunxiaoxia2022 Oct 21, 2025
dc87476
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
wangleis Oct 23, 2025
90e650f
Merge branch 'master' into xiaoxia/auto_tbb_thread_pool
akladiev Oct 23, 2025
ed7790a
simplify conditions OV_THREAD_USE_TBB
sunxiaoxia2022 Oct 26, 2025
642f8fb
Merge branch 'xiaoxia/auto_tbb_thread_pool' of https://github.com/sun…
sunxiaoxia2022 Oct 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cmake/dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ unset(_ov_download_tbb_done CACHE)
# or ENABLE_SYSTEM_TBB is OFF
#
function(ov_download_tbb)
if(_ov_download_tbb_done OR NOT THREADING MATCHES "^(TBB|TBB_AUTO)$")
if(_ov_download_tbb_done OR NOT THREADING MATCHES "^(TBB|TBB_AUTO|TBB_PARTITIONER_AUTO)$")
return()
endif()
set(_ov_download_tbb_done ON CACHE INTERNAL "Whether prebuilt TBB is already downloaded")
Expand Down
6 changes: 3 additions & 3 deletions cmake/features.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ if(ANDROID)
elseif(RISCV64)
set(THREADING_DEFAULT "OMP")
else()
set(THREADING_DEFAULT "TBB")
set(THREADING_DEFAULT "TBB_PARTITIONER_AUTO")
endif()

set(THREADING_OPTIONS "TBB" "TBB_AUTO" "SEQ" "OMP")
set(THREADING_OPTIONS "TBB" "TBB_AUTO" "SEQ" "OMP" "TBB_PARTITIONER_AUTO")

set(THREADING "${THREADING_DEFAULT}" CACHE STRING "Threading")
set_property(CACHE THREADING PROPERTY STRINGS ${THREADING_OPTIONS})
Expand All @@ -106,7 +106,7 @@ endif()

ov_dependent_option (ENABLE_INTEL_OPENMP "Enables usage of Intel OpenMP instead of default compiler one" ${ENABLE_INTEL_OPENMP_DEFAULT} "THREADING STREQUAL OMP" OFF)

if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO") AND
if((THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" OR THREADING STREQUAL "TBB_PARTITIONER_AUTO") AND
(BUILD_SHARED_LIBS OR (LINUX AND X86_64)))
set(ENABLE_TBBBIND_2_5_DEFAULT ON)
else()
Expand Down
4 changes: 2 additions & 2 deletions cmake/templates/OpenVINOConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ endmacro()

macro(_ov_find_tbb)
set(_ov_threading "@THREADING@")
if(_ov_threading STREQUAL "TBB" OR _ov_threading STREQUAL "TBB_AUTO")
if(_ov_threading STREQUAL "TBB" OR _ov_threading STREQUAL "TBB_AUTO" OR _ov_threading STREQUAL "TBB_PARTITIONER_AUTO")
set(enable_pkgconfig_tbb "@tbb_FOUND@")

# try tbb.pc
Expand Down Expand Up @@ -563,7 +563,7 @@ if(_ov_as_external_package)

# WA for cmake version < 3.16 which does not export
# IMPORTED_LINK_DEPENDENT_LIBRARIES_** properties if no PUBLIC dependencies for the library
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" OR THREADING STREQUAL "TBB_PARTITIONER_AUTO")
foreach(type RELEASE DEBUG RELWITHDEBINFO MINSIZEREL)
foreach(tbb_target TBB::tbb TBB::tbbmalloc PkgConfig::tbb)
if(TARGET ${tbb_target})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
# Copyright (C) 2018-2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Enums
from openvino._pyopenvino.properties.intel_cpu import TbbPartitioner

# Properties
import openvino._pyopenvino.properties.intel_cpu as __intel_cpu
from openvino.properties._properties import __make_properties
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,16 @@ void regmodule_properties(py::module m) {
py::module m_intel_cpu =
m_properties.def_submodule("intel_cpu", "openvino.properties.intel_cpu submodule that simulates ov::intel_cpu");

py::enum_<ov::intel_cpu::TbbPartitioner>(m_intel_cpu, "TbbPartitioner", py::arithmetic())
.value("STATIC", ov::intel_cpu::TbbPartitioner::STATIC)
.value("AUTO", ov::intel_cpu::TbbPartitioner::AUTO);

// Submodule intel_cpu property
wrap_property_RW(m_intel_cpu, ov::intel_cpu::denormals_optimization, "denormals_optimization");
wrap_property_RW(m_intel_cpu,
ov::intel_cpu::sparse_weights_decompression_rate,
"sparse_weights_decompression_rate");
wrap_property_RW(m_intel_cpu, ov::intel_cpu::tbb_partitioner, "tbb_partitioner");

// Submodule intel_gpu
py::module m_intel_gpu =
Expand Down
5 changes: 5 additions & 0 deletions src/bindings/python/src/pyopenvino/utils/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "openvino/core/meta_data.hpp"
#include "openvino/frontend/decoder.hpp"
#include "openvino/frontend/graph_iterator.hpp"
#include "openvino/runtime/intel_cpu/properties.hpp"
#include "openvino/runtime/properties.hpp"

using Version = ov::pass::Serialize::Version;
Expand Down Expand Up @@ -245,6 +246,8 @@ py::object from_ov_any(const ov::Any& any) {
return py::cast(any.as<ov::hint::ExecutionMode>());
} else if (any.is<ov::log::Level>()) {
return py::cast(any.as<ov::log::Level>());
} else if (any.is<ov::intel_cpu::TbbPartitioner>()) {
return py::cast(any.as<ov::intel_cpu::TbbPartitioner>());
} else if (any.is<ov::device::Type>()) {
return py::cast(any.as<ov::device::Type>());
} else if (any.is<ov::streams::Num>()) {
Expand Down Expand Up @@ -544,6 +547,8 @@ ov::Any py_object_to_any(const py::object& py_obj) {
return py::cast<ov::hint::ExecutionMode>(py_obj);
} else if (py::isinstance<ov::log::Level>(py_obj)) {
return py::cast<ov::log::Level>(py_obj);
} else if (py::isinstance<ov::intel_cpu::TbbPartitioner>(py_obj)) {
return py::cast<ov::intel_cpu::TbbPartitioner>(py_obj);
} else if (py::isinstance<ov::device::Type>(py_obj)) {
return py::cast<ov::device::Type>(py_obj);
} else if (py::isinstance<ov::streams::Num>(py_obj)) {
Expand Down
15 changes: 15 additions & 0 deletions src/bindings/python/tests/test_runtime/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,13 @@ def test_properties_rw_base():
(log.Level.TRACE, "Level.TRACE", 4),
),
),
(
intel_cpu.TbbPartitioner,
(
(intel_cpu.TbbPartitioner.STATIC, "TbbPartitioner.STATIC", 0),
(intel_cpu.TbbPartitioner.AUTO, "TbbPartitioner.AUTO", 1),
),
),
(
intel_auto.SchedulePolicy,
(
Expand Down Expand Up @@ -356,6 +363,14 @@ def test_properties_ro(ov_property_ro, expected_value):
(2.0, 2.0),
),
),
(
intel_cpu.tbb_partitioner,
"TBB_PARTITIONER",
(
(intel_cpu.TbbPartitioner.STATIC, intel_cpu.TbbPartitioner.STATIC),
(intel_cpu.TbbPartitioner.AUTO, intel_cpu.TbbPartitioner.AUTO),
),
),
(
intel_auto.device_bind_buffer,
"DEVICE_BIND_BUFFER",
Expand Down
4 changes: 2 additions & 2 deletions src/cmake/install_tbb.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ unset(_ov_dynamic_tbbbind_2_5_found)
# install TBB

# define variables for OpenVINOConfig.cmake
if(THREADING MATCHES "^(TBB|TBB_AUTO)$")
if(THREADING MATCHES "^(TBB|TBB_AUTO|TBB_PARTITIONER_AUTO)$")
set(OV_TBB_DIR "${TBB_DIR}")
list(APPEND PATH_VARS "OV_TBB_DIR")
endif()
Expand All @@ -80,7 +80,7 @@ endif()
# - downloaded TBB should be a part of all packages
# - custom TBB provided by users, needs to be a part of wheel packages
# - system TBB also needs to be a part of wheel packages
if(THREADING MATCHES "^(TBB|TBB_AUTO)$" AND
if(THREADING MATCHES "^(TBB|TBB_AUTO|TBB_PARTITIONER_AUTO)$" AND
( (DEFINED TBBROOT AND TBBROOT MATCHES ${TEMP}) OR
(DEFINED TBBROOT OR DEFINED TBB_DIR OR DEFINED ENV{TBBROOT} OR
DEFINED ENV{TBB_DIR}) OR ENABLE_SYSTEM_TBB ) )
Expand Down
12 changes: 8 additions & 4 deletions src/cmake/ov_parallel.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ function(_ov_get_tbb_location tbb_target _tbb_lib_location_var)
endfunction()

macro(ov_find_package_tbb)
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND)
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" OR THREADING STREQUAL "TBB_PARTITIONER_AUTO" AND NOT TBB_FOUND)
# conan generates TBBConfig.cmake files, which follows cmake's
# SameMajorVersion scheme, while TBB itself follows AnyNewerVersion one
# see https://cmake.org/cmake/help/latest/module/CMakePackageConfigHelpers.html#generating-a-package-version-file
Expand Down Expand Up @@ -340,7 +340,7 @@ macro(ov_find_package_openmp)
endmacro()

function(ov_set_threading_interface_for TARGET_NAME)
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" AND NOT TBB_FOUND)
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" OR THREADING STREQUAL "TBB_PARTITIONER_AUTO" AND NOT TBB_FOUND)
# find TBB
ov_find_package_tbb()

Expand Down Expand Up @@ -383,9 +383,13 @@ function(ov_set_threading_interface_for TARGET_NAME)
add_library(openvino::threading ALIAS openvino_threading)
endif()

if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO")
if(THREADING STREQUAL "TBB" OR THREADING STREQUAL "TBB_AUTO" OR THREADING STREQUAL "TBB_PARTITIONER_AUTO")
if(TBB_FOUND)
set(_ov_thread_define "OV_THREAD_TBB")
if(THREADING STREQUAL "TBB_PARTITIONER_AUTO")
set(_ov_thread_define "OV_THREAD_TBB_PARTITIONER_AUTO")
else()
set(_ov_thread_define "OV_THREAD_TBB")
endif()
set(_ov_threading_lib TBB::tbb)
else()
set(THREADING "SEQ" PARENT_SCOPE)
Expand Down
41 changes: 21 additions & 20 deletions src/core/include/openvino/core/parallel.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,13 @@
#include <cstddef>
#include <type_traits>

#define OV_THREAD_TBB 0
#define OV_THREAD_OMP 1
#define OV_THREAD_SEQ 2
#define OV_THREAD_TBB_AUTO 3
#define OV_THREAD_TBB 0
#define OV_THREAD_OMP 1
#define OV_THREAD_SEQ 2
#define OV_THREAD_TBB_AUTO 3
#define OV_THREAD_TBB_PARTITIONER_AUTO 4

#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
# ifndef NOMINMAX
# define NOMINMAX
# endif
Expand Down Expand Up @@ -66,7 +67,7 @@ inline int parallel_get_env_threads() {
inline void parallel_set_max_nested_levels(int levels) {
return;
}
# if OV_THREAD == OV_THREAD_TBB
# if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
# define PARTITIONING , tbb::static_partitioner()

// The TBB version less than 2018u1 has no static_partitioner argument for
Expand Down Expand Up @@ -231,7 +232,7 @@ namespace ov {

template <typename F>
void parallel_nt(int nthr, const F& func) {
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
if (nthr == 0)
nthr = parallel_get_max_threads();
if (nthr == 1) {
Expand Down Expand Up @@ -279,7 +280,7 @@ void parallel_nt_static(int nthr, const F& func) {

if (nthr == 0)
nthr = parallel_get_max_threads();
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
tbb::parallel_for(
0,
nthr,
Expand All @@ -305,7 +306,7 @@ void parallel_nt_static(int nthr, const F& func) {

template <typename I, typename F>
void parallel_sort(I begin, I end, const F& comparator) {
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
tbb::parallel_sort(begin, end, comparator);
#elif OV_THREAD == OV_THREAD_OMP
// TODO: propose OpenMP version
Expand All @@ -317,7 +318,7 @@ void parallel_sort(I begin, I end, const F& comparator) {

template <typename T0, typename R, typename F>
R parallel_sum(const T0& D0, const R& input, const F& func) {
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
return _TBB_REDUCE_FUNC(
tbb::blocked_range<T0>(0, D0),
input,
Expand Down Expand Up @@ -351,7 +352,7 @@ R parallel_sum(const T0& D0, const R& input, const F& func) {

template <typename T0, typename T1, typename R, typename F>
R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) {
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
return _TBB_REDUCE_FUNC(
tbb::blocked_range2d<T0, T1>(0, D0, 0, D1),
input,
Expand Down Expand Up @@ -391,7 +392,7 @@ R parallel_sum2d(const T0& D0, const T1& D1, const R& input, const F& func) {
}
template <typename T0, typename T1, typename T2, typename R, typename F>
R parallel_sum3d(const T0& D0, const T1& D1, const T2& D2, const R& input, const F& func) {
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
return _TBB_REDUCE_FUNC(
tbb::blocked_range3d<T0, T1, T2>(0, D0, 0, D1, 0, D2),
input,
Expand Down Expand Up @@ -524,7 +525,7 @@ void parallel_for(const T0& D0, const F& func) {
if (D0 == T0(0)) {
return;
}
#if OV_THREAD == OV_THREAD_TBB
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
auto work_amount = static_cast<size_t>(D0);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount)
Expand Down Expand Up @@ -590,7 +591,7 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {
if (D0 == T0(0) || D1 == T1(0)) {
return;
}
#if OV_THREAD == OV_THREAD_TBB
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
auto work_amount = static_cast<size_t>(D0 * D1);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount)
Expand Down Expand Up @@ -636,7 +637,7 @@ void parallel_for2d(const T0& D0, const T1& D1, const F& func) {

template <typename T0, typename T1, typename F>
void parallel_for2d_dynamic(const T0& D0, const T1& D1, const F& func) {
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
tbb::parallel_for(tbb::blocked_range2d<T0, T1>(0, D0, 0, D1), [=](const tbb::blocked_range2d<T0, T1>& r) {
for (T0 d0 = r.rows().begin(); d0 < r.rows().end(); d0++) {
for (T1 d1 = r.cols().begin(); d1 < r.cols().end(); d1++) {
Expand Down Expand Up @@ -674,7 +675,7 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {
if (D0 == T0(0) || D1 == T1(0) || D2 == T2(0)) {
return;
}
#if OV_THREAD == OV_THREAD_TBB
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
auto work_amount = static_cast<size_t>(D0 * D1 * D2);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount)
Expand Down Expand Up @@ -720,7 +721,7 @@ void parallel_for3d(const T0& D0, const T1& D1, const T2& D2, const F& func) {

template <typename T0, typename T1, typename T2, typename F>
void parallel_for3d_dynamic(const T0& D0, const T1& D1, const T2& D2, const F& func) {
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO)
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
tbb::parallel_for(tbb::blocked_range3d<T0, T1, T2>(0, D0, 0, D1, 0, D2),
[=](const tbb::blocked_range3d<T0, T1, T2>& r) {
for (T0 d0 = r.pages().begin(); d0 < r.pages().end(); d0++) {
Expand Down Expand Up @@ -762,7 +763,7 @@ void parallel_for4d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
if (D0 == T0(0) || D1 == T1(0) || D2 == T2(0) || D3 == T3(0)) {
return;
}
#if OV_THREAD == OV_THREAD_TBB
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
auto work_amount = static_cast<size_t>(D0 * D1 * D2 * D3);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount)
Expand Down Expand Up @@ -838,7 +839,7 @@ void parallel_for5d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
if (D0 == T0(0) || D1 == T1(0) || D2 == T2(0) || D3 == T3(0) || D4 == T4(0)) {
return;
}
#if OV_THREAD == OV_THREAD_TBB
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
auto work_amount = static_cast<size_t>(D0 * D1 * D2 * D3 * D4);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount)
Expand Down Expand Up @@ -916,7 +917,7 @@ void parallel_for6d(const T0& D0, const T1& D1, const T2& D2, const T3& D3, cons
if (D0 == T0(0) || D1 == T1(0) || D2 == T2(0) || D3 == T3(0) || D4 == T4(0) || D5 == T5(0)) {
return;
}
#if OV_THREAD == OV_THREAD_TBB
#if (OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO)
auto work_amount = static_cast<size_t>(D0 * D1 * D2 * D3 * D4 * D5);
int nthr = parallel_get_max_threads();
if (static_cast<size_t>(nthr) > work_amount)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@ struct MemBandwidthPressure {
float ratio_mem_limited_convs = 0;
float ratio_mem_limited_deconvs = 0;
float ratio_mem_limited_gemms = 0;
float ratio_mem_limited_adds = 0;
float ratio_compute_deconvs = 0;
int total_gemms = 0;
int total_convs = 0;
int total_adds = 0;
int total_light_gemms = 0;
int total_light_convs = 0;
int total_nodes = 0;

static constexpr float UNKNOWN = FLT_MAX;
static constexpr float ALL = 1.0f;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

#include "openvino/core/parallel.hpp"

#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO
# include <tbb/enumerable_thread_specific.h>
#else
# include <functional>
Expand All @@ -25,7 +25,7 @@
namespace ov {
namespace threading {

#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO
#if OV_THREAD == OV_THREAD_TBB || OV_THREAD == OV_THREAD_TBB_AUTO || OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO

/**
* @brief A wrapper class to keep object to be thread local.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

#include "openvino/core/parallel.hpp"

#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO) || (OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO))
# include <tbb/concurrent_priority_queue.h>
# include <tbb/concurrent_queue.h>
#endif
Expand Down Expand Up @@ -47,7 +47,7 @@ class ThreadSafeQueueWithSize {
std::queue<T> _queue;
std::mutex _mutex;
};
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO))
#if ((OV_THREAD == OV_THREAD_TBB) || (OV_THREAD == OV_THREAD_TBB_AUTO) || (OV_THREAD == OV_THREAD_TBB_PARTITIONER_AUTO))
template <typename T>
using ThreadSafeQueue = tbb::concurrent_queue<T>;
template <typename T>
Expand Down
Loading
Loading