openvinotoolkit
diff --git a/‎.gitmodules‎
Lines changed: 0 additions & 3 deletions b/‎.gitmodules‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎cmake/compiler_commit_hash.cmake‎
Lines changed: 29 additions & 0 deletions b/‎cmake/compiler_commit_hash.cmake‎
Lines changed: 29 additions & 0 deletions
diff --git a/‎cmake/prep_ldscript.py‎
Lines changed: 1 addition & 1 deletion b/‎cmake/prep_ldscript.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎scripts/extract_layers_info/obtain_csv_files/readme.md‎
Lines changed: 2 additions & 2 deletions b/‎scripts/extract_layers_info/obtain_csv_files/readme.md‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/CMakeLists.txt‎
Lines changed: 0 additions & 3 deletions b/‎src/CMakeLists.txt‎
Lines changed: 0 additions & 3 deletions
diff --git a/‎src/vpux_compiler/CMakeLists.txt‎
Lines changed: 20 additions & 0 deletions b/‎src/vpux_compiler/CMakeLists.txt‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎src/vpux_compiler/cmake/add_npu_library.cmake‎
Lines changed: 1 addition & 0 deletions b/‎src/vpux_compiler/cmake/add_npu_library.cmake‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/vpux_compiler/cmake/vpux_tblgen.cmake‎
Lines changed: 20 additions & 40 deletions b/‎src/vpux_compiler/cmake/vpux_tblgen.cmake‎
Lines changed: 20 additions & 40 deletions
diff --git a/‎src/vpux_compiler/docs/code_style.md‎
Lines changed: 53 additions & 1 deletion b/‎src/vpux_compiler/docs/code_style.md‎
Lines changed: 53 additions & 1 deletion
diff --git a/‎src/vpux_compiler/docs/guides/project_structure.md‎
Lines changed: 2 additions & 2 deletions b/‎src/vpux_compiler/docs/guides/project_structure.md‎
Lines changed: 2 additions & 2 deletions
@@ -10,9 +10,6 @@
 [submodule "thirdparty/vpucostmodel"]
 	path = thirdparty/vpucostmodel
 	url = https://github.com/intel/npu-nn-cost-model.git
-[submodule "thirdparty/yaml-cpp"]
-	path = thirdparty/yaml-cpp
-	url = https://github.com/jbeder/yaml-cpp
 [submodule "thirdparty/gtest-parallel"]
 	path = thirdparty/gtest-parallel
 	url = https://github.com/google/gtest-parallel.git
@@ -0,0 +1,29 @@
+#
+# Copyright (C) 2025 Intel Corporation.
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# Script-mode helper (run with `cmake -P`): regenerates COMMIT_HASH_FILE from
+# COMMIT_HASH_PATTERN whenever the commit checked out in REPO_DIR changes.
+#
+# Expected -D inputs:
+#   REPO_DIR            - directory in which `git rev-parse HEAD` is executed
+#   COMMIT_HASH_PATTERN - template handed to configure_file() (@VAR@ substitution only)
+#   COMMIT_HASH_FILE    - generated output file
+#   COMMIT_HASH_CACHE   - file holding the hash used for the previous generation
+execute_process(
+    COMMAND
+        git rev-parse HEAD
+    WORKING_DIRECTORY "${REPO_DIR}"
+    OUTPUT_VARIABLE CURRENT_COMMIT_HASH
+    OUTPUT_STRIP_TRAILING_WHITESPACE
+    RESULT_VARIABLE ERROR_CODE
+)
+
+# RESULT_VARIABLE holds an error string instead of a number when the process
+# could not be started, so compare it as a quoted string to keep if() well-formed.
+if (NOT "${ERROR_CODE}" STREQUAL "0")
+    message(FATAL_ERROR "Failed to capture compiler git commit.")
+endif()
+
+set(LAST_COMMIT_HASH "")
+if (EXISTS "${COMMIT_HASH_CACHE}")
+    file(READ "${COMMIT_HASH_CACHE}" LAST_COMMIT_HASH)
+endif()
+
+# Skip the work only when the hash is unchanged AND the generated file is still
+# present; a missing output is regenerated even if the cache is up to date.
+if ("${CURRENT_COMMIT_HASH}" STREQUAL "${LAST_COMMIT_HASH}" AND EXISTS "${COMMIT_HASH_FILE}")
+    return()
+endif()
+
+# Generate first and record the hash afterwards, so a failed generation is
+# retried on the next run instead of being hidden by an already updated cache.
+configure_file("${COMMIT_HASH_PATTERN}" "${COMMIT_HASH_FILE}" @ONLY)
+file(WRITE "${COMMIT_HASH_CACHE}" "${CURRENT_COMMIT_HASH}")
@@ -17,7 +17,7 @@ def prep_ldscript_header(filename_ld, filename_h, varname):
     lines = [line.strip() for line in text.split("\n")]
     lines = [line for line in lines if line]
 
-    vardef_begin = f'const char* {varname} = R"ldscript(\n'
+    vardef_begin = f'static const char* {varname} = R"ldscript(\n'
     vardef_end = '\n)ldscript";\n'
 
     result = "#pragma once\n\n" + vardef_begin + "\n".join(lines) + vardef_end
 
@@ -4,8 +4,8 @@ Utility "Extract Layers Info" is python3 scripts (recommended Python 3.6 or high
 information about layers in neural networks IRs. Later this information could be saved in various formats: csv, html, etc.
 Current version supports csv-files.
 
-### How to obtain cvs-files and upload them into spreadsheet
-In this scenario data about IR's layers are written to cvs-files for transformations into spreadsheet.
+### How to obtain csv-files and upload them into spreadsheet
+In this scenario data about IR's layers are written to csv-files for transformations into spreadsheet.
 During working process script shows list of directories where it finds IRs and list of corresponding csv-files.
 
 1. Goto script directory:
 
@@ -19,6 +19,3 @@ endif()
 add_subdirectory(vpux_utils)
 
 add_subdirectory(vpux_compiler)
-
-if((BUILD_SHARED_LIBS AND ENABLE_MLIR_COMPILER))
-endif()
@@ -88,6 +88,26 @@ include(cmake/add_npu_library.cmake)
 
 set(VPUX_COMPILER_VERSION_FILE ${CMAKE_CURRENT_SOURCE_DIR}/src/compiler_version.cpp)
 
+# Compiler repo git commit hash.
+#   COMMIT_HASH_PATTERN - header template located in the source tree
+#   COMMIT_HASH_FILE    - header generated into the binary tree
+#   COMMIT_HASH_CACHE   - file in the binary tree passed to the script as its hash cache
+set(COMMIT_HASH_PATTERN ${CMAKE_CURRENT_SOURCE_DIR}/include/vpux/compiler/compiler_hash.hpp.in)
+set(COMMIT_HASH_FILE ${CMAKE_CURRENT_BINARY_DIR}/include/vpux/compiler/compiler_hash.hpp)
+set(COMMIT_HASH_CACHE ${CMAKE_CURRENT_BINARY_DIR}/include/vpux/compiler/compiler_hash.hpp.pre)
+
+# Part of the default build (ALL): runs cmake/compiler_commit_hash.cmake in script
+# mode at build time, passing the paths above as -D definitions.
+add_custom_target(update_compiler_hash ALL
+    COMMAND ${CMAKE_COMMAND}
+        -D "REPO_DIR=${PROJECT_SOURCE_DIR}"
+        -D "COMMIT_HASH_PATTERN=${COMMIT_HASH_PATTERN}"
+        -D "COMMIT_HASH_FILE=${COMMIT_HASH_FILE}"
+        -D "COMMIT_HASH_CACHE=${COMMIT_HASH_CACHE}"
+        -P "${PROJECT_SOURCE_DIR}/cmake/compiler_commit_hash.cmake"
+    COMMENT "Updating compiler commit hash..."
+    BYPRODUCTS "${COMMIT_HASH_FILE}"
+    # Keep argument escaping identical across generators and platforms.
+    VERBATIM
+)
+
+# Interface target exposing the directory that contains the generated header;
+# add_dependencies() orders update_compiler_hash before this target.
+add_library(compiler_commit_hash INTERFACE)
+target_include_directories(compiler_commit_hash INTERFACE ${CMAKE_CURRENT_BINARY_DIR}/include/vpux/compiler)
+add_dependencies(compiler_commit_hash update_compiler_hash)
+
 # Construct source files after BITCOMPACTOR configuration
 add_subdirectory(src)
 get_property(VPU_SRC_LIBS GLOBAL PROPERTY NPU_SRC_LIB_LIST)
 
@@ -66,4 +66,5 @@ function(add_npu_library name)
 
     update_npu_src_lib_list(${name})
     enable_warnings_as_errors(${name} WIN_STRICT)
+    ov_build_target_faster(${name} PCH_HEADER ${VPU_COMPILER_PCH_FILE})
 endfunction(add_npu_library)
@@ -7,20 +7,16 @@ add_custom_target(MLIRVPUXIncGenList)
 
 # Sets dialect.td as the tablegen input and requests dialect.hpp.inc /
 # dialect.cpp.inc (dialect declarations / definitions) for `dialect_namespace`.
 # The outputs are published as target MLIRVPUX<dialect_namespace>DialectIncGen,
 # which is added as a dependency of MLIRVPUXIncGenList.
 function(add_vpux_dialect dialect_namespace)
     set(LLVM_TARGET_DEFINITIONS dialect.td)
-    mlir_tablegen(dialect.hpp.inc -gen-dialect-decls -dialect=${dialect_namespace}
-    )
-    mlir_tablegen(dialect.cpp.inc -gen-dialect-defs -dialect=${dialect_namespace}
-    )
+    mlir_tablegen(dialect.hpp.inc -gen-dialect-decls -dialect=${dialect_namespace})
+    mlir_tablegen(dialect.cpp.inc -gen-dialect-defs -dialect=${dialect_namespace})
     add_public_tablegen_target(MLIRVPUX${dialect_namespace}DialectIncGen)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${dialect_namespace}DialectIncGen)
 endfunction()
 
 function(add_vpux_ops dialect_namespace arch)
     set(LLVM_TARGET_DEFINITIONS ops.td)
-    mlir_tablegen(ops.hpp.inc -gen-op-decls
-    )
-    mlir_tablegen(ops.cpp.inc -gen-op-defs
-    )
+    mlir_tablegen(ops.hpp.inc -gen-op-decls)
+    mlir_tablegen(ops.cpp.inc -gen-op-defs)
     add_public_tablegen_target(MLIRVPUX${dialect_namespace}${arch}OpsIncGen)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${dialect_namespace}${arch}OpsIncGen)
     if(arch STREQUAL GENERIC)
@@ -33,51 +29,42 @@ endfunction()
 # Per-file variant of op generation: uses <target_dir>/<ops_target>.td as the
 # tablegen input and requests <target_dir>/<ops_target>.hpp.inc / .cpp.inc
 # (op declarations / definitions). The matching directory is created in the
 # binary tree first. The outputs are published as target
 # MLIRVPUX<dialect_namespace><arch><ops_target>OpsIncGen, which is added as a
 # dependency of MLIRVPUXIncGenList.
 function(add_vpux_ops_granular dialect_namespace arch target_dir ops_target)
     set(LLVM_TARGET_DEFINITIONS ${target_dir}/${ops_target}.td)
     file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/${target_dir})
-    mlir_tablegen(${target_dir}/${ops_target}.hpp.inc -gen-op-decls
-    )
-    mlir_tablegen(${target_dir}/${ops_target}.cpp.inc -gen-op-defs
-    )
+    mlir_tablegen(${target_dir}/${ops_target}.hpp.inc -gen-op-decls)
+    mlir_tablegen(${target_dir}/${ops_target}.cpp.inc -gen-op-defs)
     add_public_tablegen_target(MLIRVPUX${dialect_namespace}${arch}${ops_target}OpsIncGen)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${dialect_namespace}${arch}${ops_target}OpsIncGen)
 endfunction()
 
 # Sets ops_interfaces.td as the tablegen input and requests
 # ops_interfaces.hpp.inc / ops_interfaces.cpp.inc (op-interface declarations /
 # definitions). The outputs are published as target
 # MLIRVPUX<ops_namespace>OpsInterfacesIncGen, which is added as a dependency of
 # MLIRVPUXIncGenList. Also registers op-interface documentation through
 # add_mlir_doc(), passing `doc_dir` as its output directory argument.
 function(add_vpux_ops_interface ops_namespace doc_dir)
     set(LLVM_TARGET_DEFINITIONS ops_interfaces.td)
-    mlir_tablegen(ops_interfaces.hpp.inc -gen-op-interface-decls
-    )
-    mlir_tablegen(ops_interfaces.cpp.inc -gen-op-interface-defs
-    )
+    mlir_tablegen(ops_interfaces.hpp.inc -gen-op-interface-decls)
+    mlir_tablegen(ops_interfaces.cpp.inc -gen-op-interface-defs)
     add_public_tablegen_target(MLIRVPUX${ops_namespace}OpsInterfacesIncGen)
     add_mlir_doc(ops_interfaces _${ops_namespace}_ops_interfaces ${doc_dir} -gen-op-interface-docs)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${ops_namespace}OpsInterfacesIncGen)
 endfunction()
 
 # Sets type_interfaces.td as the tablegen input and requests
 # type_interfaces.hpp.inc / type_interfaces.cpp.inc (type-interface
 # declarations / definitions). The outputs are published as target
 # MLIRVPUX<ops_namespace>TypeInterfacesIncGen, which is added as a dependency of
 # MLIRVPUXIncGenList. Also registers type-interface documentation through
 # add_mlir_doc(), passing `doc_dir` as its output directory argument.
 function(add_vpux_type_interface ops_namespace doc_dir)
     set(LLVM_TARGET_DEFINITIONS type_interfaces.td)
-    mlir_tablegen(type_interfaces.hpp.inc -gen-type-interface-decls
-    )
-    mlir_tablegen(type_interfaces.cpp.inc -gen-type-interface-defs
-    )
+    mlir_tablegen(type_interfaces.hpp.inc -gen-type-interface-decls)
+    mlir_tablegen(type_interfaces.cpp.inc -gen-type-interface-defs)
     add_public_tablegen_target(MLIRVPUX${ops_namespace}TypeInterfacesIncGen)
     add_mlir_doc(type_interfaces _${ops_namespace}_type_interfaces ${doc_dir} -gen-type-interface-docs)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${ops_namespace}TypeInterfacesIncGen)
 endfunction()
 
 # Sets attr_interfaces.td as the tablegen input and requests
 # attr_interfaces.hpp.inc / attr_interfaces.cpp.inc (attribute-interface
 # declarations / definitions). The outputs are published as target
 # MLIRVPUX<ops_namespace>AttrInterfacesIncGen, which is added as a dependency of
 # MLIRVPUXIncGenList. Also registers attribute-interface documentation through
 # add_mlir_doc(), passing `doc_dir` as its output directory argument.
 function(add_vpux_attr_interface ops_namespace doc_dir)
     set(LLVM_TARGET_DEFINITIONS attr_interfaces.td)
-    mlir_tablegen(attr_interfaces.hpp.inc -gen-attr-interface-decls
-    )
-    mlir_tablegen(attr_interfaces.cpp.inc -gen-attr-interface-defs
-    )
+    mlir_tablegen(attr_interfaces.hpp.inc -gen-attr-interface-decls)
+    mlir_tablegen(attr_interfaces.cpp.inc -gen-attr-interface-defs)
     add_public_tablegen_target(MLIRVPUX${ops_namespace}AttrInterfacesIncGen)
     add_mlir_doc(attr_interfaces _${ops_namespace}_attr_interfaces ${doc_dir} -gen-attr-interface-docs)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${ops_namespace}AttrInterfacesIncGen)
 endfunction()
 
 function(add_vpux_pass ops_namespace doc_prefix doc_dir)
     set(LLVM_TARGET_DEFINITIONS passes.td)
-    mlir_tablegen(passes.hpp.inc -gen-pass-decls -name=${ops_namespace}
-    )
+    mlir_tablegen(passes.hpp.inc -gen-pass-decls -name=${ops_namespace})
     add_public_tablegen_target(MLIRVPUX${doc_prefix}PassesIncGen)
     add_mlir_doc(passes _${doc_prefix}_passes ${doc_dir} -gen-pass-doc)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${doc_prefix}PassesIncGen)
@@ -88,27 +75,21 @@ function(add_vpux_attribute ops_namespace)
     cmake_parse_arguments(ARG "${options}" "" "" ${ARGN})
     set(LLVM_TARGET_DEFINITIONS attributes.td)
     if(ARG_ENABLE_VPUX_ENUMS)
-        mlir_tablegen(enums.hpp.inc -gen-enum-decls
-        )
-        mlir_tablegen(enums.cpp.inc -gen-enum-defs
-        )
+        mlir_tablegen(enums.hpp.inc -gen-enum-decls)
+        mlir_tablegen(enums.cpp.inc -gen-enum-defs)
     endif()
     if(ARG_ENABLE_VPUX_ATTR)
-        mlir_tablegen(attributes.hpp.inc -gen-attrdef-decls
-        )
-        mlir_tablegen(attributes.cpp.inc -gen-attrdef-defs
-        )
+        mlir_tablegen(attributes.hpp.inc -gen-attrdef-decls)
+        mlir_tablegen(attributes.cpp.inc -gen-attrdef-defs)
     endif()
     add_public_tablegen_target(MLIRVPUX${ops_namespace}AttrIncGen)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${ops_namespace}AttrIncGen)
 endfunction()
 
 # Sets types.td as the tablegen input and requests types.hpp.inc /
 # types.cpp.inc (typedef declarations / definitions), passing `ops_namespace`
 # as the --typedefs-dialect value. The outputs are published as target
 # MLIRVPUX<ops_namespace>TypesIncGen, which is added as a dependency of
 # MLIRVPUXIncGenList.
 function(add_vpux_type ops_namespace)
     set(LLVM_TARGET_DEFINITIONS types.td)
-    mlir_tablegen(types.hpp.inc -gen-typedef-decls --typedefs-dialect=${ops_namespace}
-    )
-    mlir_tablegen(types.cpp.inc -gen-typedef-defs --typedefs-dialect=${ops_namespace}
-    )
+    mlir_tablegen(types.hpp.inc -gen-typedef-decls --typedefs-dialect=${ops_namespace})
+    mlir_tablegen(types.cpp.inc -gen-typedef-defs --typedefs-dialect=${ops_namespace})
     add_public_tablegen_target(MLIRVPUX${ops_namespace}TypesIncGen)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${ops_namespace}TypesIncGen)
 endfunction()
@@ -131,8 +112,7 @@ endfunction()
 
 # Sets rewriters/<td_file>.td as the tablegen input and requests
 # <td_file>.hpp.inc with -gen-rewriters. The output is published as target
 # MLIRVPUX<ops_namespace>RewriterIncGen, which is added as a dependency of
 # MLIRVPUXIncGenList.
 # NOTE(review): the target name depends only on `ops_namespace`, not on
 # `td_file` - presumably this is called once per namespace; confirm.
 function(add_vpux_rewrite td_file ops_namespace)
     set(LLVM_TARGET_DEFINITIONS rewriters/${td_file}.td)
-    mlir_tablegen(${td_file}.hpp.inc -gen-rewriters
-    )
+    mlir_tablegen(${td_file}.hpp.inc -gen-rewriters)
     add_public_tablegen_target(MLIRVPUX${ops_namespace}RewriterIncGen)
     add_dependencies(MLIRVPUXIncGenList MLIRVPUX${ops_namespace}RewriterIncGen)
 endfunction()
@@ -369,6 +369,51 @@ Example:
 #include "vpux/compiler/init.hpp"
 ```
 
+### Forward declarations
+
+Some headers can grow very large over time, which can negatively affect the build time of the project. During the project build, each header must be processed for every file that includes it. As a consequence, if the headers are large, a significant amount of time is spent processing the same header repeatedly. One way to prevent this is to avoid including large headers in other headers, by using forward declarations where possible. For example:
+
+```cpp
+// BAD: `VPUIP/IR/ops.hpp` contains the definitions of all of the operations in the VPUIP dialect,
+//      which can result in tens of thousands of lines included in the file, while this header only needs to know that VPUIP::NCEClusterTaskOp exists
+#include "vpux/compiler/dialect/VPUIP/IR/ops.hpp"
+vpux::VPUIP::NCEClusterTaskOp processNCEOp(vpux::VPUIP::NCEClusterTaskOp);
+
+// OK: the `vpux::VPUIP::NCEClusterTaskOp` symbol is forward-declared, so it is known to the rest of the header; the large `VPUIP/IR/ops.hpp` does not need to be included
+namespace vpux::VPUIP {
+class NCEClusterTaskOp;
+}
+vpux::VPUIP::NCEClusterTaskOp processNCEOp(vpux::VPUIP::NCEClusterTaskOp);
+```
+
+With a forward declaration, the definition of a symbol is no longer known to the file. The declaration is sufficient for simple cases like the one above, where the symbol is only used in the signature of a function (as an argument or a return value). However, a forward declaration cannot be used when the definition of the symbol is necessary, such as when the size of the object or its internal details (e.g. methods) must be known. Examples:
+
+```cpp
+// The `getInput` method of `vpux::VPUIP::NCEClusterTaskOp` is accessed, so its definition must be known
+// Note: the function is inline, so it can be defined in a header file
+inline mlir::Value getNCEInput(vpux::VPUIP::NCEClusterTaskOp nceOp) {
+    return nceOp.getInput();
+}
+
+// The definitions of `mlir::IntegerAttr` and `mlir::MLIRContext` must be known
+// Note: the function is a template one, so it can be defined in a header file
+template <typename T>
+mlir::IntegerAttr getIntAttr(mlir::MLIRContext* ctx, T val) {
+    return mlir::IntegerAttr::get(mlir::IntegerType::get(ctx, 64), val);
+}
+
+// The definition of `vpux::VPUIP::NCEClusterTaskOp` must be known, for it to have a default value
+void processNCEOp(vpux::VPUIP::NCEClusterTaskOp = nullptr);
+```
+
+Based on these examples, it should be clear when it is feasible to use forward declarations. In case the definition of a symbol is necessary because it is used in template / inline functions or because it has a default value, consideration should be given to refactoring this code. In many circumstances it is possible to move such function definitions to source files or to avoid default arguments, especially when the build time of the project would otherwise suffer.
+
+When using forward declaration for a symbol, its definition will likely be necessary in the source file(s) that use it. For the first example above, the source file that defines the `processNCEOp` method will need to include the `VPUIP/IR/ops.hpp` header. The benefit of this approach is that only this source file will have to process `VPUIP/IR/ops.hpp` during the project build, instead of all of the sources which include the header that declares `processNCEOp`.
+
+**Note:** The forward declaration of a symbol must be identical to its definition. For example, if an enum is defined as `enum class MyEnum : uint64_t {...}`, its forward declaration must reflect this type as well: `enum class MyEnum : uint64_t;`. Otherwise, the project will fail to build with an error.
+
+**Note:** In case the definition of a symbol is necessary, but a translation unit only has access to its declaration, an incomplete type error will appear during build. To solve this, the header which contains the symbol's definition should be included in the source that requires it.
+
 ## Patterns
 
 ### Return Early Pattern
@@ -525,6 +570,13 @@ const auto newType = callOnShapeOf(type, [&](const auto& shape) {
 
 <u>Note:</u> The return type of the callable must be the same regardless of the input shape type.
 
+<u>Note:</u> It is recommended to use the `getBoundedShape/getBoundedMemShape` helper functions instead of `getShape/getMemShape` whenever a shape may be either static or dynamic.
+ * Prefer `getBoundedShape/getBoundedMemShape` if shapes might be dynamic, and logic should operate on upper bounds in that case.
+ `getBoundedShape/getBoundedMemShape` returns the upper bounds of the shape if it is bounded, otherwise returns the shape itself.
+ * Use `getShape/getMemShape` when you specifically need to know which dimensions are dynamic. Be aware that for dynamic dimensions, `getShape/getMemShape` will return `mlir::ShapedType::kDynamic` as the value. Ensure your logic correctly handles this case and works with IR as expected.
+ * Use `getShape/getMemShape` if you are certain that the operation will only ever encounter static shapes. In this case, all dimensions are known at compile time, and `getShape` will return concrete values for each dimension.
+
+This helps ensure that any logic relying on shape information is robust to both static and dynamic cases and avoids unexpected behavior when working with tensors or arrays whose dimensions may not be fully known at compile time.
+
 ### Using method 'llvm::make_early_inc_range'
 
 This method is useful when you iterate through a list of objects and you need to change this range at the same time. From the description:
@@ -850,7 +902,7 @@ void implementPattern(IE::ConvolutionOp origOp) {
 However, `mlir::UnknownLoc` and `mlir::NameLoc` can be used in cases, where operation does not originate from original nGraph network or represents compiler internals. Usually such operations belongs to top-level module op. PSS tests and mapped-inference related passes are also an exception.
 
 ```cpp
-innerBuilder.create<IE::MemoryResourceOp>(mlir::UnknownLoc::get(ctx), memSpace.getLeafReference(), byteSizeAttr, nullptr); // OK, memory reservation is not a real operation
+innerBuilder.create<config::MemoryResourceOp>(mlir::UnknownLoc::get(ctx), memSpace.getLeafReference(), byteSizeAttr, nullptr); // OK, memory reservation is not a real operation
 
 auto newFuncOp = innerModuleBuilder.create<mlir::func::FuncOp>(mlir::UnknownLoc::get(ctx), functionName, funcType); // OK, funcOp here is external shave function
 
 
@@ -139,7 +139,7 @@ where `strategy` is `IGreedilyPassStrategy` and it can be implemented in differe
 // 37XX
 void UnrollDistributedOpsStrategy::addPatterns(mlir::RewritePatternSet& patterns) {
     auto module = _func->getParentOfType<mlir::ModuleOp>();
-    auto dmaOp = IE::getAvailableExecutor(module, VPU::ExecutorKind::DMA_NN);
+    auto dmaOp = config::getAvailableExecutor(module, VPU::ExecutorKind::DMA_NN);
     auto dmaPortCount = dmaOp.getCount();
 
     patterns.add<VPUIP::ClusterDMARewriter>(&_ctx, dmaPortCount, _log);
@@ -150,7 +150,7 @@ void UnrollDistributedOpsStrategy::addPatterns(mlir::RewritePatternSet& patterns
 // 40XX
 void UnrollDistributedOpsStrategy::addPatterns(mlir::RewritePatternSet& patterns) {
     auto module = _func->getParentOfType<mlir::ModuleOp>();
-    auto dmaOp = IE::getAvailableExecutor(module, VPU::ExecutorKind::DMA_NN);
+    auto dmaOp = config::getAvailableExecutor(module, VPU::ExecutorKind::DMA_NN);
     auto dmaPortCount = dmaOp.getCount();
 
     patterns.add<VPUIP::ClusterDMARewriter>(&_ctx, dmaPortCount, _log);