diff --git a/src/plugins/intel_npu/src/al/CMakeLists.txt b/src/plugins/intel_npu/src/al/CMakeLists.txt
index 7bdb9ccd7a1b6b..8e5383728338d7 100644
--- a/src/plugins/intel_npu/src/al/CMakeLists.txt
+++ b/src/plugins/intel_npu/src/al/CMakeLists.txt
@@ -24,6 +24,7 @@ target_link_libraries(${TARGET_NAME}
     PUBLIC
         openvino::npu_logger_utils
         openvino::runtime::dev
+        openvino_xml_util
 )
 
 set_target_properties(${TARGET_NAME} PROPERTIES INTERPROCEDURAL_OPTIMIZATION_RELEASE ${ENABLE_LTO})
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
index 50d7b85607ca21..dad4a271fadbd4 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/config/options.hpp
@@ -1426,4 +1426,18 @@ struct USE_BASE_MODEL_SERIALIZER final : OptionBase<USE_BASE_MODEL_SERIALIZER, bool> {
 
+struct SERIALIZATION_WEIGHTS_SIZE_THRESHOLD final : OptionBase<SERIALIZATION_WEIGHTS_SIZE_THRESHOLD, size_t> {
+    static std::string_view key() {
+        return ov::intel_npu::serialization_weights_size_threshold.name();
+    }
+
+    static size_t defaultValue() {
+        return 0;
+    }
+
+    static OptionMode mode() {
+        return OptionMode::RunTime;
+    }
+};
+
 }  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp
index c485af1e0a3d1a..c0f727f307aea2 100644
--- a/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/npu_private_properties.hpp
@@ -357,12 +357,21 @@ static constexpr ov::Property<bool> weightless_blob{"NPU_WEIGHTLESS_BLOB"};
  *
  * The base serializer is the OV implementation of the "XmlSerializer" without any extensions. All weights are copied in
  * a separate buffer. By turning this off, the NPU extension of the serializer is enabled. This allows optimizing the
- * process by avoiding copies into a separate weights buffer. However, this solution may be less reliable.
- *
- * @note This option doesn't actually do anything right now, it has been registered in advance.
+ * process by reducing the amount of weights that will be copied into a separate buffer. However, this solution may be
+ * less reliable.
  */
 static constexpr ov::Property<bool> use_base_model_serializer{"NPU_USE_BASE_MODEL_SERIALIZER"};
 
+/**
+ * @brief [Only for NPU Plugin]
+ * Type: size_t. Default is 0.
+ *
+ * Effective only if "use_base_model_serializer" is set to false. All "ov::Constant" buffers smaller than this value
+ * (byte size) will be copied into a separate buffer. The rest of the weights will be reconstructed at deserialization
+ * time using buffer pointers.
+ */
+static constexpr ov::Property<size_t> serialization_weights_size_threshold{"NPU_SERIALIZATION_WEIGHTS_SIZE_THRESHOLD"};
+
 /**
  * @brief [Experimental, only for NPU Plugin]
  * Type: integer.
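The two options above are designed to be used together. A minimal usage sketch (not part of the patch — the property name strings come from this header, while the model path and the 1 MiB threshold are arbitrary placeholders):

    #include "openvino/runtime/core.hpp"

    int main() {
        ov::Core core;
        const auto model = core.read_model("model.xml");

        // Opt out of the base serializer; constants of at least 1 MiB keep only
        // pointer metadata, while smaller constants are still copied.
        auto compiled = core.compile_model(model,
                                           "NPU",
                                           {{"NPU_USE_BASE_MODEL_SERIALIZER", false},
                                            {"NPU_SERIALIZATION_WEIGHTS_SIZE_THRESHOLD", static_cast<size_t>(1 << 20)}});
        return 0;
    }

Leaving the threshold at 0 (the default) keeps pointer metadata for every constant, which maximizes the memory savings but also maximizes the reliance on the weights buffers staying alive and unmoved while the compiler consumes them.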
diff --git a/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp b/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp
new file mode 100644
index 00000000000000..e71923d7ee3f70
--- /dev/null
+++ b/src/plugins/intel_npu/src/al/include/intel_npu/weights_pointer_attribute.hpp
@@ -0,0 +1,46 @@
+//
+// Copyright (C) 2025 Intel Corporation.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include
+
+#include "openvino/core/runtime_attribute.hpp"
+
+namespace intel_npu {
+
+/**
+ * @brief Attribute containing the memory address of a weights buffer and the size of the buffer in bytes.
+ * @details Used as part of the serialization/deserialization algorithms in order to allow processing models without
+ * copying weights.
+ */
+class WeightsPointerAttribute : public ov::RuntimeAttribute {
+public:
+    OPENVINO_RTTI("WeightsPointerAttribute", "0", RuntimeAttribute);
+
+    WeightsPointerAttribute() = delete;
+
+    WeightsPointerAttribute(const void* pointer, const size_t size)
+        : memory_pointer(reinterpret_cast<size_t>(pointer)),
+          byte_size(size) {}
+
+    /**
+     * @note The names of the attributes have been kept short in order to save some memory (there may be a lot of
+     * "ov::Constant" nodes in a model). Also, three characters should be sufficient to avoid collisions.
+     */
+    static constexpr const std::string_view POINTER_KEY = "mpZ";
+    static constexpr const std::string_view BYTE_SIZE_KEY = "msZ";
+
+    bool visit_attributes(ov::AttributeVisitor& visitor) override {
+        visitor.on_attribute(POINTER_KEY.data(), memory_pointer);
+        visitor.on_attribute(BYTE_SIZE_KEY.data(), byte_size);
+        return true;
+    }
+
+    size_t memory_pointer;
+    size_t byte_size;
+};
+
+}  // namespace intel_npu
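To make the mechanism concrete, here is how the attribute round-trips through a constant's runtime info (illustrative sketch, not part of the patch; it mirrors the storeWeightsPointerAttribute() helper added in vcl_serializer.cpp further below, and the constant's values are placeholders):

    #include <cassert>

    #include "intel_npu/weights_pointer_attribute.hpp"
    #include "openvino/op/constant.hpp"

    void weights_pointer_round_trip() {
        const auto constant = std::make_shared<ov::op::v0::Constant>(ov::element::f32,
                                                                     ov::Shape{2, 2},
                                                                     std::vector<float>{1.f, 2.f, 3.f, 4.f});

        // Record where the weights buffer lives instead of copying its contents.
        constant->get_rt_info()[intel_npu::WeightsPointerAttribute::get_type_info_static()] =
            intel_npu::WeightsPointerAttribute(constant->get_data_ptr(), constant->get_byte_size());

        // A deserializer can later rebuild the buffer from this metadata alone.
        const auto& attribute = constant->get_rt_info()
                                    .at(intel_npu::WeightsPointerAttribute::get_type_info_static())
                                    .as<intel_npu::WeightsPointerAttribute>();
        assert(attribute.byte_size == constant->get_byte_size());
    }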
diff --git a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp
index 9bf0cc8ce2bb20..f41ceef203ad60 100644
--- a/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp
+++ b/src/plugins/intel_npu/src/common/include/intel_npu/common/icompiler_adapter.hpp
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include "intel_npu/common/filtered_config.hpp"
 #include "intel_npu/common/igraph.hpp"
 
 namespace intel_npu {
@@ -11,7 +12,7 @@ namespace intel_npu {
 class ICompilerAdapter {
 public:
     virtual std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
-                                            const Config& config) const = 0;
+                                            const FilteredConfig& config) const = 0;
 
     /**
      * @brief Compiles the model, weights separation enabled.
@@ -27,7 +28,8 @@ class ICompilerAdapter {
      * "icompiler.hpp".
      * @return A "WeightlessGraph" type of object.
      */
-    virtual std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const = 0;
+    virtual std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
+                                              const FilteredConfig& config) const = 0;
 
     /**
      * @brief Parses the provided binary objects and returns a wrapper over the resulted L0 handles. The model may also
@@ -44,11 +46,12 @@ class ICompilerAdapter {
      */
     virtual std::shared_ptr<IGraph> parse(
         ov::Tensor mainBlob,
-        const Config& config,
+        const FilteredConfig& config,
         std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
         const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const = 0;
 
-    virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const = 0;
+    virtual ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
+                                      const FilteredConfig& config) const = 0;
     virtual uint32_t get_version() const = 0;
     virtual std::vector<std::string> get_supported_options() const = 0;
     virtual bool is_option_supported(std::string optname) const = 0;
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/custom_stream_buffer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/custom_stream_buffer.hpp
index 727132779f703f..bf7320368426c0 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/include/custom_stream_buffer.hpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/custom_stream_buffer.hpp
@@ -75,6 +75,11 @@ class writer_streambuf final : public std::streambuf {
         }
     }
 
+    pos_type seekpos(pos_type pos, std::ios_base::openmode which) override {
+        writeIt = startIt + pos;
+        return pos;
+    }
+
     OutputIt startIt;
     OutputIt writeIt;
 };
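The seekpos() override matters because ov::pass::StreamSerialize first writes a placeholder header and only seeks back to patch it once the body has been written; without the override, seeking on an ostream backed by writer_streambuf would fail. A sketch of that interaction (illustrative only — the header bytes and buffer size are placeholders, and the writer_streambuf<uint8_t*> instantiation assumes the OutputIt template parameter visible in the members above):

    #include <ostream>

    #include "custom_stream_buffer.hpp"

    void seek_back_and_patch_header(uint8_t* data) {
        writer_streambuf<uint8_t*> streamBuf(data);
        std::ostream stream(&streamBuf);

        stream.write("\0\0\0\0\0\0\0\0", 8);  // placeholder header
        stream << "<model body>";             // serialized payload
        stream.seekp(0);                      // dispatched to seekpos() above
        stream.write("HEADER00", 8);          // real header, written in place
    }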
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp
index f1c1c302f2265c..89f0d6feb8b2e3 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/driver_compiler_adapter.hpp
@@ -10,6 +10,7 @@
 #include "intel_npu/config/config.hpp"
 #include "intel_npu/utils/logger/logger.hpp"
 #include "intel_npu/utils/zero/zero_init.hpp"
+#include "vcl_serializer.hpp"
 #include "ze_graph_ext_wrappers.hpp"
 
 namespace intel_npu {
@@ -18,17 +19,20 @@ class DriverCompilerAdapter final : public ICompilerAdapter {
 public:
     DriverCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct);
 
-    std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+    std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
+                                    const FilteredConfig& config) const override;
 
-    std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;
+    std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
+                                      const FilteredConfig& config) const override;
 
     std::shared_ptr<IGraph> parse(
         ov::Tensor mainBlob,
-        const Config& config,
+        const FilteredConfig& config,
         std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
         const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
 
-    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
+                              const FilteredConfig& config) const override;
 
     std::vector<std::string> get_supported_options() const override;
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp
deleted file mode 100644
index e97c9b5aee22e1..00000000000000
--- a/src/plugins/intel_npu/src/compiler_adapter/include/ir_serializer.hpp
+++ /dev/null
@@ -1,82 +0,0 @@
-// Copyright (C) 2018-2025 Intel Corporation
-// SPDX-License-Identifier: Apache-2.0
-//
-
-#pragma once
-#include
-#include
-
-#include "intel_npu/config/config.hpp"
-#include "intel_npu/utils/logger/logger.hpp"
-#include "ze_graph_ext.h"
-
-namespace intel_npu {
-
-using SerializedIR = std::pair<size_t, std::shared_ptr<uint8_t>>;
-
-/**
- * @brief Contain all required transformation on OpenVINO model in case for external compiler usage and
- * providing forward compatibility (OV model with opset N+M, external compiler with opset N)
- */
-namespace driver_compiler_utils {
-
-class IRSerializer {
-public:
-    IRSerializer(const std::shared_ptr<const ov::Model>& origModel, const uint32_t supportedOpset = 11);
-
-    size_t getXmlSize() const {
-        return _xmlSize;
-    }
-
-    size_t getWeightsSize() const {
-        return _weightsSize;
-    }
-
-    /**
-     * @brief Serialize OpenVINO model to target buffer
-     */
-    void serializeModelToBuffer(uint8_t* xml, uint8_t* weights);
-
-    /**
-     * @brief Serialize input / output information to string format.
-     * @details Format:
-     * --inputs_precisions="0: [1:]"
-     * --inputs_layouts="0: [1:]"
-     * --outputs_precisions="0:"
-     * --outputs_layouts="0:"
-     *
-     * For older compiler versions, the name of the inputs/outputs may be used instead of their indices.
-     *
-     * Since the layout information is no longer an important part of the metadata values when using the 2.0 OV
-     * API, the layout fields shall be filled with default values in order to assure the backward compatibility
-     * with the driver.
-     */
-    SerializedIR serializeIR(const std::shared_ptr<const ov::Model>& model,
-                             ze_graph_compiler_version_info_t compilerVersion,
-                             const uint32_t supportedOpsetVersion);
-
-    std::string serializeIOInfo(const std::shared_ptr<const ov::Model>& model, const bool useIndices);
-
-    std::string serializeConfig(const Config& config,
-                                ze_graph_compiler_version_info_t compilerVersion,
-                                bool turboSupported = false);
-
-private:
-    /**
-     * @brief Serialize OpenVINO model to target stream
-     */
-    void serializeModelToStream(std::ostream& xml, std::ostream& weights);
-
-    /**
-     * @brief Get size of xml and weights from model
-     */
-    void countModelSize();
-
-    Logger _logger;
-    std::shared_ptr<ov::Model> _model = nullptr;
-    uint32_t _supportedOpset = 11;
-    size_t _xmlSize = 0;
-    size_t _weightsSize = 0;
-};
-}  // namespace driver_compiler_utils
-}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp
index b62cb91b29791f..0675d964565947 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/plugin_compiler_adapter.hpp
@@ -19,17 +19,20 @@ class PluginCompilerAdapter final : public ICompilerAdapter {
 public:
     PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct);
 
-    std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+    std::shared_ptr<IGraph> compile(const std::shared_ptr<const ov::Model>& model,
+                                    const FilteredConfig& config) const override;
 
-    std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model, const Config& config) const override;
+    std::shared_ptr<IGraph> compileWS(const std::shared_ptr<ov::Model>& model,
+                                      const FilteredConfig& config) const override;
 
     std::shared_ptr<IGraph> parse(
         ov::Tensor mainBlob,
-        const Config& config,
+        const FilteredConfig& config,
         std::optional<std::vector<ov::Tensor>> initBlobs = std::nullopt,
         const std::optional<std::shared_ptr<const ov::Model>>& model = std::nullopt) const override;
 
-    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model, const Config& config) const override;
+    ov::SupportedOpsMap query(const std::shared_ptr<const ov::Model>& model,
+                              const FilteredConfig& config) const override;
 
     std::vector<std::string> get_supported_options() const override;
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp
new file mode 100644
index 00000000000000..6b8f01b373a831
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/vcl_serializer.hpp
@@ -0,0 +1,142 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+#include
+
+#include
+#include
+
+#include "intel_npu/config/config.hpp"
+#include "intel_npu/utils/logger/logger.hpp"
+#include "openvino/core/model.hpp"
+#include "openvino/pass/manager.hpp"
+#include "ze_graph_ext.h"
+
+namespace intel_npu {
+
+using SerializedIR = std::pair<size_t, std::shared_ptr<uint8_t>>;
+
+/**
+ * @brief Contains all required transformations on the OpenVINO model for external compiler usage, while
+ * providing forward compatibility (OV model with opset N+M, external compiler with opset N).
+ */
+namespace driver_compiler_utils {
+
+/**
+ * @brief Interface to be used by the serialization algorithms.
+ * @details The "VCL" serializer is meant to integrate an OV serializer and add any additional model metadata in order
+ * to feed the compilation method of the "VCL" interface.
+ */
+class VCLSerializerBase {
+public:
+    VCLSerializerBase(const std::shared_ptr<const ov::Model>& origModel,
+                      const ze_graph_compiler_version_info_t compilerVersion,
+                      const uint32_t supportedOpset = 11);
+
+    virtual SerializedIR serialize() = 0;
+
+    virtual ~VCLSerializerBase();
+
+protected:
+    void serialize_model_to_stream(const std::function<void(ov::pass::Manager&)>& register_serialization_pass);
+
+    Logger _logger;
+    std::shared_ptr<ov::Model> _model = nullptr;
+    ze_graph_compiler_version_info_t _compilerVersion;
+    uint32_t _supportedOpset = 11;
+};
+
+/**
+ * @brief Class implementing the legacy serialization algorithms. All weights are copied into a separate buffer.
+ */
+class VCLSerializerWithWeightsCopy : public VCLSerializerBase {
+public:
+    VCLSerializerWithWeightsCopy(const std::shared_ptr<const ov::Model>& origModel,
+                                 const ze_graph_compiler_version_info_t compilerVersion,
+                                 const uint32_t supportedOpset = 11);
+
+    SerializedIR serialize() override;
+
+private:
+    /**
+     * @brief Serialize OpenVINO model to target buffer
+     */
+    void serialize_model_to_buffer(uint8_t* xml, uint8_t* weights);
+
+    /**
+     * @brief Serialize OpenVINO model to target stream
+     */
+    void serialize_model_to_stream(std::ostream& xml, std::ostream& weights);
+
+    /**
+     * @brief Get size of xml and weights from model
+     */
+    void count_model_size();
+
+    size_t _xmlSize = 0;
+    size_t _weightsSize = 0;
+};
+
+/**
+ * @brief Class implementing the optimized serialization algorithm.
+ * @details Weights will be stored either as metadata (memory location & size in bytes) or as whole buffers (just like
+ * the legacy algorithm). The amount of weights that will be copied can be controlled by leveraging the
+ * "intel_npu::serialization_weights_size_threshold" config option.
+ */
+class VCLSerializerWithoutWeightsCopy : public VCLSerializerBase {
+public:
+    VCLSerializerWithoutWeightsCopy(const std::shared_ptr<const ov::Model>& origModel,
+                                    const ze_graph_compiler_version_info_t compilerVersion,
+                                    const uint32_t supportedOpset = 11);
+
+    SerializedIR serialize() override;
+
+private:
+    void serialize_model_to_buffer(uint8_t* buffer);
+
+    void serialize_model_to_stream(std::ostream& stream);
+
+    void count_model_size();
+
+    uint64_t _serializedModelSize = 0;
+};
+
+/**
+ * @brief Serializes the model using a format supported by the "VCL" interface.
+ *
+ * @param supportedOpsetVersion The latest operator set version supported by the compiler.
+ * @param useBaseModelSerializer "true" means the legacy serializer will be used (weights will be copied), "false" means
+ * the optimized one is used instead (weights pointers are stored).
+ * @param weightsSizeThreshold Relevant only if "useBaseModelSerializer" is false. The weights smaller than this value
+ * will be copied into a separate buffer. The rest will have only their memory location stored.
+ */
+SerializedIR serializeIR(const std::shared_ptr<const ov::Model>& model,
+                         ze_graph_compiler_version_info_t compilerVersion,
+                         const uint32_t supportedOpsetVersion,
+                         const bool useBaseModelSerializer = true,
+                         const size_t weightsSizeThreshold = 0);
+
+/**
+ * @brief Serialize input / output information to string format.
+ * @details Format:
+ * --inputs_precisions="0: [1:]"
+ * --inputs_layouts="0: [1:]"
+ * --outputs_precisions="0:"
+ * --outputs_layouts="0:"
+ *
+ * For older compiler versions, the name of the inputs/outputs may be used instead of their indices.
+ *
+ * Since the layout information is no longer an important part of the metadata values when using the 2.0 OV
+ * API, the layout fields shall be filled with default values in order to assure the backward compatibility
+ * with the driver.
+ */
+std::string serializeIOInfo(const std::shared_ptr<const ov::Model>& model, const bool useIndices);
+
+std::string serializeConfig(const Config& config,
+                            ze_graph_compiler_version_info_t compilerVersion,
+                            bool turboSupported = false);
+
+}  // namespace driver_compiler_utils
+}  // namespace intel_npu
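Since both concrete serializers share the VCLSerializerBase interface, the caller can pick one up front and use it polymorphically — this is essentially what the serializeIR() helper declared above does. A sketch (the model, compilerVersion and flag values are placeholders):

    using namespace intel_npu::driver_compiler_utils;

    std::unique_ptr<VCLSerializerBase> makeSerializer(const std::shared_ptr<const ov::Model>& model,
                                                      const ze_graph_compiler_version_info_t compilerVersion,
                                                      const bool useBaseModelSerializer) {
        if (useBaseModelSerializer) {
            return std::make_unique<VCLSerializerWithWeightsCopy>(model, compilerVersion, 11);
        }
        return std::make_unique<VCLSerializerWithoutWeightsCopy>(model, compilerVersion, 11);
    }

    // intel_npu::SerializedIR ir = makeSerializer(model, version, false)->serialize();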
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/xml_serializer.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/xml_serializer.hpp
new file mode 100644
index 00000000000000..689114fbdf8543
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/xml_serializer.hpp
@@ -0,0 +1,106 @@
+//
+// Copyright (C) 2025 Intel Corporation.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include "openvino/pass/serialize.hpp"
+#include "openvino/xml_util/xml_serialize_util.hpp"
+
+namespace intel_npu {
+
+/**
+ * @brief Nothing is stored. The weights are expected to be reconstructed in some other way.
+ */
+class WeightlessWriter : public ov::util::ConstantWriter {
+public:
+    explicit WeightlessWriter(ov::util::ConstantWriter& other) : ov::util::ConstantWriter(other) {}
+
+    FilePosition write(const char*, size_t, size_t&, bool, ov::element::Type, bool) override {
+        return 0;
+    }
+};
+
+/**
+ * @brief Overridden in order to allow serializing models without copying weights.
+ * @details Weights can be stored either as values (buffer copies, just like the parent algorithm), or as metadata
+ * (memory location + buffer size in bytes). The amount of weights that are copied as values can be controlled by
+ * configuring the "intel_npu::serialization_weights_size_threshold" option.
+ */
+class XmlSerializer : public ov::util::XmlSerializer {
+public:
+    XmlSerializer(pugi::xml_node& data,
+                  const std::string& node_type_name,
+                  ov::util::ConstantWriter& constant_write_handler,
+                  int64_t version,
+                  std::shared_ptr<WeightlessWriter> weightless_constant_writer = nullptr)
+        : ov::util::XmlSerializer(data,
+                                  node_type_name,
+                                  constant_write_handler,
+                                  version,
+                                  false,
+                                  false,
+                                  ov::element::dynamic,
+                                  false),
+          m_base_constant_writer(std::ref(constant_write_handler)),
+          m_weightless_constant_writer(weightless_constant_writer
+                                           ? weightless_constant_writer
+                                           : std::make_shared<WeightlessWriter>(constant_write_handler)) {}
+
+private:
+    /**
+     * @brief Toggles between the two writers.
+     */
+    ov::util::ConstantWriter& get_constant_write_handler() override;
+
+    /**
+     * @brief Overridden in order to choose which weights writer will be used based on the occurrence of the
+     * "WeightsPointerAttribute".
+     */
+    bool append_node_attributes(ov::Node& node) override;
+
+    std::unique_ptr<ov::util::XmlSerializer> make_visitor(pugi::xml_node& data,
+                                                          const std::string& node_type_name,
+                                                          ov::util::ConstantWriter& constant_write_handler,
+                                                          int64_t version,
+                                                          bool,
+                                                          bool,
+                                                          ov::element::Type,
+                                                          bool) const override;
+
+    /**
+     * @brief The base OV writer, copies the weights in a dedicated buffer.
+     */
+    std::reference_wrapper<ov::util::ConstantWriter> m_base_constant_writer;
+    /**
+     * @brief Writes nothing. The visitor pattern will be used in order to store weights metadata instead.
+     */
+    std::shared_ptr<WeightlessWriter> m_weightless_constant_writer = nullptr;
+    bool m_use_weightless_writer = false;
+};
+
+/**
+ * @brief Leverages the "intel_npu::XmlSerializer" in order to allow serializing models without copying weights.
+ */
+class StreamSerialize : public ov::pass::StreamSerialize {
+public:
+    StreamSerialize(std::ostream& stream,
+                    const std::function<void(std::ostream&)>& custom_data_serializer,
+                    ov::pass::Serialize::Version version = ov::pass::Serialize::Version::UNSPECIFIED)
+        : ov::pass::StreamSerialize(stream, custom_data_serializer, {}, version) {}
+
+private:
+    std::unique_ptr<ov::util::XmlSerializer> make_serializer(pugi::xml_node& data,
+                                                             const std::string& node_type_name,
+                                                             ov::util::ConstantWriter& constant_write_handler,
+                                                             int64_t version,
+                                                             bool,
+                                                             bool,
+                                                             ov::element::Type,
+                                                             bool) const override {
+        return std::make_unique<XmlSerializer>(data, node_type_name, constant_write_handler, version);
+    }
+};
+
+}  // namespace intel_npu
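Wiring it together: the custom StreamSerialize pass can be run through a regular pass manager, which is how the "without weights copy" serializer drives it in vcl_serializer.cpp below. A sketch (the empty custom-data callback and the string stream are placeholders):

    #include <sstream>

    #include "openvino/pass/manager.hpp"
    #include "xml_serializer.hpp"

    void serialize_without_weight_copies(const std::shared_ptr<ov::Model>& model) {
        std::ostringstream stream;
        ov::pass::Manager manager;
        // Constants tagged with WeightsPointerAttribute are written as pointer
        // metadata only; untagged ones still go through the base ConstantWriter.
        manager.register_pass<intel_npu::StreamSerialize>(stream, nullptr);
        manager.run_passes(model);
    }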
diff --git a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
index 94a058650019e0..505c988e41151c 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/include/ze_graph_ext_wrappers.hpp
@@ -10,7 +10,7 @@
 #include "intel_npu/network_metadata.hpp"
 #include "intel_npu/utils/logger/logger.hpp"
 #include "intel_npu/utils/zero/zero_init.hpp"
-#include "ir_serializer.hpp"
+#include "vcl_serializer.hpp"
 
 namespace intel_npu {
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
index cd459b604f3f6c..df8209a23e39fd 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/driver_compiler_adapter.cpp
@@ -11,13 +11,14 @@
 #include "intel_npu/common/itt.hpp"
 #include "intel_npu/config/options.hpp"
 #include "intel_npu/utils/logger/logger.hpp"
-#include "ir_serializer.hpp"
 #include "mem_usage.hpp"
 #include "openvino/core/model.hpp"
 #include "openvino/core/rt_info/weightless_caching_attributes.hpp"
+#include "vcl_serializer.hpp"
 #include "weightless_graph.hpp"
 
 namespace {
+
 bool isInitMetadata(const intel_npu::NetworkMetadata& networkMetadata) {
     if (networkMetadata.inputs.size() == 0) {
         return false;
@@ -80,7 +81,7 @@ DriverCompilerAdapter::DriverCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct)
 }
 
 std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<const ov::Model>& model,
-                                                       const Config& config) const {
+                                                       const FilteredConfig& config) const {
     OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "compile");
 
     const ze_graph_compiler_version_info_t& compilerVersion = _compilerProperties.compilerVersion;
@@ -88,17 +89,24 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<const ov::Model>& model,
 
-    IRSerializer irSerializer(model, maxOpsetVersion);
+    auto serializedIR = driver_compiler_utils::serializeIR(model,
+                                                           compilerVersion,
+                                                           maxOpsetVersion,
+                                                           config.has<USE_BASE_MODEL_SERIALIZER>() ? config.get<USE_BASE_MODEL_SERIALIZER>()
+                                                                                                   : true,
+                                                           config.get<SERIALIZATION_WEIGHTS_SIZE_THRESHOLD>());
 
     std::string buildFlags;
     const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9));
 
     _logger.debug("build flags");
-    buildFlags += irSerializer.serializeIOInfo(model, useIndices);
+    buildFlags += driver_compiler_utils::serializeIOInfo(model, useIndices);
     buildFlags += " ";
-    buildFlags +=
-        irSerializer.serializeConfig(config, compilerVersion, _zeGraphExt->isTurboOptionSupported(compilerVersion));
+    buildFlags += driver_compiler_utils::serializeConfig(config,
+                                                         compilerVersion,
+                                                         _zeGraphExt->isTurboOptionSupported(compilerVersion));
 
     _logger.debug("compileIR Build flags : %s", buildFlags.c_str());
@@ -121,7 +129,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compile(const std::shared_ptr<const ov::Model>& model,
 }
 
 std::shared_ptr<IGraph> DriverCompilerAdapter::compileWS(const std::shared_ptr<ov::Model>& model,
-                                                         const Config& config) const {
+                                                         const FilteredConfig& config) const {
     OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "compileWS");
 
     storeWeightlessCacheAttribute(model);
@@ -144,13 +152,18 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compileWS(const std::shared_ptr<ov::Model>& model,
 
-    IRSerializer irSerializer(model, maxOpsetVersion);
+    auto serializedIR = driver_compiler_utils::serializeIR(model,
+                                                           compilerVersion,
+                                                           maxOpsetVersion,
+                                                           config.has<USE_BASE_MODEL_SERIALIZER>() ? config.get<USE_BASE_MODEL_SERIALIZER>()
+                                                                                                   : true,
+                                                           config.get<SERIALIZATION_WEIGHTS_SIZE_THRESHOLD>());
 
     std::string buildFlags;
     const bool useIndices = !((compilerVersion.major < 5) || (compilerVersion.major == 5 && compilerVersion.minor < 9));
 
-    const std::string serializedIOInfo = irSerializer.serializeIOInfo(model, useIndices);
+    const std::string serializedIOInfo = driver_compiler_utils::serializeIOInfo(model, useIndices);
 
     const FilteredConfig* plgConfig = dynamic_cast<const FilteredConfig*>(&config);
     if (plgConfig == nullptr) {
         OPENVINO_THROW("config is not FilteredConfig");
     }
@@ -177,7 +190,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::compileWS(const std::shared_ptr<ov::Model>& model,
 
 std::shared_ptr<IGraph> DriverCompilerAdapter::parse(
     ov::Tensor mainBlob,
-    const Config& config,
+    const FilteredConfig& config,
     std::optional<std::vector<ov::Tensor>> initBlobs,
     const std::optional<std::shared_ptr<const ov::Model>>& model) const {
     OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "parse");
@@ -278,7 +291,7 @@ std::shared_ptr<IGraph> DriverCompilerAdapter::parse(
 }
 
 ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
-                                                 const Config& config) const {
+                                                 const FilteredConfig& config) const {
     OV_ITT_TASK_CHAIN(query_BLOB, itt::domains::NPUPlugin, "DriverCompilerAdapter", "query");
 
     const ze_graph_compiler_version_info_t& compilerVersion = _compilerProperties.compilerVersion;
@@ -286,11 +299,16 @@ ov::SupportedOpsMap DriverCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
 
-    IRSerializer irSerializer(model, maxOpsetVersion);
+    auto serializedIR = driver_compiler_utils::serializeIR(model,
+                                                           compilerVersion,
+                                                           maxOpsetVersion,
+                                                           config.has<USE_BASE_MODEL_SERIALIZER>() ? config.get<USE_BASE_MODEL_SERIALIZER>()
+                                                                                                   : true,
+                                                           config.get<SERIALIZATION_WEIGHTS_SIZE_THRESHOLD>());
 
     std::string buildFlags;
-    buildFlags += irSerializer.serializeConfig(config, compilerVersion);
+    buildFlags += driver_compiler_utils::serializeConfig(config, compilerVersion);
 
     _logger.debug("queryImpl build flags : %s", buildFlags.c_str());
 
     ov::SupportedOpsMap result;
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp
index 1a3a1a51ac6751..c58ede410e9b23 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/plugin_compiler_adapter.cpp
@@ -91,7 +91,7 @@ PluginCompilerAdapter::PluginCompilerAdapter(const std::shared_ptr<ZeroInitStructsHolder>& zeroInitStruct)
 }
 
 std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<const ov::Model>& model,
-                                                       const Config& config) const {
+                                                       const FilteredConfig& config) const {
     OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "compile");
 
     _logger.debug("compile start");
@@ -124,7 +124,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compile(const std::shared_ptr<const ov::Model>& model,
 }
 
 std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<ov::Model>& model,
-                                                         const Config& config) const {
+                                                         const FilteredConfig& config) const {
     OV_ITT_TASK_CHAIN(COMPILE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "compileWS");
 
     std::vector<std::shared_ptr<NetworkDescription>> initNetworkDescriptions;
@@ -257,7 +257,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::compileWS(const std::shared_ptr<ov::Model>& model,
 
 std::shared_ptr<IGraph> PluginCompilerAdapter::parse(
     ov::Tensor mainBlob,
-    const Config& config,
+    const FilteredConfig& config,
     std::optional<std::vector<ov::Tensor>> initBlobs,
     const std::optional<std::shared_ptr<const ov::Model>>& model) const {
     OV_ITT_TASK_CHAIN(PARSE_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "parse");
@@ -331,7 +331,7 @@ std::shared_ptr<IGraph> PluginCompilerAdapter::parse(
 }
 
 ov::SupportedOpsMap PluginCompilerAdapter::query(const std::shared_ptr<const ov::Model>& model,
-                                                 const Config& config) const {
+                                                 const FilteredConfig& config) const {
     OV_ITT_TASK_CHAIN(QUERY_BLOB, itt::domains::NPUPlugin, "PluginCompilerAdapter", "query");
 
     return _compiler->query(model, config);
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
similarity index 65%
rename from src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp
rename to src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
index 723c27bc1cf38d..731e6ce06dde3f 100644
--- a/src/plugins/intel_npu/src/compiler_adapter/src/ir_serializer.cpp
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/vcl_serializer.cpp
@@ -2,20 +2,24 @@
 // SPDX-License-Identifier: Apache-2.0
 //
 
-#include "ir_serializer.hpp"
+#include "vcl_serializer.hpp"
 
 #include
+#include
 #include
 #include
+#include
 
 #include "custom_stream_buffer.hpp"
 #include "intel_npu/common/filtered_config.hpp"
 #include "intel_npu/config/options.hpp"
-#include "openvino/pass/manager.hpp"
+#include "intel_npu/weights_pointer_attribute.hpp"
 #include "openvino/pass/serialize.hpp"
 #include "transformations/op_conversions/convert_interpolate11_downgrade.hpp"
+#include "xml_serializer.hpp"
 
 namespace {
+
 constexpr std::string_view INPUTS_PRECISIONS_KEY = "--inputs_precisions";
 constexpr std::string_view INPUTS_LAYOUTS_KEY = "--inputs_layouts";
 constexpr std::string_view OUTPUTS_PRECISIONS_KEY = "--outputs_precisions";
@@ -136,12 +140,63 @@ std::string rankToLegacyLayoutString(const size_t rank) {
     }
 }
 
+/**
+ * @brief Stores weights metadata (memory location & buffer size in bytes) as runtime attributes of "ov::Constant"
+ * nodes.
+ * @details The presence of these attributes determines which weights are copied in a separate buffer by the
+ * serialization algorithm. If the attribute is found, the metadata required to reconstruct the weights buffer is
+ * present, therefore copying the buffer is omitted.
+ *
+ * @param model The target model, the attributes will be stored within it.
+ * @param weightSizeThreshold Determines which constant nodes will have this attribute stored within them. Implicitly,
+ * this determines which weights will be copied at serialization time. The weights smaller than this value will not get
+ * this attribute.
+ */
+void storeWeightsPointerAttribute(const std::shared_ptr<ov::Model>& model, const size_t weightSizeThreshold) {
+    for (auto&& node : model->get_ordered_ops()) {
+        if (!ov::is_type<ov::op::v0::Constant>(node)) {
+            continue;
+        }
+
+        auto constantNode = std::static_pointer_cast<ov::op::v0::Constant>(node);
+        if (constantNode->get_byte_size() < weightSizeThreshold) {
+            continue;
+        }
+
+        ov::RTMap& runtimeInfoMap = constantNode->get_rt_info();
+        runtimeInfoMap[intel_npu::WeightsPointerAttribute::get_type_info_static()] =
+            intel_npu::WeightsPointerAttribute(constantNode->get_data_ptr(), constantNode->get_byte_size());
+    }
+}
+
+/**
+ * @brief Removes the attributes stored by "storeWeightsPointerAttribute" in order to restore the model to its original
+ * state.
+ * @see storeWeightsPointerAttribute for details.
+ */
+void removeWeightsPointerAttribute(const std::shared_ptr<ov::Model>& model) {
+    for (auto&& node : model->get_ordered_ops()) {
+        if (!ov::is_type<ov::op::v0::Constant>(node)) {
+            continue;
+        }
+
+        ov::RTMap& runtimeInfoMap = node->get_rt_info();
+        const auto& resultIt = runtimeInfoMap.find(intel_npu::WeightsPointerAttribute::get_type_info_static());
+        if (resultIt != runtimeInfoMap.end()) {
+            runtimeInfoMap.erase(resultIt);
+        }
+    }
+}
+
 }  // namespace
 
 namespace intel_npu::driver_compiler_utils {
 
-IRSerializer::IRSerializer(const std::shared_ptr<const ov::Model>& origModel, const uint32_t supportedOpset)
-    : _logger("IRSerializer", Logger::global().level()),
+VCLSerializerBase::VCLSerializerBase(const std::shared_ptr<const ov::Model>& origModel,
+                                     const ze_graph_compiler_version_info_t compilerVersion,
+                                     const uint32_t supportedOpset)
+    : _logger("VCLSerializerBase", Logger::global().level()),
+      _compilerVersion(compilerVersion),
       _supportedOpset(supportedOpset) {
     // There is no const variant of run_passes so use const_cast here
     // as model serialization does not mutate the model
@@ -152,14 +207,29 @@ IRSerializer::IRSerializer(const std::shared_ptr<const ov::Model>& origModel, const uint32_t supportedOpset)
         _model = _model->clone();
         _logger.info("Clone model for offset smaller than 11");
     }
-
-    countModelSize();
 }
 
-void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weights) {
-    _logger.debug("serializeModelToStream");
+VCLSerializerBase::~VCLSerializerBase() = default;
+
+VCLSerializerWithWeightsCopy::VCLSerializerWithWeightsCopy(const std::shared_ptr<const ov::Model>& origModel,
+                                                           const ze_graph_compiler_version_info_t compilerVersion,
+                                                           const uint32_t supportedOpset)
+    : VCLSerializerBase(origModel, compilerVersion, supportedOpset) {
+    _logger.setName("VCLSerializerWithWeightsCopy");
+};
+
+VCLSerializerWithoutWeightsCopy::VCLSerializerWithoutWeightsCopy(const std::shared_ptr<const ov::Model>& origModel,
+                                                                 const ze_graph_compiler_version_info_t compilerVersion,
+                                                                 const uint32_t supportedOpset)
+    : VCLSerializerBase(origModel, compilerVersion, supportedOpset) {
+    _logger.setName("VCLSerializerWithoutWeightsCopy");
+};
+
+void VCLSerializerBase::serialize_model_to_stream(
+    const std::function<void(ov::pass::Manager&)>& register_serialization_pass) {
+    _logger.debug("serialize_model_to_stream");
     const auto passConfig = std::make_shared<ov::pass::PassConfig>();
-    ov::pass::Manager manager(std::move(passConfig), "NPU:serializeModelToStream");
+    ov::pass::Manager manager(std::move(passConfig), "NPU:serialize_model_to_stream");
 
     if (_supportedOpset < 11) {
         // Downgrade to opset10
@@ -167,18 +237,7 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weights)
         _logger.info("Downgrade op for opset smaller than 11");
     }
 
-    manager.register_pass<ov::pass::Serialize>(xml, weights);
-
-    // Depending on the driver version, the compiler attached to it may request this information as an indicator of the
-    // precision/layout preprocessing requirement. We are setting this value to "true" since the API version is no
-    // longer a cause for altering the metadata. This is due to the preprocessing performed in the OpenVINO framework's
-    // implementaion, the "ov::Model" object is preprocessed before reaching the NPU plugin.
-    const auto newAPIKey = "is_new_api";
-
-    // Flag used for indicating an NPU plugin version which switched the I/O identification convention from names to
-    // indices. The flag is required in order to inform the driver-compiler adapter to expect indices when attempting to
-    // deserialize the I/O metadata.
-    const auto useIndicesForIOMetadata = "use_indices_for_io_metadata";
+    register_serialization_pass(manager);
 
     // We modify the original model object here therefore a mutex is required
     static std::mutex rtInfoMutex;
 
@@ -186,58 +245,107 @@ void IRSerializer::serializeModelToStream(std::ostream& xml, std::ostream& weights)
     {
        std::lock_guard<std::mutex> lock(rtInfoMutex);
 
-        _model->set_rt_info(true, newAPIKey);
-        _model->set_rt_info(true, useIndicesForIOMetadata);
+        // Depending on the driver version, the compiler attached to it may request this information as an indicator of
+        // the precision/layout preprocessing requirement. We are setting this value to "true" since the API version is
+        // no longer a cause for altering the metadata. This is due to the preprocessing performed in the OpenVINO
+        // framework's implementation, the "ov::Model" object is preprocessed before reaching the NPU plugin.
+        _model->set_rt_info(true, "is_new_api");
+        // Flag used for indicating an NPU plugin version which switched the I/O identification convention from names to
+        // indices. The flag is required in order to inform the driver-compiler adapter to expect indices when
+        // attempting to deserialize the I/O metadata.
+        _model->set_rt_info(true, "use_indices_for_io_metadata");
 
         manager.run_passes(_model);
 
         auto& rtInfo = _model->get_rt_info();
-        rtInfo.erase(newAPIKey);
-        rtInfo.erase(useIndicesForIOMetadata);
+        rtInfo.erase("is_new_api");
+        rtInfo.erase("use_indices_for_io_metadata");
     }
 
-    _logger.debug("serializeModelToStream end");
+    _logger.debug("serialize_model_to_stream end");
 }
 
-void IRSerializer::countModelSize() {
-    _logger.debug("countModelSize");
+void VCLSerializerWithWeightsCopy::serialize_model_to_stream(std::ostream& xml, std::ostream& weights) {
+    const std::function<void(ov::pass::Manager&)>& register_serialization_pass = [&](ov::pass::Manager& manager) {
+        manager.register_pass<ov::pass::Serialize>(xml, weights);
+    };
+    VCLSerializerBase::serialize_model_to_stream(register_serialization_pass);
+}
+
+void VCLSerializerWithoutWeightsCopy::serialize_model_to_stream(std::ostream& stream) {
+    const std::function<void(std::ostream&)>& compiler_version_serializer = [&](std::ostream& stream) {
+        stream.write(reinterpret_cast<const char*>(&_compilerVersion), sizeof(_compilerVersion));
+    };
+    const std::function<void(ov::pass::Manager&)>& register_serialization_pass = [&](ov::pass::Manager& manager) {
+        manager.register_pass<StreamSerialize>(stream, compiler_version_serializer);
+    };
+    VCLSerializerBase::serialize_model_to_stream(register_serialization_pass);
+}
+
+void VCLSerializerWithWeightsCopy::count_model_size() {
+    _logger.debug("count_model_size");
 
     counter_streambuf xmlStreamBuf;
     counter_streambuf weightsStreamBuf;
     std::ostream xmlStream(&xmlStreamBuf);
     std::ostream weightsStream(&weightsStreamBuf);
 
-    serializeModelToStream(xmlStream, weightsStream);
+    serialize_model_to_stream(xmlStream, weightsStream);
 
     _xmlSize = xmlStreamBuf.size();
     _weightsSize = weightsStreamBuf.size();
 
-    _logger.debug("countModelSize completed, xml size: %d, weights size: %d", _xmlSize, _weightsSize);
+    _logger.debug("count_model_size completed, xml size: %d, weights size: %d", _xmlSize, _weightsSize);
+}
+
+void VCLSerializerWithoutWeightsCopy::count_model_size() {
+    _logger.debug("count_model_size");
+
+    counter_streambuf streamBuf;
+    std::ostream stream(&streamBuf);
+
+    serialize_model_to_stream(stream);
+
+    _serializedModelSize = streamBuf.size();
+
+    _logger.debug("count_model_size completed, serialized model size: %d", _serializedModelSize);
 }
 
-void IRSerializer::serializeModelToBuffer(uint8_t* xml, uint8_t* weights) {
-    _logger.debug("serializeModelToBuffer");
+void VCLSerializerWithWeightsCopy::serialize_model_to_buffer(uint8_t* xml, uint8_t* weights) {
+    _logger.debug("serialize_model_to_buffer");
 
     writer_streambuf xmlStreamBuf(xml);
     writer_streambuf weightsStreamBuf(weights);
     std::ostream xmlStream(&xmlStreamBuf);
     std::ostream weightsStream(&weightsStreamBuf);
 
-    serializeModelToStream(xmlStream, weightsStream);
+    serialize_model_to_stream(xmlStream, weightsStream);
+
+    _logger.debug("serialize_model_to_buffer end");
+}
+
+void VCLSerializerWithoutWeightsCopy::serialize_model_to_buffer(uint8_t* buffer) {
+    _logger.debug("serialize_model_to_buffer");
 
-    _logger.debug("serializeModelToBuffer end");
+    writer_streambuf streamBuf(buffer);
+    std::ostream stream(&streamBuf);
+
+    serialize_model_to_stream(stream);
+
+    _logger.debug("serialize_model_to_buffer end");
 }
 
-SerializedIR IRSerializer::serializeIR(const std::shared_ptr<const ov::Model>& model,
-                                       ze_graph_compiler_version_info_t compilerVersion,
-                                       const uint32_t supportedOpsetVersion) {
+SerializedIR VCLSerializerWithWeightsCopy::serialize() {
+    count_model_size();
+
     // Contract between adapter and compiler in driver
     const uint32_t maxNumberOfElements = 10;
     const uint64_t maxSizeOfXML = std::numeric_limits<uint64_t>::max() / 3;
     const uint64_t maxSizeOfWeights = maxSizeOfXML * 2;
     const uint32_t numberOfInputData = 2;
 
-    const uint64_t xmlSize = static_cast<uint64_t>(getXmlSize());
-    const uint64_t weightsSize = static_cast<uint64_t>(getWeightsSize());
+    const uint64_t xmlSize = static_cast<uint64_t>(_xmlSize);
+    const uint64_t weightsSize = static_cast<uint64_t>(_weightsSize);
 
     OPENVINO_ASSERT(numberOfInputData < maxNumberOfElements);
     if (xmlSize >= maxSizeOfXML) {
@@ -250,7 +358,7 @@ SerializedIR IRSerializer::serializeIR(const std::shared_ptr<const ov::Model>& model,
                        maxSizeOfWeights);
     }
 
-    const uint64_t sizeOfSerializedIR = sizeof(compilerVersion) + sizeof(numberOfInputData) + sizeof(xmlSize) +
+    const uint64_t sizeOfSerializedIR = sizeof(_compilerVersion) + sizeof(numberOfInputData) + sizeof(xmlSize) +
                                         xmlSize + sizeof(weightsSize) + weightsSize;
 
     // use array to avoid vector's memory zeroing overhead
@@ -258,8 +366,8 @@ SerializedIR IRSerializer::serializeIR(const std::shared_ptr<const ov::Model>& model,
     uint8_t* serializedIR = buffer.get();
 
     uint64_t offset = 0;
-    checkedMemcpy(serializedIR + offset, sizeOfSerializedIR - offset, &compilerVersion, sizeof(compilerVersion));
-    offset += sizeof(compilerVersion);
+    checkedMemcpy(serializedIR + offset, sizeOfSerializedIR - offset, &_compilerVersion, sizeof(_compilerVersion));
+    offset += sizeof(_compilerVersion);
     checkedMemcpy(serializedIR + offset, sizeOfSerializedIR - offset, &numberOfInputData, sizeof(numberOfInputData));
     offset += sizeof(numberOfInputData);
@@ -274,14 +382,50 @@ SerializedIR IRSerializer::serializeIR(const std::shared_ptr<const ov::Model>& model,
     uint64_t weightsOffset = offset;
     offset += weightsSize;
 
-    serializeModelToBuffer(serializedIR + xmlOffset, serializedIR + weightsOffset);
+    serialize_model_to_buffer(serializedIR + xmlOffset, serializedIR + weightsOffset);
 
     OPENVINO_ASSERT(offset == sizeOfSerializedIR);
 
     return std::make_pair(sizeOfSerializedIR, buffer);
 }
 
-std::string IRSerializer::serializeIOInfo(const std::shared_ptr<const ov::Model>& model, const bool useIndices) {
+SerializedIR VCLSerializerWithoutWeightsCopy::serialize() {
+    count_model_size();
+
+    if (_serializedModelSize >= std::numeric_limits<uint64_t>::max()) {
+        OPENVINO_THROW("The serialized model is too big to process. Size: ",
+                       _serializedModelSize,
+                       " >= ",
+                       std::numeric_limits<uint64_t>::max());
+    }
+
+    // use array to avoid vector's memory zeroing overhead
+    std::shared_ptr<uint8_t> buffer(new uint8_t[_serializedModelSize], std::default_delete<uint8_t[]>());
+    serialize_model_to_buffer(buffer.get());
+
+    return SerializedIR(_serializedModelSize, buffer);
+}
+
+SerializedIR serializeIR(const std::shared_ptr<const ov::Model>& model,
+                         const ze_graph_compiler_version_info_t compilerVersion,
+                         const uint32_t supportedOpsetVersion,
+                         const bool useBaseModelSerializer,
+                         const size_t weightsSizeThreshold) {
+    if (!useBaseModelSerializer) {
+        // Non-constness required for adding & removing weights pointer attributes. The current instance is already a
+        // clone (or should be one), we are not modifying the original model.
+        const std::shared_ptr<ov::Model> nonConstantModel = std::const_pointer_cast<ov::Model>(model);
+        storeWeightsPointerAttribute(nonConstantModel, weightsSizeThreshold);
+
+        SerializedIR serializedIR =
+            VCLSerializerWithoutWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize();
+        removeWeightsPointerAttribute(nonConstantModel);
+        return serializedIR;
+    }
+
+    return VCLSerializerWithWeightsCopy(model, compilerVersion, supportedOpsetVersion).serialize();
+}
+
+std::string serializeIOInfo(const std::shared_ptr<const ov::Model>& model, const bool useIndices) {
     const ov::ParameterVector& parameters = model->get_parameters();
     const ov::ResultVector& results = model->get_results();
 
@@ -369,9 +513,9 @@ std::string serializeIOInfo(const std::shared_ptr<const ov::Model>& model, const bool useIndices)
            outputsPrecisionSS.str() + VALUES_SEPARATOR.data() + outputsLayoutSS.str();
 }
 
-std::string IRSerializer::serializeConfig(const Config& config,
-                                          ze_graph_compiler_version_info_t compilerVersion,
-                                          bool turboSupported) {
+std::string serializeConfig(const Config& config,
+                            ze_graph_compiler_version_info_t compilerVersion,
+                            bool turboSupported) {
     Logger logger("serializeConfig", Logger::global().level());
 
     std::string content = {};
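Taken together, the new entry point boils down to a single call (sketch; the model and compilerVersion variables are placeholders, and the 1 MiB threshold is arbitrary):

    // Constants of at least 1 MiB keep only pointer metadata; smaller ones are
    // still copied into the serialized IR.
    const auto serializedIR = intel_npu::driver_compiler_utils::serializeIR(
        model,
        compilerVersion,
        /*supportedOpsetVersion=*/11,
        /*useBaseModelSerializer=*/false,
        /*weightsSizeThreshold=*/1 << 20);
    // serializedIR.first holds the total byte size, serializedIR.second the
    // buffer that is handed to the driver's compiler interface.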
diff --git a/src/plugins/intel_npu/src/compiler_adapter/src/xml_serializer.cpp b/src/plugins/intel_npu/src/compiler_adapter/src/xml_serializer.cpp
new file mode 100644
index 00000000000000..9a3abca741cbe0
--- /dev/null
+++ b/src/plugins/intel_npu/src/compiler_adapter/src/xml_serializer.cpp
@@ -0,0 +1,45 @@
+//
+// Copyright (C) 2025 Intel Corporation.
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#include "xml_serializer.hpp"
+
+#include "intel_npu/weights_pointer_attribute.hpp"
+#include "openvino/op/util/op_types.hpp"
+
+namespace intel_npu {
+
+ov::util::ConstantWriter& XmlSerializer::get_constant_write_handler() {
+    if (m_use_weightless_writer) {
+        return *m_weightless_constant_writer;
+    } else {
+        return m_base_constant_writer;
+    }
+}
+
+bool XmlSerializer::append_node_attributes(ov::Node& node) {
+    // If the "WeightsPointerAttribute" is found, then we have the metadata required to avoid copying the weights
+    // corresponding to this node.
+    m_use_weightless_writer = node.get_rt_info().count(WeightsPointerAttribute::get_type_info_static()) != 0;
+    auto result = ov::util::XmlSerializer::append_node_attributes(node);
+    m_use_weightless_writer = false;
+    return result;
+}
+
+std::unique_ptr<ov::util::XmlSerializer> XmlSerializer::make_visitor(pugi::xml_node& data,
+                                                                     const std::string& node_type_name,
+                                                                     ov::util::ConstantWriter& constant_write_handler,
+                                                                     int64_t version,
+                                                                     bool,
+                                                                     bool,
+                                                                     ov::element::Type,
+                                                                     bool) const {
+    return std::make_unique<XmlSerializer>(data,
+                                           node_type_name,
+                                           constant_write_handler,
+                                           version,
+                                           m_weightless_constant_writer);
+}
+
+}  // namespace intel_npu
diff --git a/src/plugins/intel_npu/src/plugin/src/plugin.cpp b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
index 610cf717be4393..3585a257f7ddc1 100644
--- a/src/plugins/intel_npu/src/plugin/src/plugin.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/plugin.cpp
@@ -322,6 +322,7 @@ void Plugin::init_options() {
     REGISTER_OPTION(SEPARATE_WEIGHTS_VERSION);
     REGISTER_OPTION(WS_COMPILE_CALL_NUMBER);
     REGISTER_OPTION(USE_BASE_MODEL_SERIALIZER);
+    REGISTER_OPTION(SERIALIZATION_WEIGHTS_SIZE_THRESHOLD);
 
     if (_backend) {
         if (_backend->isCommandQueueExtSupported()) {
diff --git a/src/plugins/intel_npu/src/plugin/src/properties.cpp b/src/plugins/intel_npu/src/plugin/src/properties.cpp
index 0432b69af2b363..fe0cbc43608fb5 100644
--- a/src/plugins/intel_npu/src/plugin/src/properties.cpp
+++ b/src/plugins/intel_npu/src/plugin/src/properties.cpp
@@ -384,6 +384,8 @@ void Properties::registerPluginProperties() {
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::weightless_blob, WEIGHTLESS_BLOB);
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::separate_weights_version, SEPARATE_WEIGHTS_VERSION);
     TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::use_base_model_serializer, USE_BASE_MODEL_SERIALIZER);
+    TRY_REGISTER_SIMPLE_PROPERTY(ov::intel_npu::serialization_weights_size_threshold,
+                                 SERIALIZATION_WEIGHTS_SIZE_THRESHOLD);
 
     TRY_REGISTER_CUSTOMFUNC_PROPERTY(ov::intel_npu::stepping, STEPPING, [&](const Config& config) {
         if (!config.has<STEPPING>()) {
diff --git a/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/custom_stream.cpp b/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/custom_stream.cpp
index ec12aff645f38e..f646a6455d6c1d 100644
--- a/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/custom_stream.cpp
+++ b/src/plugins/intel_npu/tests/functional/behavior/npu_driver_compiler_adapter/custom_stream.cpp
@@ -4,46 +4,30 @@
 
 #include
 #include
 
-#include "shared_test_classes/base/ov_behavior_test_utils.hpp"
 #include "common/functions.hpp"
 #include "common/npu_test_env_cfg.hpp"
 #include "common_test_utils/node_builders/constant.hpp"
 #include "intel_npu/config/options.hpp"
-#include "ir_serializer.hpp"
 #include "openvino/opsets/opset11.hpp"
+#include "shared_test_classes/base/ov_behavior_test_utils.hpp"
+#include "vcl_serializer.hpp"
 
 using CompilationParams = std::tuple;
-using IRSerializer = intel_npu::driver_compiler_utils::IRSerializer;
+using VCLSerializerWithWeightsCopy = intel_npu::driver_compiler_utils::VCLSerializerWithWeightsCopy;
+using VCLSerializerWithoutWeightsCopy = intel_npu::driver_compiler_utils::VCLSerializerWithoutWeightsCopy;
 
 namespace ov::test::behavior {
 class DriverCompilerAdapterCustomStreamTestNPU : public ov::test::behavior::OVPluginTestBase,
                                                  public testing::WithParamInterface<CompilationParams> {
 public:
-    std::string generateRandomFileName() {
-        std::stringstream ss;
-        auto now = std::chrono::high_resolution_clock::now();
-        auto seed = now.time_since_epoch().count();
-        std::mt19937 mt_rand(static_cast(seed));
-        std::uniform_int_distribution dist(0, 15);
-
-        for (unsigned int i = 0; i < 16; ++i) {
-            int random_number = dist(mt_rand);
-            ss << std::hex << random_number;
-        }
-        return ss.str();
-    }
-
     void SetUp() override {
         std::tie(target_device, configuration) = this->GetParam();
         SKIP_IF_CURRENT_TEST_IS_DISABLED()
         OVPluginTestBase::SetUp();
-        std::string fileName = generateRandomFileName();
-        xmlFileName = fileName + ".xml";
-        binFileName = fileName + ".bin";
     }
 
     static std::string getTestCaseName(const testing::TestParamInfo<CompilationParams>& obj) {
@@ -68,43 +52,25 @@ class DriverCompilerAdapterCustomStreamTestNPU : public ov::test::behavior::OVPluginTestBase,
         if (!configuration.empty()) {
             utils::PluginCache::get().reset();
         }
-        if (std::remove(xmlFileName.c_str()) != 0 || std::remove(binFileName.c_str()) != 0) {
-            ADD_FAILURE() << "Failed to remove serialized files, xml: " << xmlFileName << " bin: " << binFileName;
-        }
         APIBaseTest::TearDown();
     }
 
 protected:
     ov::AnyMap configuration;
-    std::string xmlFileName;
-    std::string binFileName;
 };
 
-TEST_P(DriverCompilerAdapterCustomStreamTestNPU, TestLargeModel) {
+TEST_P(DriverCompilerAdapterCustomStreamTestNPU, TestLargeModelWeightsCopy) {
     auto model = createModelWithLargeSize();
-    IRSerializer irSerializer(model, 11);
-    size_t xmlSize = irSerializer.getXmlSize();
-    size_t weightsSize = irSerializer.getWeightsSize();
-
-    std::vector xml(xmlSize);
-    std::vector weights(weightsSize);
-    irSerializer.serializeModelToBuffer(xml.data(), weights.data());
-
-    {
-        std::ofstream xmlFile(xmlFileName, std::ios::binary);
-        if (xmlFile) {
-            xmlFile.write(reinterpret_cast(xml.data()), xmlSize);
-            xmlFile.close();
-        }
+    const ze_graph_compiler_version_info_t dummyCompilerVersion{0, 0};
+    VCLSerializerWithWeightsCopy serializer(model, dummyCompilerVersion, 11);
+    EXPECT_NO_THROW(serializer.serialize());
+}
 
-        std::ofstream binFile(binFileName, std::ios::binary);
-        if (binFile) {
-            binFile.write(reinterpret_cast(weights.data()), weightsSize);
-            binFile.close();
-        }
-    }
-    ov::Core core;
-    EXPECT_NO_THROW(model = core.read_model(xmlFileName));
+TEST_P(DriverCompilerAdapterCustomStreamTestNPU, TestLargeModelNoWeightsCopy) {
+    auto model = createModelWithLargeSize();
+    const ze_graph_compiler_version_info_t dummyCompilerVersion{0, 0};
+    VCLSerializerWithoutWeightsCopy serializer(model, dummyCompilerVersion, 11);
+    EXPECT_NO_THROW(serializer.serialize());
 }
 
 const std::vector configs = {
diff --git a/src/plugins/intel_npu/tests/functional/internal/compiler_adapter/zero_graph.hpp b/src/plugins/intel_npu/tests/functional/internal/compiler_adapter/zero_graph.hpp
index 0c133c32f41c15..9a24a2765dc4e7 100644
--- a/src/plugins/intel_npu/tests/functional/internal/compiler_adapter/zero_graph.hpp
+++ b/src/plugins/intel_npu/tests/functional/internal/compiler_adapter/zero_graph.hpp
@@ -16,8 +16,8 @@
 #include "intel_npu/utils/zero/zero_mem.hpp"
 #include "intel_npu/utils/zero/zero_mem_pool.hpp"
 #include "intel_npu/utils/zero/zero_utils.hpp"
-#include "ir_serializer.hpp"
 #include "openvino/runtime/intel_npu/properties.hpp"
+#include "vcl_serializer.hpp"
 #include "ze_graph_ext_wrappers.hpp"
 #include "zero_init_mock.hpp"
 
@@ -85,7 +85,8 @@ class ZeroGraphTest : public ::testing::TestWithParam
         const auto compilerProperties = zeGraphExt->getCompilerProperties();
         const auto maxOpsetVersion = compilerProperties.maxOVOpsetVersionSupported;
-        irSerializer = std::make_shared<IRSerializer>(IRSerializer(model, maxOpsetVersion));
+        vclSerializer =
+            std::make_shared<VCLSerializerWithWeightsCopy>(model, compilerProperties.compilerVersion, maxOpsetVersion);
     }
 
     void TearDown() override {
@@ -93,16 +94,13 @@ class ZeroGraphTest : public ::testing::TestWithParam
     }
 
     void getSerializedIR() {
-        const auto compilerProperties = zeGraphExt->getCompilerProperties();
-        const ze_graph_compiler_version_info_t& compilerVersion = compilerProperties.compilerVersion;
-        const auto maxOpsetVersion = compilerProperties.maxOVOpsetVersionSupported;
-        serializedIR = irSerializer->serializeIR(model, compilerVersion, maxOpsetVersion);
+        serializedIR = vclSerializer->serialize();
     }
 
     bool bypassUmdCache() {
         if (!configuration.empty()) {
             for (auto& configItem : configuration) {
-                if (configItem.first == ov::cache_dir.name()) {
+                if (configItem.first == ov::cache_dir.name()) {
                     const auto set_cache_dir = configItem.second;
                     if (!set_cache_dir.empty()) {
                         return true;
@@ -127,7 +125,7 @@ class ZeroGraphTest : public ::testing::TestWithParam
 
     std::shared_ptr<ov::Model> model;
-    std::shared_ptr<IRSerializer> irSerializer;
+    std::shared_ptr<VCLSerializerWithWeightsCopy> vclSerializer;
     std::string targetDevice;
     std::string blobPath;