|
| 1 | + |
| 2 | +// Copyright (C) 2025 Intel Corporation |
| 3 | +// SPDX-License-Identifier: Apache-2.0 |
| 4 | + |
| 5 | +#include "pipeline_stateful_npu.hpp" |
| 6 | +#include "speculative_decoding/speculative_decoding_npu.hpp" |
| 7 | +#include "llm/pipeline_stateful.hpp" |
| 8 | +#include "llm/pipeline_static.hpp" |
| 9 | +#include "utils.hpp" |
| 10 | + |
| 11 | +#include <fstream> |
| 12 | + |
| 13 | +#include "openvino/runtime/core.hpp" |
| 14 | +#include "openvino/core/parallel.hpp" |
| 15 | +#include "openvino/genai/text_streamer.hpp" |
| 16 | + |
| 17 | +namespace { |
| 18 | + ov::genai::ModelDesc |
| 19 | + extract_draft_model_from_config(ov::AnyMap& config) { |
| 20 | + ov::genai::ModelDesc draft_model; |
| 21 | + if (config.find(ov::genai::utils::DRAFT_MODEL_ARG_NAME) != config.end()) { |
| 22 | + draft_model = config.at(ov::genai::utils::DRAFT_MODEL_ARG_NAME).as<ov::genai::ModelDesc>(); |
| 23 | + config.erase(ov::genai::utils::DRAFT_MODEL_ARG_NAME); |
| 24 | + } |
| 25 | + return draft_model; |
| 26 | +} |
| 27 | +} // anonymous namespace |
| 28 | + |
| 29 | +namespace ov::genai { |
| 30 | + |
| 31 | +// NB: No constructor for creation of pipeline from infer request, as pipeline from infer request |
| 32 | +// for NPU is handled inside of ov::genai::StatefulLLMPipeline class itself. |
| 33 | +StatefulLLMPipelineNPU::StatefulLLMPipelineNPU( |
| 34 | + const std::filesystem::path& models_path, |
| 35 | + const ov::genai::Tokenizer& tokenizer, |
| 36 | + const ov::AnyMap& properties) |
| 37 | + : StatefulLLMPipelineNPU( |
| 38 | + utils::read_model(models_path, properties), |
| 39 | + tokenizer, |
| 40 | + properties, |
| 41 | + utils::from_config_json_if_exists(models_path) |
| 42 | + ) {} |
| 43 | + |
| 44 | +StatefulLLMPipelineNPU::StatefulLLMPipelineNPU( |
| 45 | + const std::filesystem::path& models_path, |
| 46 | + const ov::AnyMap& plugin_config) |
| 47 | + : StatefulLLMPipelineNPU{models_path, Tokenizer(models_path, plugin_config), plugin_config} {} |
| 48 | + |
| 49 | +StatefulLLMPipelineNPU::StatefulLLMPipelineNPU( |
| 50 | + const std::shared_ptr<ov::Model>& model, |
| 51 | + const ov::genai::Tokenizer& tokenizer, |
| 52 | + const ov::AnyMap& properties, |
| 53 | + const ov::genai::GenerationConfig& generation_config) |
| 54 | + : LLMPipelineImplBase(tokenizer, generation_config) { |
| 55 | + auto properties_without_draft_model = properties; |
| 56 | + auto draft_model_descr = extract_draft_model_from_config(properties_without_draft_model); |
| 57 | + if (draft_model_descr.model != nullptr) { |
| 58 | + auto main_model_descr = ov::genai::ModelDesc(model, tokenizer, "NPU", properties_without_draft_model, {}, generation_config); |
| 59 | + m_pimpl = std::make_unique<SpeculativeLLMPipelineNPU>(main_model_descr, draft_model_descr); |
| 60 | + } else if (properties_without_draft_model.count("STATIC_PIPELINE")) { |
| 61 | + m_pimpl = static_llm::LLMPipelineFactory::create(model, tokenizer, |
| 62 | + properties_without_draft_model, generation_config); |
| 63 | + } else { |
| 64 | + m_pimpl = std::make_unique<StatefulLLMPipeline>(model, tokenizer, "NPU", |
| 65 | + properties_without_draft_model, generation_config); |
| 66 | + } |
| 67 | +} |
| 68 | + |
| 69 | +DecodedResults StatefulLLMPipelineNPU::generate( |
| 70 | + StringInputs inputs, |
| 71 | + OptionalGenerationConfig generation_config, |
| 72 | + StreamerVariant streamer) { |
| 73 | + return m_pimpl->generate(inputs, generation_config, streamer); |
| 74 | +} |
| 75 | + |
| 76 | +EncodedResults StatefulLLMPipelineNPU::generate( |
| 77 | + const EncodedInputs& inputs, |
| 78 | + OptionalGenerationConfig generation_config, |
| 79 | + StreamerVariant streamer) { |
| 80 | + return m_pimpl->generate(inputs, generation_config, streamer); |
| 81 | +} |
| 82 | + |
| 83 | +void StatefulLLMPipelineNPU::start_chat(const std::string& system_message) { |
| 84 | + m_pimpl->start_chat(system_message); |
| 85 | +} |
| 86 | + |
| 87 | +// FIXME: Do we need it? |
| 88 | +// void StatefulLLMPipelineNPU::reset_kv_state() { |
| 89 | +// m_pimpl->reset_kv_state(); |
| 90 | +// } |
| 91 | + |
| 92 | +void StatefulLLMPipelineNPU::finish_chat() { |
| 93 | + m_pimpl->finish_chat(); |
| 94 | +} |
| 95 | + |
| 96 | +} // namespace ov::genai |
0 commit comments