Merged
2 changes: 1 addition & 1 deletion .github/workflows/windows.yml
@@ -622,7 +622,7 @@ jobs:
- name: 'GGUF Reader tests'
  cmd: 'tests/python_tests/test_gguf_reader.py'
  run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).GGUF.test || fromJSON(needs.smart_ci.outputs.affected_components).LLM.test }}
  timeout: 60
  timeout: 100
- name: 'Tokenizer tests'
  cmd: 'tests/python_tests/test_tokenizer.py'
  run_condition: ${{ fromJSON(needs.smart_ci.outputs.affected_components).tokenizers.test }}
16 changes: 15 additions & 1 deletion src/cpp/src/gguf_utils/gguf_tokenizer.cpp
@@ -122,12 +122,26 @@ bool is_special_token(int32_t token_type) {
    return token_type == 3 || token_type == 4;
}

std::string quote_meta(const std::string& str) {
    std::string result = "(";

    // TODO: also add UTF-8 validation
    for (char c : str) {
        if (!std::isalnum(c) && c != '_') {
            result += '\\';
        }
        result += c;
    }
    result += ")";
    return result;
}
Comment on lines +125 to +137
Copilot AI Sep 9, 2025

Spelling error in comment on line 128: 'utf validate' should be 'UTF validation' or 'UTF-8 validation'.

std::string join_special_tokens(const std::vector<std::string>& special_tokens) {
    std::ostringstream oss;
    for (size_t i = 0; i < special_tokens.size(); ++i) {
        if (i > 0)
            oss << "|";
        oss << special_tokens[i];
        oss << quote_meta(special_tokens[i]);
    }
    return oss.str();
}
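For reference, a minimal standalone sketch of what the escaping change produces (the token strings below are illustrative examples, not taken from any particular model): each special token is wrapped in its own capture group and every character that is not alphanumeric or an underscore is backslash-escaped, so characters such as '<', '|' and '>' in tokens like <|im_end|> are no longer interpreted as regex metacharacters when the alternation pattern is built.

#include <cctype>
#include <iostream>
#include <string>
#include <vector>

// Standalone copy of the escaping rule used by quote_meta() above, for illustration only.
static std::string escape_token(const std::string& token) {
    std::string result = "(";
    for (char c : token) {
        if (!std::isalnum(static_cast<unsigned char>(c)) && c != '_') {
            result += '\\';
        }
        result += c;
    }
    result += ")";
    return result;
}

int main() {
    const std::vector<std::string> special_tokens{"<|endoftext|>", "<|im_end|>"};
    std::string pattern;
    for (size_t i = 0; i < special_tokens.size(); ++i) {
        if (i > 0) pattern += "|";
        pattern += escape_token(special_tokens[i]);
    }
    // Prints: (\<\|endoftext\|\>)|(\<\|im_end\|\>)
    std::cout << pattern << std::endl;
    return 0;
}

Without the escaping, a token containing '|' would silently split the alternation into bogus branches instead of matching the literal token.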
16 changes: 13 additions & 3 deletions src/cpp/src/tokenizer/tokenizer_impl.cpp
@@ -4,6 +4,7 @@
#include "tokenizer/tokenizer_impl.hpp"
#include "add_second_input_pass.hpp"
#include "sampling/structured_output/structured_output_controller.hpp"
#include "openvino/genai/version.hpp"

namespace ov {
namespace genai {
@@ -273,7 +274,8 @@ void Tokenizer::TokenizerImpl::setup_tokenizer(const std::filesystem::path& mode
    std::shared_ptr<ov::Model> ov_tokenizer = nullptr;
    std::shared_ptr<ov::Model> ov_detokenizer = nullptr;
    auto [filtered_properties, enable_save_ov_model] = utils::extract_gguf_properties(properties);
    if (is_gguf_model(models_path)) {

    if (ov::genai::is_gguf_model(models_path)) {
        std::map<std::string, GGUFMetaData> tokenizer_config{};
        std::tie(ov_tokenizer, ov_detokenizer, tokenizer_config) =
            create_tokenizer_from_config(m_shared_object_ov_tokenizers, models_path);
@@ -293,6 +295,8 @@
        if (!m_chat_template.empty()) {
            m_chat_template = patch_gguf_chat_template(m_chat_template);
        }
        ov_tokenizer->set_rt_info(ov::genai::get_version().buildNumber, "openvino_genai_version");
        ov_detokenizer->set_rt_info(ov::genai::get_version().buildNumber, "openvino_genai_version");

        if (enable_save_ov_model){
            std::filesystem::path gguf_model_path(models_path);
@@ -372,8 +376,14 @@ void Tokenizer::TokenizerImpl::setup_tokenizer(const std::pair<std::shared_ptr<o
    auto core = get_core_singleton();
    std::string device = "CPU"; // only CPU is supported for now

    // Saving IR version was added only in 24.5, so if it's missing, then it's older than 24.5
    m_older_than_24_5 = !(ov_tokenizer ? ov_tokenizer : ov_detokenizer)->has_rt_info("openvino_tokenizers_version");
    // Saving the OpenVINO GenAI runtime version was added in 25.4 for GGUF models.
    // If it is present in the ov::Model, the model is newer than 24.5 and the 'openvino_tokenizers' version does not need to be checked.
    if ((ov_tokenizer ? ov_tokenizer : ov_detokenizer)->has_rt_info("openvino_genai_version")) {
        m_older_than_24_5 = false;
    } else {
        // Saving IR version was added only in 24.5, so if it's missing, then it's older than 24.5
        m_older_than_24_5 = !(ov_tokenizer ? ov_tokenizer : ov_detokenizer)->has_rt_info("openvino_tokenizers_version");
    }

    if (ov_tokenizer) {
        ov::pass::Manager manager;
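Taken together, the GGUF path now stamps the converted tokenizer and detokenizer models with the GenAI build number, and the loader checks for that tag before falling back to the openvino_tokenizers tag. A condensed sketch of the detection order (the helper name is hypothetical and only for illustration; `model` stands for whichever of the two ov::Model objects is available):

#include <memory>

#include "openvino/core/model.hpp"

// Hypothetical helper mirroring the rt_info checks above.
static bool is_older_than_24_5(const std::shared_ptr<ov::Model>& model) {
    // Models converted from GGUF by GenAI 25.4+ carry this tag, so no further checks are needed.
    if (model->has_rt_info("openvino_genai_version")) {
        return false;
    }
    // The openvino_tokenizers version tag itself was only introduced in 24.5,
    // so its absence means the IR predates 24.5.
    return !model->has_rt_info("openvino_tokenizers_version");
}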
20 changes: 17 additions & 3 deletions tests/python_tests/test_gguf_reader.py
@@ -61,15 +61,24 @@ def test_pipelines_with_gguf_generate(pipeline_type, model_ids):
@pytest.mark.parametrize("pipeline_type", get_gguf_pipeline_types())
@pytest.mark.parametrize("model_ids", get_gguf_model_list())
@pytest.mark.parametrize("enable_save_ov_model", [False, True])
@pytest.mark.parametrize("prompt", [
'Why is the Sun yellow?',
# To check that special tokens are handled correctly.
'<|endoftext|> <|im_end|>',
'<|endoftext|><|endoftext|><|im_end|>',
'<|endoftext|> Why the Sky is Blue? <|im_end|>',
])
@pytest.mark.precommit
def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model):
def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model, prompt):
    if sys.platform == 'darwin':
        pytest.skip(reason="168882: Sporadic segmentation fault failure on MacOS.")
    gguf_model_id = model_ids["gguf_model_id"]
    gguf_filename = model_ids["gguf_filename"]
    dynamic_quantization_group_size = model_ids["dynamic_quantization_group_size"]
    prompt = 'Why is the Sun yellow?'

    if gguf_model_id == "sammysun0711/tiny-random-deepseek-distill-qwen-gguf" and "<|endoftext|>" in prompt:
        pytest.skip(reason="Prompts to test special tokens for this model fail on HF side")

    opt_model = load_hf_model_from_gguf(gguf_model_id, gguf_filename)
    hf_tokenizer = load_hf_tokenizer_from_gguf(gguf_model_id, gguf_filename)
    gc.collect()
@@ -97,6 +106,11 @@ def test_full_gguf_pipeline(pipeline_type, model_ids, enable_save_ov_model):
    gguf_full_path = download_gguf_model(gguf_model_id, gguf_filename)
    ov_pipe_gguf = create_ov_pipeline(gguf_full_path, pipeline_type=pipeline_type, enable_save_ov_model=enable_save_ov_model, dynamic_quantization_group_size=dynamic_quantization_group_size)
    res_string_input_2 = ov_pipe_gguf.generate(prompt, generation_config=ov_generation_config)

    # Check that the eos_token and bos_token string representations are loaded correctly from the GGUF file.
    assert ov_pipe_gguf.get_tokenizer().get_eos_token() == hf_tokenizer.decode([ov_pipe_gguf.get_tokenizer().get_eos_token_id()])
    assert ov_pipe_gguf.get_tokenizer().get_bos_token() == hf_tokenizer.decode([ov_pipe_gguf.get_tokenizer().get_bos_token_id()])

    del ov_pipe_gguf
    gc.collect()

@@ -129,7 +143,7 @@ def test_full_gguf_qwen3_pipeline(pipeline_type, model_ids):
    # <think>\nOkay, the user is asking why the Sun is yellow. Let me start by recalling what I know about the Sun's color.
    # Prompt after applying chat template is identical between HF and GenAI, so the issue is not in the chat template.
    # TODO: Investigate output difference for GGUF models. Ticket: TBD
    res_string_input_1 = "</im_start>\nOkay, the user is asking why the Sun is yellow. Let me start by recalling what I know about the Sun's color."
    res_string_input_1 = "\nOkay, the user is asking why the Sun is yellow. Let me start by recalling what I know about the Sun's color. I remember"

    gguf_full_path = download_gguf_model(gguf_model_id, gguf_filename)
    ov_pipe_gguf = create_ov_pipeline(gguf_full_path, pipeline_type=pipeline_type)