diff --git a/samples/cpp/text_generation/benchmark_genai.cpp b/samples/cpp/text_generation/benchmark_genai.cpp index 2d42eb4c54..f3acab1123 100644 --- a/samples/cpp/text_generation/benchmark_genai.cpp +++ b/samples/cpp/text_generation/benchmark_genai.cpp @@ -55,6 +55,7 @@ int main(int argc, char* argv[]) try { ov::genai::GenerationConfig config; config.max_new_tokens = result["max_new_tokens"].as(); + config.apply_chat_template = false; ov::genai::SchedulerConfig scheduler_config; scheduler_config.enable_prefix_caching = false; diff --git a/samples/python/text_generation/benchmark_genai.py b/samples/python/text_generation/benchmark_genai.py index 4db541fec4..96548bc31a 100755 --- a/samples/python/text_generation/benchmark_genai.py +++ b/samples/python/text_generation/benchmark_genai.py @@ -1,11 +1,13 @@ # Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -import sys import argparse +import sys + import openvino_genai as ov_genai from openvino import get_version + def main(): parser = argparse.ArgumentParser(description="Help command") parser.add_argument("-m", "--model", type=str, required=True, help="Path to model and tokenizers base directory") @@ -15,31 +17,32 @@ def main(): parser.add_argument("-n", "--num_iter", type=int, default=2, help="Number of iterations") parser.add_argument("-mt", "--max_new_tokens", type=int, default=20, help="Maximal number of new tokens") parser.add_argument("-d", "--device", type=str, default="CPU", help="Device") - + args = parser.parse_args() if args.prompt is not None and args.prompt_file is not None: - raise RuntimeError(f'Prompt and prompt file should not exist together!') + raise RuntimeError("Prompt and prompt file should not exist together!") else: if args.prompt_file is not None: - with open(args.prompt_file, 'r', encoding='utf-8') as f: + with open(args.prompt_file, "r", encoding="utf-8") as f: prompt = [f.read()] else: - prompt = ['The Sky is blue because'] if args.prompt is None else 
[args.prompt] + prompt = ["The Sky is blue because"] if args.prompt is None else [args.prompt] if len(prompt) == 0: - raise RuntimeError(f'Prompt is empty!') + raise RuntimeError("Prompt is empty!") - print(f'openvino runtime version: {get_version()}, genai version: {ov_genai.__version__}') + print(f"openvino runtime version: {get_version()}, genai version: {ov_genai.__version__}") - # Perf metrics is stored in DecodedResults. + # Perf metrics is stored in DecodedResults. # In order to get DecodedResults instead of a string input should be a list. models_path = args.model device = args.device num_warmup = args.num_warmup num_iter = args.num_iter - + config = ov_genai.GenerationConfig() config.max_new_tokens = args.max_new_tokens + config.apply_chat_template = False if device == "NPU": pipe = ov_genai.LLMPipeline(models_path, device) @@ -55,21 +58,28 @@ def main(): for _ in range(num_warmup): pipe.generate(prompt, config) - + res = pipe.generate(prompt, config) perf_metrics = res.perf_metrics for _ in range(num_iter - 1): res = pipe.generate(prompt, config) perf_metrics += res.perf_metrics - + print(f"Output token size: {res.perf_metrics.get_num_generated_tokens()}") print(f"Load time: {perf_metrics.get_load_time():.2f} ms") - print(f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms") - print(f"Tokenization time: {perf_metrics.get_tokenization_duration().mean:.2f} ± {perf_metrics.get_tokenization_duration().std:.2f} ms") - print(f"Detokenization time: {perf_metrics.get_detokenization_duration().mean:.2f} ± {perf_metrics.get_detokenization_duration().std:.2f} ms") + print( + f"Generate time: {perf_metrics.get_generate_duration().mean:.2f} ± {perf_metrics.get_generate_duration().std:.2f} ms" + ) + print( + f"Tokenization time: {perf_metrics.get_tokenization_duration().mean:.2f} ± {perf_metrics.get_tokenization_duration().std:.2f} ms" + ) + print( + f"Detokenization time: 
{perf_metrics.get_detokenization_duration().mean:.2f} ± {perf_metrics.get_detokenization_duration().std:.2f} ms" + ) print(f"TTFT: {perf_metrics.get_ttft().mean:.2f} ± {perf_metrics.get_ttft().std:.2f} ms") print(f"TPOT: {perf_metrics.get_tpot().mean:.2f} ± {perf_metrics.get_tpot().std:.2f} ms") print(f"Throughput : {perf_metrics.get_throughput().mean:.2f} ± {perf_metrics.get_throughput().std:.2f} tokens/s") + if __name__ == "__main__": main() diff --git a/samples/python/text_generation/compound_grammar_generation.py b/samples/python/text_generation/compound_grammar_generation.py index 4309d67811..05e547be38 100644 --- a/samples/python/text_generation/compound_grammar_generation.py +++ b/samples/python/text_generation/compound_grammar_generation.py @@ -7,12 +7,13 @@ from typing import Any from openvino_genai import ( - LLMPipeline, GenerationConfig, - StructuredOutputConfig as SOC, + LLMPipeline, StreamingStatus, ) - +from openvino_genai import ( + StructuredOutputConfig as SOC, +) from pydantic import BaseModel, Field @@ -25,9 +26,7 @@ class booking_flight_tickets(BaseModel): """booking flights""" origin_airport_code: str = Field(description="The name of Departure airport code") - destination_airport_code: str = Field( - description="The name of Destination airport code" - ) + destination_airport_code: str = Field(description="The name of Destination airport code") departure_date: str = Field(description="The date of outbound flight") return_date: str = Field(description="The date of return flight") @@ -74,12 +73,11 @@ def tools_to_array_schema(*tools: BaseModel) -> str: return json.dumps( { "type": "array", - "items": { - "anyOf": [tool_to_dict(tool, with_description=False) for tool in tools] - }, + "items": {"anyOf": [tool_to_dict(tool, with_description=False) for tool in tools]}, } ) + # modified system message from: # https://github.com/vllm-project/vllm/blob/main/examples/tool_chat_template_phi4_mini.jinja sys_message = """You are a helpful AI assistant. 
@@ -88,7 +86,7 @@ def tools_to_array_schema(*tools: BaseModel) -> str: Use the following rule to decide when to call a function: * if the response can be generated from your internal knowledge, do so, but use only yes or no as the response * if you need external information that can be obtained by calling one or more of the provided functions, generate function calls - + If you decide to call functions: * prefix function calls with functools marker (no closing marker required) * all function calls should be generated in a single JSON list formatted as functools[{"name": [function name], "arguments": [function arguments as JSON]}, ...] @@ -118,16 +116,10 @@ def main(): user_text_1 = "Do dolphins have fingers?" print("User: ", user_text_1) chat_history.append({"role": "user", "content": user_text_1}) - model_input = tokenizer.apply_chat_template( - chat_history, add_generation_prompt=True - ) - - # the example grammar works the same as SOC.Regex("yes|no") - # but the Union grammar is more flexible and can be extended with more options - yes_or_no = SOC.Regex("yes") | SOC.Regex( - "no" - ) # SOC.Union(SOC.Regex("yes"), SOC.Regex("no")) - generation_config.structured_output_config = SOC(compound_grammar=yes_or_no) + model_input = tokenizer.apply_chat_template(chat_history, add_generation_prompt=True) + # same as SOC.Union(SOC.ConstString("yes"), SOC.ConstString("no")) + yes_or_no_grammar = SOC.ConstString("yes") | SOC.ConstString("no") + generation_config.structured_output_config = SOC(structural_tags_config=yes_or_no_grammar) print("Assistant: ", end="") answer = pipe.generate(model_input, generation_config, streamer=streamer) chat_history.append({"role": "assistant", "content": answer}) @@ -139,21 +131,16 @@ def main(): ) print("User: ", user_text_2) chat_history.append({"role": "user", "content": user_text_2}) - model_input = tokenizer.apply_chat_template( - chat_history, add_generation_prompt=True - ) + model_input = tokenizer.apply_chat_template(chat_history, 
add_generation_prompt=True) - start_tool_call_tag = SOC.Regex(r"functools") - tools_json = SOC.JSONSchema( - tools_to_array_schema(booking_flight_tickets, booking_hotels) - ) - tool_call = ( - start_tool_call_tag + tools_json - ) # SOC.Concat(start_tool_call_tag, tools_json) - generation_config.structured_output_config.compound_grammar = tool_call + start_tool_call_tag = SOC.ConstString(r"functools") + tools_json = SOC.JSONSchema(tools_to_array_schema(booking_flight_tickets, booking_hotels)) + tool_call_grammar = start_tool_call_tag + tools_json # SOC.Concat(start_tool_call_tag, tools_json) + generation_config.structured_output_config.structural_tags_config = tool_call_grammar print("Assistant: ", end="") pipe.generate(model_input, generation_config, streamer=streamer) + print() if __name__ == "__main__": diff --git a/samples/python/text_generation/structural_tags_generation.py b/samples/python/text_generation/structural_tags_generation.py index 45caca3436..0df862199d 100644 --- a/samples/python/text_generation/structural_tags_generation.py +++ b/samples/python/text_generation/structural_tags_generation.py @@ -3,20 +3,20 @@ # SPDX-License-Identifier: Apache-2.0 import argparse -import re import json +import re from datetime import datetime from pprint import pprint +from typing import ClassVar from openvino_genai import ( - LLMPipeline, GenerationConfig, - StructuredOutputConfig, - StructuralTagsConfig, - StructuralTagItem, + LLMPipeline, StreamingStatus, ) -from typing import ClassVar +from openvino_genai import ( + StructuredOutputConfig as SOC, +) from pydantic import BaseModel, Field @@ -35,9 +35,7 @@ class WeatherRequest(ToolRequest): city: str = Field(description="City name") country: str = Field(description="Country name") - date: str = Field( - pattern=r"2\d\d\d-[0-1]\d-[0-3]\d", description="Date in YYYY-MM-DD format" - ) + date: str = Field(pattern=r"2\d\d\d-[0-1]\d-[0-3]\d", description="Date in YYYY-MM-DD format") class 
CurrencyExchangeRequest(ToolRequest): @@ -59,8 +57,8 @@ class CurrencyExchangeRequest(ToolRequest): "You can use the following tools:\n" f"{new_line.join([tool.string_representation() for tool in tools.values()])}\n" "Please, only use the following format for tool calling in your responses:\n" - "" - "{\"argument1\": \"value1\", ...}" + '' + '{"argument1": "value1", ...}' "\n" "Use the tool name and arguments as defined in the tool schema.\n" "If you don't know the answer, just say that you don't know, but try to call the tool if it helps to answer the question.\n" @@ -77,10 +75,7 @@ def parse_tools_from_response(response: str) -> list[ToolRequest]: {"argument1": "value1", ...} """ matches = re.finditer(function_pattern, response) - return [ - tools.get(match.group(1)).model_validate_json(match.group(2)) - for match in matches - ] + return [tools.get(match.group(1)).model_validate_json(match.group(2)) for match in matches] def streamer(subword): @@ -89,7 +84,9 @@ def streamer(subword): def main(): - default_prompt = "What is the weather in London today and in Paris yesterday, and how many pounds can I get for 100 euros?" + default_prompt = ( + "What is the weather in London today and in Paris yesterday, and how many pounds can I get for 100 euros?" 
+ ) description = ( "This script demonstrates how to use OpenVINO GenAI with structured tags to generate responses " @@ -116,26 +113,24 @@ def main(): for use_structural_tags in [False, True]: print("=" * 80) - print( - f"{'Using structural tags' if use_structural_tags else 'Using no structural tags':^80}" - ) + print(f"{'Using structural tags' if use_structural_tags else 'Using no structural tags':^80}") print("=" * 80) config = GenerationConfig() config.max_new_tokens = 300 pipe.start_chat(sys_message) if use_structural_tags: - config.structured_output_config = StructuredOutputConfig( - structural_tags_config=StructuralTagsConfig( - structural_tags=[ - StructuralTagItem( + config.structured_output_config = SOC( + structural_tags_config=SOC.TriggeredTags( + triggers=["', - schema=json.dumps(tool.model_json_schema()), + content=SOC.JSONSchema(json.dumps(tool.model_json_schema())), end="", ) for name, tool in tools.items() ], - triggers=[" ') + prompt = input("> ") except EOFError: break pipe.start_chat(sys_message) - config.structured_output_config = StructuredOutputConfig(json_schema = json.dumps(ItemQuantities.model_json_schema())) + config.structured_output_config = StructuredOutputConfig( + json_schema=json.dumps(ItemQuantities.model_json_schema()) + ) config.do_sample = False res = json.loads(pipe.generate(prompt, config)) pipe.finish_chat() @@ -82,7 +88,9 @@ def main(): pipe.start_chat(sys_message_for_items) generate_has_run = False for item, quantity in res.items(): - config.structured_output_config = StructuredOutputConfig(json_schema = json.dumps(items_map[item].model_json_schema())) + config.structured_output_config = StructuredOutputConfig( + json_schema=json.dumps(items_map[item].model_json_schema()) + ) for _ in range(quantity): generate_has_run = True json_strs = pipe.generate(prompt, config) @@ -92,5 +100,5 @@ def main(): print("No items generated. 
Please try again with a different request.") -if '__main__' == __name__: +if "__main__" == __name__: main() diff --git a/src/cpp/CMakeLists.txt b/src/cpp/CMakeLists.txt index 03dc2365d6..28d05b19c6 100644 --- a/src/cpp/CMakeLists.txt +++ b/src/cpp/CMakeLists.txt @@ -59,7 +59,7 @@ if(ANDROID) endif() if(ENABLE_XGRAMMAR) - set(XGRAMMAR_VERSION v0.1.18) + set(XGRAMMAR_VERSION v0.1.25) set(XGRAMMAR_DIR ${CMAKE_BINARY_DIR}/xgrammar) FetchContent_Declare( diff --git a/src/cpp/include/openvino/genai/generation_config.hpp b/src/cpp/include/openvino/genai/generation_config.hpp index 3020be34bc..143d42f4e2 100644 --- a/src/cpp/include/openvino/genai/generation_config.hpp +++ b/src/cpp/include/openvino/genai/generation_config.hpp @@ -7,6 +7,7 @@ #include #include #include +#include #include "openvino/runtime/compiled_model.hpp" #include "openvino/runtime/infer_request.hpp" @@ -38,6 +39,10 @@ struct OPENVINO_GENAI_EXPORTS StructuralTagItem { void update_config(const ov::AnyMap& properties); std::string to_string() const; + bool operator==(const StructuralTagItem& other) const { + return begin == other.begin && schema == other.schema && end == other.end; + } + std::string begin; std::string schema; std::string end; @@ -65,6 +70,11 @@ struct OPENVINO_GENAI_EXPORTS StructuralTagsConfig { StructuralTagsConfig(const ov::AnyMap& properties); void update_config(const ov::AnyMap& properties); std::string to_string() const; + std::string to_json() const; + + bool operator==(const StructuralTagsConfig& other) const { + return structural_tags == other.structural_tags && triggers == other.triggers; + } std::vector structural_tags; std::vector triggers; @@ -91,7 +101,39 @@ class OPENVINO_GENAI_EXPORTS StructuredOutputConfig { StructuredOutputConfig(const ov::AnyMap& properties); StructuredOutputConfig() = default; - // base grammar types for compound grammar construction + static std::string format_for_json(const std::string& input) { + std::ostringstream stream; + stream << '"'; + for 
(char character : input) { + switch (character) { + case '"': stream << "\\\""; break; + case '\\': stream << "\\\\"; break; + case '\b': stream << "\\b"; break; + case '\f': stream << "\\f"; break; + case '\n': stream << "\\n"; break; + case '\r': stream << "\\r"; break; + case '\t': stream << "\\t"; break; + default: { + // Interpret `character` as a raw byte to avoid sign-extension on platforms where `char` is signed. + unsigned char uc = static_cast(character); + if (uc < 0x20) { + // control characters < 0x20 must be escaped as \uXXXX in JSON + stream << "\\u" << std::hex << std::uppercase << std::setfill('0') << std::setw(4) + << static_cast(uc) << std::dec << std::nouppercase; + } else { + stream << character; + } + } + } + } + stream << '"'; + return stream.str(); + } + + // base grammar types for structural tags construction + /** + * @brief Regex structural tag constrains output using a regular expression. + */ struct Regex { std::string value; @@ -100,11 +142,18 @@ class OPENVINO_GENAI_EXPORTS StructuredOutputConfig { std::string to_string() const { return "Regex(\"" + value + "\")"; } + std::string to_json() const { + return std::string("{\"type\": \"regex\", \"pattern\": ") + format_for_json(value) + "}"; + } bool operator==(const Regex& other) const { return value == other.value; } }; + /** + * @brief JSONSchema structural tag constrains output to a JSON document that + * must conform to the provided JSON Schema string. + */ struct JSONSchema { std::string value; @@ -113,11 +162,17 @@ class OPENVINO_GENAI_EXPORTS StructuredOutputConfig { std::string to_string() const { return "JSONSchema(\"" + value + "\")"; } + std::string to_json() const { + return std::string("{\"type\": \"json_schema\", \"json_schema\": ") + value + "}"; + } bool operator==(const JSONSchema& other) const { return value == other.value; } }; + /** + * @brief EBNF structural tag constrains output using an EBNF grammar. 
+ */ struct EBNF { std::string value; @@ -126,88 +181,370 @@ class OPENVINO_GENAI_EXPORTS StructuredOutputConfig { std::string to_string() const { return "EBNF(\"" + value + "\")"; } + std::string to_json() const { + return std::string("{\"type\": \"grammar\", \"grammar\": ") + format_for_json(value) + "}"; + } bool operator==(const EBNF& other) const { return value == other.value; } }; - // compound grammar types + /** + * @brief ConstString structural tag forces the generator to produce exactly + * the provided constant string value. + */ + struct ConstString { + std::string value; + + ConstString() = default; + ConstString(const std::string& str) : value(str) {} + std::string to_string() const { + return "ConstString(\"" + value + "\")"; + } + std::string to_json() const { + return std::string("{\"type\": \"const_string\", \"value\": ") + format_for_json(value) + "}"; + } + bool operator==(const ConstString& other) const { + return value == other.value; + } + }; + + /** + * @brief AnyText structural tag allows any text for the portion + * of output covered by this tag. + */ + struct AnyText { + AnyText() = default; + std::string to_string() const { + return "AnyText()"; + } + std::string to_json() const { + return "{\"type\": \"any_text\"}"; + } + bool operator==(const AnyText& other) const { + return true; + } + }; + + /** + * @brief QwenXMLParametersFormat instructs the generator to output an XML + * parameters block derived from the provided JSON schema. This is a + * specialized helper for Qwen-style XML parameter formatting. 
+ */ + struct QwenXMLParametersFormat { + std::string json_schema; + + QwenXMLParametersFormat() = default; + QwenXMLParametersFormat(const std::string& schema) : json_schema(schema) {}; + std::string to_json() const { + return std::string("{\"type\": \"qwen_xml_parameter\", \"json_schema\": ") + json_schema + "}"; + }; + std::string to_string() const { + return "QwenXMLParametersFormat(json_schema=" + json_schema + ")"; + }; + bool operator==(const QwenXMLParametersFormat& other) const { + return json_schema == other.json_schema; + } + }; + + // nested grammar types struct Concat; struct Union; + struct Tag; + struct TriggeredTags; + struct TagsWithSeparator; - using CompoundGrammar = std::variant< + using StructuralTag = std::variant< + std::string, Regex, JSONSchema, EBNF, + ConstString, + AnyText, + QwenXMLParametersFormat, std::shared_ptr, - std::shared_ptr + std::shared_ptr, + std::shared_ptr, + std::shared_ptr, + std::shared_ptr >; + using CompoundGrammar = StructuralTag; + + template + static std::string structural_tag_to_string(const T& g) { + if constexpr (std::is_same_v) { + return g; + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { + return g.to_string(); + } else if constexpr (std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v>) { + return g ? g->to_string() : std::string("null"); + } else { + OPENVINO_THROW("Unsupported structural tag, cannot convert to string:" + std::string(typeid(g).name())); + } + } - // compound grammar types - Concat and Union are used to combine multiple grammars into one - // Concat combines two grammars in sequence, e.g. 
"A B" means A followed by B + template + static std::string structural_tag_to_json(const T& g) { + if constexpr (std::is_same_v) { + return g; + } else if constexpr (std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) { + return g.to_json(); + } else if constexpr (std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v> || + std::is_same_v>) { + return g ? g->to_json() : std::string("null"); + } else { + OPENVINO_THROW("Unsupported structural tag, cannot convert to json:" + std::string(typeid(g).name())); + } + } + + // nested grammar types + /** + * @brief Concat composes multiple structural tags in sequence. Each element + * must be produced in the given order. + * Can be used indirectly with + operator. + * + * Example: Concat(ConstString("a"), ConstString("b")) produces "ab". + * ConstString("a") + ConstString("b") is equivalent. + */ struct Concat { - CompoundGrammar left; - CompoundGrammar right; + std::vector elements; Concat() = default; - Concat(CompoundGrammar left, CompoundGrammar right) : left(std::move(left)), right(std::move(right)) {}; - std::string to_string() const { - return "Concat(" + std::visit([](const auto& g) -> std::string { - if constexpr (std::is_same_v, std::shared_ptr> || - std::is_same_v, std::shared_ptr>) { - return g ? 
g->to_string() : "null"; - } else { - return g.to_string(); + Concat(StructuralTag left, StructuralTag right) : elements{std::move(left), std::move(right)} {}; + Concat(const std::vector& elems) : elements(elems) {}; + std::string to_json() const { + std::ostringstream oss; + oss << "{\"type\": \"sequence\", \"elements\": ["; + for (size_t i = 0; i < elements.size(); ++i) { + oss << std::visit([](const auto& g) { return structural_tag_to_json(g); }, elements[i]); + if (i != elements.size() - 1) { + oss << ", "; } - }, left) + ", " + - std::visit([](const auto& g) -> std::string { - if constexpr (std::is_same_v, std::shared_ptr> || - std::is_same_v, std::shared_ptr>) { - return g ? g->to_string() : "null"; - } else { - return g.to_string(); + } + oss << "]}"; + return oss.str(); + }; + std::string to_string() const { + std::ostringstream oss; + oss << "Concat("; + for (size_t i = 0; i < elements.size(); ++i) { + oss << std::visit([](const auto& g) -> std::string { return structural_tag_to_string(g); }, elements[i]); + if (i != elements.size() - 1) { + oss << ", "; } - }, right) + ")"; + } + oss << ")"; + return oss.str(); } bool operator==(const Concat& other) const { - return left == other.left && right == other.right; + return elements == other.elements; } }; // Union combines two grammars in parallel, e.g. "A | B" means either A or B + /** + * @brief Union composes multiple structural tags as alternatives. The + * model may produce any one of the provided elements. + * Can be used indirectly with | operator. + */ struct Union { - CompoundGrammar left; - CompoundGrammar right; + std::vector elements; Union() = default; - Union(CompoundGrammar left, CompoundGrammar right) : left(std::move(left)), right(std::move(right)) {}; - std::string to_string() const { - return "Union(" + std::visit([](const auto& g) -> std::string { - if constexpr (std::is_same_v, std::shared_ptr> || - std::is_same_v, std::shared_ptr>) { - return g ? 
g->to_string() : "null"; - } else { - return g.to_string(); + Union(StructuralTag left, StructuralTag right) : elements{std::move(left), std::move(right)} {}; + Union(const std::vector& elems) : elements(elems) {}; + std::string to_json() const { + std::ostringstream oss; + oss << "{\"type\": \"or\", \"elements\": ["; + for (size_t i = 0; i < elements.size(); ++i) { + oss << std::visit([](const auto& g) -> std::string { return structural_tag_to_json(g); }, elements[i]); + if (i != elements.size() - 1) { + oss << ", "; } - }, left) + ", " + - std::visit([](const auto& g) -> std::string { - if constexpr (std::is_same_v, std::shared_ptr> || - std::is_same_v, std::shared_ptr>) { - return g ? g->to_string() : "null"; - } else { - return g.to_string(); + } + oss << "]}"; + return oss.str(); + } + std::string to_string() const { + std::ostringstream oss; + oss << "Union("; + for (size_t i = 0; i < elements.size(); ++i) { + oss << std::visit([](const auto& g) -> std::string { return structural_tag_to_string(g); }, elements[i]); + if (i != elements.size() - 1) { + oss << ", "; } - }, right) + ")"; + } + oss << ")"; + return oss.str(); } bool operator==(const Union& other) const { - return left == other.left && right == other.right; + return elements == other.elements; } }; + /** + * @brief Tag defines a begin/end wrapper with constrained inner content. + * + * The generator will output `begin`, then the `content` (a StructuralTag), + * and finally `end`. + * + * Example: Tag("", AnyText(), "") represents thinking portion of the model output. 
+ */ + struct Tag { + std::string begin; + StructuralTag content; + std::string end; + + Tag() = default; + Tag(const std::string& begin, StructuralTag content, const std::string& end) : begin(begin), content(std::move(content)), end(end) {}; + std::string to_json() const { + std::ostringstream oss; + oss << "{\"type\": \"tag\", \"begin\": " << format_for_json(begin) << ", \"content\": " << + std::visit([](const auto& g) -> std::string { return structural_tag_to_json(g); }, content) << + ", \"end\": " << format_for_json(end) << "}"; + return oss.str(); + }; + std::string to_string() const { + std::ostringstream oss; + oss << "Tag(begin=\"" << begin << "\", content=" << + std::visit([](const auto& g) -> std::string { return structural_tag_to_string(g); }, content) << + ", end=\"" << end << "\")"; + return oss.str(); + }; + bool operator==(const Tag& other) const { + return begin == other.begin && content == other.content && end == other.end; + } + }; + + /** + * @brief TriggeredTags associates a set of `triggers` with multiple `tags`. + * + * When the model generates any of the trigger strings the structured generation + * activates to produce configured tags. Flags allow requiring + * at least one tag and stopping structured generation after the first tag. 
+ */ + struct TriggeredTags { + std::vector triggers; + std::vector tags; + bool at_least_one = false; // if true, at least one tag must be generated after trigger + bool stop_after_first = false; // if true, structured generation stops after first tag is generated + + TriggeredTags() = default; + TriggeredTags(const std::vector& triggers, + const std::vector& tags, + bool at_least_one = false, + bool stop_after_first = false) + : triggers(triggers), tags(tags), at_least_one(at_least_one), stop_after_first(stop_after_first) {}; + std::string to_json() const { + std::ostringstream oss; + oss << "{\"type\": \"triggered_tags\", \"triggers\": ["; + for (size_t i = 0; i < triggers.size(); ++i) { + oss << format_for_json(triggers[i]); + if (i != triggers.size() - 1) { + oss << ", "; + } + } + oss << "], \"tags\": ["; + for (size_t i = 0; i < tags.size(); ++i) { + oss << tags[i].to_json(); + if (i != tags.size() - 1) { + oss << ", "; + } + } + oss << "], \"at_least_one\": " << (at_least_one ? "true" : "false") << + ", \"stop_after_first\": " << (stop_after_first ? "true" : "false") << "}"; + return oss.str(); + }; + std::string to_string() const { + std::ostringstream oss; + oss << "TriggeredTags(triggers=["; + for (size_t i = 0; i < triggers.size(); ++i) { + oss << "\"" << triggers[i] << "\""; + if (i != triggers.size() - 1) { + oss << ", "; + } + }; + oss << "], tags=["; + for (size_t i = 0; i < tags.size(); ++i) { + oss << tags[i].to_string(); + if (i != tags.size() - 1) { + oss << ", "; + } + } + oss << "], at_least_one=" << (at_least_one ? "True" : "False") << + ", stop_after_first=" << (stop_after_first ? "True" : "False") << ")"; + return oss.str(); + }; + }; + + /** + * @brief TagsWithSeparator configures generation of a sequence of tags + * separated by a fixed `separator` string. + * + * Can be used to produce repeated tagged elements like + * "A;B" where `separator`=";". 
+ */ + struct TagsWithSeparator { + std::vector tags; + std::string separator; + bool at_least_one = false; // if true, at least one tag must be generated + bool stop_after_first = false; // if true, generation stops after first tag is generated + + TagsWithSeparator() = default; + TagsWithSeparator(const std::vector& tags, + const std::string& separator, + bool at_least_one = false, + bool stop_after_first = false) + : tags(tags), separator(separator), at_least_one(at_least_one), stop_after_first(stop_after_first) {}; + std::string to_json() const { + std::ostringstream oss; + oss << "{\"type\": \"tags_with_separator\", \"separator\": " << format_for_json(separator) << ", \"tags\": ["; + for (size_t i = 0; i < tags.size(); ++i) { + oss << tags[i].to_json(); + if (i != tags.size() - 1) { + oss << ", "; + } + } + oss << "], \"at_least_one\": " << (at_least_one ? "true" : "false") << + ", \"stop_after_first\": " << (stop_after_first ? "true" : "false") << "}"; + return oss.str(); + }; + std::string to_string() const { + std::ostringstream oss; + oss << "TagsWithSeparator(separator=\"" << separator << "\", tags=["; + for (size_t i = 0; i < tags.size(); ++i) { + oss << tags[i].to_string(); + if (i != tags.size() - 1) { + oss << ", "; + } + } + oss << "], at_least_one=" << (at_least_one ? "true" : "false") << + ", stop_after_first=" << (stop_after_first ? 
"true" : "false") << ")"; + return oss.str(); + }; + }; + std::optional json_schema; std::optional regex; std::optional grammar; - std::optional structural_tags_config; + std::optional> structural_tags_config; std::optional compound_grammar; std::optional backend; void validate() const; @@ -217,12 +554,12 @@ class OPENVINO_GENAI_EXPORTS StructuredOutputConfig { OPENVINO_GENAI_EXPORTS std::shared_ptr -operator+(const StructuredOutputConfig::CompoundGrammar& lhs, - const StructuredOutputConfig::CompoundGrammar& rhs); +operator+(const StructuredOutputConfig::StructuralTag& lhs, + const StructuredOutputConfig::StructuralTag& rhs); OPENVINO_GENAI_EXPORTS std::shared_ptr -operator|(const StructuredOutputConfig::CompoundGrammar& lhs, - const StructuredOutputConfig::CompoundGrammar& rhs); +operator|(const StructuredOutputConfig::StructuralTag& lhs, + const StructuredOutputConfig::StructuralTag& rhs); /** * @brief Structure to keep generation config parameters. For a selected method of decoding, only parameters from that group diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp index db7d621ed5..78cf09191f 100644 --- a/src/cpp/src/generation_config.cpp +++ b/src/cpp/src/generation_config.cpp @@ -210,6 +210,15 @@ std::string StructuralTagsConfig::to_string() const { ", triggers=" + triggers_repr.str() + ")"; } +std::string StructuralTagsConfig::to_json() const { + std::vector tags; + tags.reserve(structural_tags.size()); + for (const auto& tag : structural_tags) { + tags.emplace_back(tag.begin, StructuredOutputConfig::JSONSchema{tag.schema}, tag.end); + } + return StructuredOutputConfig::TriggeredTags(triggers, tags, false, false).to_json(); +} + StructuredOutputConfig::StructuredOutputConfig(const ov::AnyMap& properties) { update_config(properties); validate(); @@ -367,16 +376,15 @@ void StructuredOutputConfig::validate() const { (json_schema.has_value() ? "json=" + *json_schema +", " : ""), (regex.has_value() ? 
"regex=" + *regex + ", " : ""), (grammar.has_value() ? "grammar=" + *grammar : ""), - (structural_tags_config.has_value() ? "structural_tags_config=" + structural_tags_config->to_string() : ""), - (compound_grammar.has_value() ? "compound_grammar=" + std::visit([](const auto& g) -> std::string { - if constexpr ( - std::is_same_v, std::shared_ptr> || - std::is_same_v, std::shared_ptr> - ) { - return g ? g->to_string() : "null"; + (structural_tags_config.has_value() ? "structural_tags_config=" + std::visit([](const auto& config) -> std::string { + if constexpr (std::is_same_v, StructuralTagsConfig>) { + return config.to_string(); } else { - return g.to_string(); + return StructuredOutputConfig::structural_tag_to_string(config); } + }, *structural_tags_config) : ""), + (compound_grammar.has_value() ? "compound_grammar=" + std::visit([](const auto& g) -> std::string { + return StructuredOutputConfig::structural_tag_to_string(g); }, *compound_grammar) : "") ); } @@ -389,15 +397,66 @@ void StructuredOutputConfig::validate(Tokenizer& tokenizer) const { std::shared_ptr -operator+(const ov::genai::StructuredOutputConfig::CompoundGrammar& lhs, - const ov::genai::StructuredOutputConfig::CompoundGrammar& rhs) { - return std::make_shared(lhs, rhs); +operator+(const ov::genai::StructuredOutputConfig::StructuralTag& lhs, + const ov::genai::StructuredOutputConfig::StructuralTag& rhs) { + using SOC = ov::genai::StructuredOutputConfig; + const auto lhs_concat = std::get_if>(&lhs); + const auto rhs_concat = std::get_if>(&rhs); + + if (lhs_concat && *lhs_concat) { + // lhs is a Concat + if (rhs_concat && *rhs_concat) { + // both are Concat: combine elements + std::vector elems = (*lhs_concat)->elements; + elems.insert(elems.end(), (*rhs_concat)->elements.begin(), (*rhs_concat)->elements.end()); + return std::make_shared(elems); + } else { + // only lhs is Concat: append rhs + std::vector elems = (*lhs_concat)->elements; + elems.push_back(rhs); + return std::make_shared(elems); + } + } 
else if (rhs_concat && *rhs_concat) { + // only rhs is Concat: prepend lhs + std::vector elems; + elems.push_back(lhs); + elems.insert(elems.end(), (*rhs_concat)->elements.begin(), (*rhs_concat)->elements.end()); + return std::make_shared(elems); + } else { + // neither is Concat: create binary Concat + return std::make_shared(lhs, rhs); + } } std::shared_ptr -operator|(const ov::genai::StructuredOutputConfig::CompoundGrammar& lhs, - const ov::genai::StructuredOutputConfig::CompoundGrammar& rhs) { - return std::make_shared(lhs, rhs); +operator|(const ov::genai::StructuredOutputConfig::StructuralTag& lhs, + const ov::genai::StructuredOutputConfig::StructuralTag& rhs) { + using SOC = ov::genai::StructuredOutputConfig; + const auto lhs_union = std::get_if>(&lhs); + const auto rhs_union = std::get_if>(&rhs); + + if (lhs_union && *lhs_union) { + if (rhs_union && *rhs_union) { + // both are Union: combine elements + std::vector elems = (*lhs_union)->elements; + elems.insert(elems.end(), (*rhs_union)->elements.begin(), (*rhs_union)->elements.end()); + return std::make_shared(elems); + } else { + // only lhs is Union: append rhs + std::vector elems = (*lhs_union)->elements; + elems.push_back(rhs); + return std::make_shared(elems); + } + } else if (rhs_union && *rhs_union) { + // only rhs is Union: prepend lhs + std::vector elems; + elems.push_back(lhs); + elems.insert(elems.end(), (*rhs_union)->elements.begin(), (*rhs_union)->elements.end()); + return std::make_shared(elems); + } else { + // neither is Union: create binary Union + return std::make_shared(lhs, rhs); + } } GenerationConfig beam_search() { diff --git a/src/cpp/src/sampling/structured_output/xgrammar_backend.cpp b/src/cpp/src/sampling/structured_output/xgrammar_backend.cpp index 3c91b01a08..9d5f7e22c5 100644 --- a/src/cpp/src/sampling/structured_output/xgrammar_backend.cpp +++ b/src/cpp/src/sampling/structured_output/xgrammar_backend.cpp @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 #include 
"xgrammar_backend.hpp" +#include "logger.hpp" #include namespace ov { @@ -24,63 +25,89 @@ XGrammarStructuredOutput::XGrammarStructuredOutput(const ov::genai::Tokenizer::T } -xgrammar::Grammar XGrammarStructuredOutput::parse_compound_grammar(const StructuredOutputConfig::CompoundGrammar& compound_grammar) { - return std::visit([](const auto& grammar) -> xgrammar::Grammar { - using T = std::decay_t; - if constexpr (std::is_same_v) { - return xgrammar::Grammar::FromRegex(grammar.value); - } else if constexpr (std::is_same_v) { - return xgrammar::Grammar::FromJSONSchema(grammar.value); - } else if constexpr (std::is_same_v) { - return xgrammar::Grammar::FromEBNF(grammar.value); - } else if constexpr (std::is_same_v>) { - return xgrammar::Grammar::Concat({ - XGrammarStructuredOutput::parse_compound_grammar(grammar->left), - XGrammarStructuredOutput::parse_compound_grammar(grammar->right) - }); - } else if constexpr (std::is_same_v>) { - return xgrammar::Grammar::Union({ - XGrammarStructuredOutput::parse_compound_grammar(grammar->left), - XGrammarStructuredOutput::parse_compound_grammar(grammar->right) - }); - } else { - OPENVINO_THROW( - "Cannot compile the compound grammar. Unsupported compound grammar type. " - "Supported types are: Regex, JSONSchema, EBNF, Union, Concat." 
- ); - } - }, compound_grammar); +xgrammar::Grammar XGrammarStructuredOutput::parse_structural_tag(const StructuredOutputConfig::CompoundGrammar& compound_grammar) { + + std::ostringstream oss; + + // compound grammar is already a string JSON representation + if (std::holds_alternative(compound_grammar)) { + oss << std::get(compound_grammar); + } else { + oss << "{\"type\": \"structural_tag\", \"format\": "; + oss << std::visit([](const auto& grammar) -> std::string { + return StructuredOutputConfig::structural_tag_to_json(grammar); + }, compound_grammar); + oss << "}"; + }; + auto result = xgrammar::Grammar::FromStructuralTag(oss.str()); + if (std::holds_alternative(result)) { + return std::get(result); + } else { + const auto& error = std::get(result); + std::string error_message; + std::visit([&error_message](const auto& err) { + if constexpr (std::is_member_function_pointer::what)>::value) { + error_message = err.what(); + } else { + error_message = "Unknown error type"; + } + }, error); + OPENVINO_THROW("Failed to create grammar from structural tag: " + error_message); + } } xgrammar::Grammar XGrammarStructuredOutput::create_grammar(const std::optional& structured_output_config) { - // Default constructor for xgrammar::Grammar is not enabled, - // create explicitly an empty grammar. 
- xgrammar::Grammar grammar = xgrammar::Grammar::FromEBNF("root ::= root"); if (!structured_output_config.has_value()) { - return grammar; + return xgrammar::Grammar::FromEBNF("root ::= root"); } if (structured_output_config.value().json_schema.has_value()) { - grammar = xgrammar::Grammar::FromJSONSchema(structured_output_config.value().json_schema.value()); + return xgrammar::Grammar::FromJSONSchema(structured_output_config.value().json_schema.value()); } else if (structured_output_config.value().regex.has_value()) { - grammar = xgrammar::Grammar::FromRegex(structured_output_config.value().regex.value()); + return xgrammar::Grammar::FromRegex(structured_output_config.value().regex.value()); } else if (structured_output_config.value().grammar.has_value()) { - grammar = xgrammar::Grammar::FromEBNF(structured_output_config.value().grammar.value()); + return xgrammar::Grammar::FromEBNF(structured_output_config.value().grammar.value()); } else if (structured_output_config.value().structural_tags_config.has_value()) { - std::vector xgrammar_structural_tags; - for (const auto& tag : structured_output_config.value().structural_tags_config.value().structural_tags) { - auto structural_tag = xgrammar::StructuralTagItem{tag.begin, tag.schema, tag.end}; - xgrammar_structural_tags.push_back(std::move(structural_tag)); - } - grammar = xgrammar::Grammar::FromStructuralTag( - xgrammar_structural_tags, structured_output_config.value().structural_tags_config.value().triggers - ); + return std::visit([](const auto& config) -> xgrammar::Grammar { + using ConfigType = std::decay_t; + if constexpr (std::is_same_v) { + // Old format: StructuralTagsConfig + Logger::warn( + "The use of \"structural_tags_config\" with StructuralTagsConfig instance is deprecated and will be removed in future releases. " + "Use TriggeredTags instead." 
+ ); + + std::ostringstream oss; + oss << "{\"type\": \"structural_tag\", \"format\": " << config.to_json() << "}"; + auto result = xgrammar::Grammar::FromStructuralTag(oss.str()); + if (std::holds_alternative(result)) { + return std::get(result); + } else { + const auto& error = std::get(result); + std::string error_message; + std::visit([&error_message](const auto& err) { + if constexpr (std::is_member_function_pointer::what)>::value) { + error_message = err.what(); + } else { + error_message = "Unknown error type"; + } + }, error); + OPENVINO_THROW("Failed to create grammar from structural tag: " + error_message); + } + } else { + // New format: StructuralTag + return parse_structural_tag(config); + } + }, structured_output_config.value().structural_tags_config.value()); } else if (structured_output_config.value().compound_grammar.has_value()) { - grammar = parse_compound_grammar(structured_output_config.value().compound_grammar.value()); - } else { - OPENVINO_THROW("No grammar definition provided for structured output generation."); + Logger::warn( + "The use of \"compound_grammar\" is deprecated and will be removed in future releases.\n" + "Pass the same input to \"structural_tags_config\" instead." 
+ ); + return parse_structural_tag(structured_output_config.value().compound_grammar.value()); } - return grammar; + + OPENVINO_THROW("No grammar definition provided for structured output generation."); } void XGrammarStructuredOutput::validate_grammar(const std::optional& structured_output_config) { @@ -129,7 +156,7 @@ XGrammarLogitsTransformer::XGrammarLogitsTransformer( m_token_bitmask->ndim = 1; m_token_bitmask->dtype = DLDataType{kDLInt, 32, 1}; m_token_bitmask->byte_offset = 0; - m_token_bitmask->strides = nullptr; // No strides, tensor is compact + m_token_bitmask->strides = &m_bitmask_strides[0]; // xgrammar expects strides to be set, even for compact tensors m_bitmask_shape = {static_cast(m_token_bitmask_ov.get_size())}; m_token_bitmask->shape = &m_bitmask_shape[0]; @@ -138,9 +165,9 @@ XGrammarLogitsTransformer::XGrammarLogitsTransformer( m_next_token_logits->ndim = 1; m_next_token_logits->dtype = DLDataType{kDLFloat, 32, 1}; m_next_token_logits->byte_offset = 0; - m_next_token_logits->strides = nullptr; // No strides, tensor is compact m_logits_shape = {static_cast(m_vocab_size)}; m_next_token_logits->shape = &m_logits_shape[0]; + m_next_token_logits->strides = &m_logits_strides[0]; } void XGrammarLogitsTransformer::accept_tokens(const TokenIds& input_ids) { diff --git a/src/cpp/src/sampling/structured_output/xgrammar_backend.hpp b/src/cpp/src/sampling/structured_output/xgrammar_backend.hpp index ea65806bc1..886c0b76c0 100644 --- a/src/cpp/src/sampling/structured_output/xgrammar_backend.hpp +++ b/src/cpp/src/sampling/structured_output/xgrammar_backend.hpp @@ -43,7 +43,9 @@ class XGrammarLogitsTransformer : public IStatefulLogitTransformer { std::shared_ptr m_token_bitmask; std::shared_ptr m_next_token_logits; std::vector m_logits_shape; + std::vector m_logits_strides = {1}; std::vector m_bitmask_shape; + std::vector m_bitmask_strides = {1}; int m_vocab_size; }; @@ -82,7 +84,7 @@ class XGrammarStructuredOutput : public IStructuredOutputImpl { private: 
std::unique_ptr m_grammar_compiler; - static xgrammar::Grammar parse_compound_grammar(const StructuredOutputConfig::CompoundGrammar& compound_grammar); + static xgrammar::Grammar parse_structural_tag(const StructuredOutputConfig::CompoundGrammar& compound_grammar); xgrammar::Grammar create_grammar(const std::optional& structured_output_config); }; diff --git a/src/python/openvino_genai/py_openvino_genai.pyi b/src/python/openvino_genai/py_openvino_genai.pyi index 68fd193d4e..de86f9ce9c 100644 --- a/src/python/openvino_genai/py_openvino_genai.pyi +++ b/src/python/openvino_genai/py_openvino_genai.pyi @@ -2505,14 +2505,10 @@ class StructuralTagItem: """ @typing.overload def __init__(self) -> None: - """ - Default constructor for StructuralTagItem - """ + ... @typing.overload def __init__(self, **kwargs) -> None: - """ - Constructor that initializes the structured tags configuration with kwargs. - """ + ... def __repr__(self) -> str: ... @property @@ -2562,14 +2558,10 @@ class StructuralTagsConfig: """ @typing.overload def __init__(self) -> None: - """ - Default constructor for StructuralTagsConfig - """ + ... @typing.overload def __init__(self, **kwargs) -> None: - """ - Constructor that initializes the structured tags configuration with kwargs. - """ + ... def __repr__(self) -> str: ... @property @@ -2606,89 +2598,239 @@ class StructuredOutputConfig: It allows for more complex and flexible structured output generation. The compound grammar a Union or Concat of several grammars, where each grammar can be a JSON schema, regex, EBNF, Union or Concat. """ + class AnyText: + """ + + AnyText structural tag allows any text for the portion of output + covered by this tag. + """ + def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: + ... + def __init__(self) -> None: + ... + def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: + ... + def __repr__(self) -> str: + ... 
class Concat: - left: openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union - right: openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union - @staticmethod - def __new__(arg0: typing.Any, arg1: typing.Any, arg2: typing.Any) -> StructuredOutputConfig.Concat: - """ - Concat combines two grammars sequentially, e.g. "A B" means A followed by B - """ + """ + + Concat composes multiple structural tags in sequence. Each element + must be produced in the given order. Can be used indirectly with + operator. + + Example: Concat(ConstString("a"), ConstString("b")) produces "ab". + ConstString("a") + ConstString("b") is equivalent. + """ def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: ... + @typing.overload + def __init__(self, elements: collections.abc.Iterable) -> None: + ... + @typing.overload + def __init__(self, *args) -> None: + ... + def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: + ... + def __repr__(self) -> str: + ... 
+ @property + def elements(self) -> list[str | openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.ConstString | openvino_genai.py_openvino_genai.StructuredOutputConfig.AnyText | openvino_genai.py_openvino_genai.StructuredOutputConfig.QwenXMLParametersFormat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | openvino_genai.py_openvino_genai.StructuredOutputConfig.Tag | openvino_genai.py_openvino_genai.StructuredOutputConfig.TriggeredTags | openvino_genai.py_openvino_genai.StructuredOutputConfig.TagsWithSeparator]: + ... + @elements.setter + def elements(self, arg0: collections.abc.Sequence[str | openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.ConstString | openvino_genai.py_openvino_genai.StructuredOutputConfig.AnyText | openvino_genai.py_openvino_genai.StructuredOutputConfig.QwenXMLParametersFormat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | openvino_genai.py_openvino_genai.StructuredOutputConfig.Tag | openvino_genai.py_openvino_genai.StructuredOutputConfig.TriggeredTags | openvino_genai.py_openvino_genai.StructuredOutputConfig.TagsWithSeparator]) -> None: + ... + class ConstString: + """ + + ConstString structural tag forces the generator to produce exactly + the provided constant string value. + """ + value: str + def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: + ... + def __init__(self, arg0: str) -> None: + ... 
def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: ... def __repr__(self) -> str: ... class EBNF: + """ + + EBNF structural tag constrains output using an EBNF grammar. + """ value: str def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: ... def __init__(self, arg0: str) -> None: - """ - EBNF grammar building block for compound grammar configuration. - """ + ... def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: ... def __repr__(self) -> str: ... class JSONSchema: + """ + + JSONSchema structural tag constrains output to a JSON document that + must conform to the provided JSON Schema string. + """ value: str def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: ... def __init__(self, arg0: str) -> None: - """ - JSON schema building block for compound grammar configuration. - """ + ... + def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: + ... + def __repr__(self) -> str: + ... + class QwenXMLParametersFormat: + """ + + QwenXMLParametersFormat instructs the generator to output an XML + parameters block derived from the provided JSON schema. This is a + specialized helper for Qwen-style XML parameter formatting. + """ + json_schema: str + def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: + ... + def __init__(self, arg0: str) -> None: + ... def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: ... def __repr__(self) -> str: ... class Regex: + """ + + Regex structural tag constrains output using a regular expression. + """ value: str def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: ... def __init__(self, arg0: str) -> None: - """ - Regex building block for compound grammar configuration. - """ + ... def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: ... def __repr__(self) -> str: ... + class Tag: + """ + + Tag defines a begin/end wrapper with constrained inner content. 
+ + The generator will output `begin`, then the `content` (a StructuralTag), + and finally `end`. + + Example: Tag("", AnyText(), "") represents thinking portion of the model output. + """ + begin: str + content: str | openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.ConstString | openvino_genai.py_openvino_genai.StructuredOutputConfig.AnyText | openvino_genai.py_openvino_genai.StructuredOutputConfig.QwenXMLParametersFormat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | openvino_genai.py_openvino_genai.StructuredOutputConfig.Tag | openvino_genai.py_openvino_genai.StructuredOutputConfig.TriggeredTags | openvino_genai.py_openvino_genai.StructuredOutputConfig.TagsWithSeparator + end: str + def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: + ... + def __init__(self, begin: str, content: str | openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.ConstString | openvino_genai.py_openvino_genai.StructuredOutputConfig.AnyText | openvino_genai.py_openvino_genai.StructuredOutputConfig.QwenXMLParametersFormat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | openvino_genai.py_openvino_genai.StructuredOutputConfig.Tag | openvino_genai.py_openvino_genai.StructuredOutputConfig.TriggeredTags | openvino_genai.py_openvino_genai.StructuredOutputConfig.TagsWithSeparator, end: str) -> None: + ... + def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: + ... + def __repr__(self) -> str: + ... 
+ class TagsWithSeparator: + """ + + TagsWithSeparator configures generation of a sequence of tags + separated by a fixed separator string. + + Can be used to produce repeated tagged elements like "A;B" + where `separator`=";". + """ + at_least_one: bool + separator: str + stop_after_first: bool + def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: + ... + def __init__(self, tags: collections.abc.Sequence[StructuredOutputConfig.Tag], separator: str, at_least_one: bool = False, stop_after_first: bool = False) -> None: + ... + def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: + ... + def __repr__(self) -> str: + ... + @property + def tags(self) -> list[StructuredOutputConfig.Tag]: + ... + @tags.setter + def tags(self, arg0: collections.abc.Sequence[StructuredOutputConfig.Tag]) -> None: + ... + class TriggeredTags: + """ + + TriggeredTags associates a set of `triggers` with multiple `tags`. + + When the model generates any of the trigger strings the structured + generation activates to produce configured tags. Flags allow requiring + at least one tag and stopping structured generation after the first tag. + """ + at_least_one: bool + stop_after_first: bool + def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: + ... + def __init__(self, triggers: collections.abc.Sequence[str], tags: collections.abc.Sequence[StructuredOutputConfig.Tag], at_least_one: bool = False, stop_after_first: bool = False) -> None: + ... + def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: + ... + def __repr__(self) -> str: + ... + @property + def tags(self) -> list[StructuredOutputConfig.Tag]: + ... + @tags.setter + def tags(self, arg0: collections.abc.Sequence[StructuredOutputConfig.Tag]) -> None: + ... + @property + def triggers(self) -> list[str]: + ... + @triggers.setter + def triggers(self, arg0: collections.abc.Sequence[str]) -> None: + ... 
class Union: - left: openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union - right: openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union - @staticmethod - def __new__(arg0: typing.Any, arg1: typing.Any, arg2: typing.Any) -> StructuredOutputConfig.Union: - """ - Union combines two grammars in parallel, e.g. "A | B" means either A or B - """ + """ + + Union composes multiple structural tags as alternatives. The + model may produce any one of the provided elements. Can be used indirectly + with | operator. + """ def __add__(self, arg0: typing.Any) -> StructuredOutputConfig.Concat: ... + @typing.overload + def __init__(self, elements: collections.abc.Iterable) -> None: + ... + @typing.overload + def __init__(self, *args) -> None: + ... def __or__(self, arg0: typing.Any) -> StructuredOutputConfig.Union: ... def __repr__(self) -> str: ... 
+ @property + def elements(self) -> list[str | openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.ConstString | openvino_genai.py_openvino_genai.StructuredOutputConfig.AnyText | openvino_genai.py_openvino_genai.StructuredOutputConfig.QwenXMLParametersFormat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | openvino_genai.py_openvino_genai.StructuredOutputConfig.Tag | openvino_genai.py_openvino_genai.StructuredOutputConfig.TriggeredTags | openvino_genai.py_openvino_genai.StructuredOutputConfig.TagsWithSeparator]: + ... + @elements.setter + def elements(self, arg0: collections.abc.Sequence[str | openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.ConstString | openvino_genai.py_openvino_genai.StructuredOutputConfig.AnyText | openvino_genai.py_openvino_genai.StructuredOutputConfig.QwenXMLParametersFormat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | openvino_genai.py_openvino_genai.StructuredOutputConfig.Tag | openvino_genai.py_openvino_genai.StructuredOutputConfig.TriggeredTags | openvino_genai.py_openvino_genai.StructuredOutputConfig.TagsWithSeparator]) -> None: + ... @typing.overload def __init__(self) -> None: - """ - Default constructor for StructuredOutputConfig - """ + ... @typing.overload def __init__(self, **kwargs) -> None: - """ - Constructor that initializes the structured output configuration with kwargs. - """ + ... def __repr__(self) -> str: ... 
@property - def compound_grammar(self) -> openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | None: + def compound_grammar(self) -> str | openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.ConstString | openvino_genai.py_openvino_genai.StructuredOutputConfig.AnyText | openvino_genai.py_openvino_genai.StructuredOutputConfig.QwenXMLParametersFormat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | openvino_genai.py_openvino_genai.StructuredOutputConfig.Tag | openvino_genai.py_openvino_genai.StructuredOutputConfig.TriggeredTags | openvino_genai.py_openvino_genai.StructuredOutputConfig.TagsWithSeparator | None: """ Compound grammar for structured output generation """ @compound_grammar.setter - def compound_grammar(self, arg0: openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | None) -> None: + def compound_grammar(self, arg0: str | openvino_genai.py_openvino_genai.StructuredOutputConfig.Regex | openvino_genai.py_openvino_genai.StructuredOutputConfig.JSONSchema | openvino_genai.py_openvino_genai.StructuredOutputConfig.EBNF | openvino_genai.py_openvino_genai.StructuredOutputConfig.ConstString | openvino_genai.py_openvino_genai.StructuredOutputConfig.AnyText 
| openvino_genai.py_openvino_genai.StructuredOutputConfig.QwenXMLParametersFormat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Concat | openvino_genai.py_openvino_genai.StructuredOutputConfig.Union | openvino_genai.py_openvino_genai.StructuredOutputConfig.Tag | openvino_genai.py_openvino_genai.StructuredOutputConfig.TriggeredTags | openvino_genai.py_openvino_genai.StructuredOutputConfig.TagsWithSeparator | None) -> None: ... @property def grammar(self) -> str | None: @@ -2715,12 +2857,12 @@ class StructuredOutputConfig: def regex(self, arg0: str | None) -> None: ... @property - def structural_tags_config(self) -> openvino_genai.py_openvino_genai.StructuralTagsConfig | None: + def structural_tags_config(self) -> typing.Any: """ - Configuration for structural tags in structured output generation + Configuration for structural tags in structured output generation (can be StructuralTagsConfig or StructuralTag) """ @structural_tags_config.setter - def structural_tags_config(self, arg0: openvino_genai.py_openvino_genai.StructuralTagsConfig | None) -> None: + def structural_tags_config(self, arg1: typing.Any) -> None: ... class SummaryStats: def __init__(self) -> None: diff --git a/src/python/py_generation_config.cpp b/src/python/py_generation_config.cpp index 9459b8dfc3..e6030d066e 100644 --- a/src/python/py_generation_config.cpp +++ b/src/python/py_generation_config.cpp @@ -47,6 +47,74 @@ auto structured_output_config_docstring = R"( The compound grammar a Union or Concat of several grammars, where each grammar can be a JSON schema, regex, EBNF, Union or Concat. )"; +auto regex_docstring = R"( + Regex structural tag constrains output using a regular expression. +)"; + +auto jsonschema_docstring = R"( + JSONSchema structural tag constrains output to a JSON document that + must conform to the provided JSON Schema string. +)"; + +auto ebnf_docstring = R"( + EBNF structural tag constrains output using an EBNF grammar. 
+)"; + +auto conststring_docstring = R"( + ConstString structural tag forces the generator to produce exactly + the provided constant string value. +)"; + +auto anytext_docstring = R"( + AnyText structural tag allows any text for the portion of output + covered by this tag. +)"; + +auto qwenxml_docstring = R"( + QwenXMLParametersFormat instructs the generator to output an XML + parameters block derived from the provided JSON schema. This is a + specialized helper for Qwen-style XML parameter formatting. +)"; + +auto concat_docstring = R"( + Concat composes multiple structural tags in sequence. Each element + must be produced in the given order. Can be used indirectly with + operator. + + Example: Concat(ConstString("a"), ConstString("b")) produces "ab". + ConstString("a") + ConstString("b") is equivalent. +)"; + +auto union_docstring = R"( + Union composes multiple structural tags as alternatives. The + model may produce any one of the provided elements. Can be used indirectly + with | operator. +)"; + +auto tag_docstring = R"( + Tag defines a begin/end wrapper with constrained inner content. + + The generator will output `begin`, then the `content` (a StructuralTag), + and finally `end`. + + Example: Tag("", AnyText(), "") represents thinking portion of the model output. +)"; + +auto triggered_tags_docstring = R"( + TriggeredTags associates a set of `triggers` with multiple `tags`. + + When the model generates any of the trigger strings the structured + generation activates to produce configured tags. Flags allow requiring + at least one tag and stopping structured generation after the first tag. +)"; + +auto tags_with_separator_docstring = R"( + TagsWithSeparator configures generation of a sequence of tags + separated by a fixed separator string. + + Can be used to produce repeated tagged elements like "A;B" + where `separator`=";". 
+)"; + auto structured_tags_config_docstring = R"( Configures structured output generation by combining regular sampling with structural tags. @@ -132,13 +200,26 @@ template void add_grammar_operators(PyClass& py_cls) { py_cls .def("__add__", [](py::object self, py::object other) { - return pyutils::py_obj_to_compound_grammar(self) + pyutils::py_obj_to_compound_grammar(other); + return pyutils::py_obj_to_structural_tag(self) + pyutils::py_obj_to_structural_tag(other); }) .def("__or__", [](py::object self, py::object other) { - return pyutils::py_obj_to_compound_grammar(self) | pyutils::py_obj_to_compound_grammar(other); + return pyutils::py_obj_to_structural_tag(self) | pyutils::py_obj_to_structural_tag(other); }); }; +std::vector collect_structural_tags(py::iterable elements, + const char* context) { + std::vector tags; + for (py::handle element : elements) { + tags.emplace_back(pyutils::py_obj_to_structural_tag( + py::reinterpret_borrow(element))); + } + if (tags.size() < 2) { + throw py::value_error(std::string(context) + " requires at least two elements"); + } + return tags; +}; + void init_generation_config(py::module_& m) { // Binding for StopCriteria py::enum_(m, "StopCriteria", stop_criteria_docstring) @@ -148,10 +229,10 @@ void init_generation_config(py::module_& m) { py::class_(m, "StructuralTagItem", structured_tags_item_docstring) - .def(py::init<>(), "Default constructor for StructuralTagItem") + .def(py::init<>()) .def(py::init([](py::kwargs kwargs) { return StructuralTagItem(pyutils::kwargs_to_any_map(kwargs)); - }), "Constructor that initializes the structured tags configuration with kwargs.") + })) .def_readwrite("begin", &StructuralTagItem::begin, "Begin string for Structural Tag Item") .def_readwrite("schema", &StructuralTagItem::schema, "Json schema for Structural Tag Item") .def_readwrite("end", &StructuralTagItem::end, "End string for Structural Tag Item") @@ -165,10 +246,10 @@ void init_generation_config(py::module_& m) { py::class_(m, 
"StructuralTagsConfig", structured_tags_config_docstring) - .def(py::init<>(), "Default constructor for StructuralTagsConfig") + .def(py::init<>()) .def(py::init([](py::kwargs kwargs) { return StructuralTagsConfig(pyutils::kwargs_to_any_map(kwargs)); - }), "Constructor that initializes the structured tags configuration with kwargs.") + })) .def_readwrite("structural_tags", &StructuralTagsConfig::structural_tags, "List of structural tag items for structured output generation") .def_readwrite("triggers", &StructuralTagsConfig::triggers, "List of strings that will trigger generation of structured output") .def("__repr__", @@ -182,60 +263,144 @@ void init_generation_config(py::module_& m) { // pybind11-stubgen generates not accurate signatures and shows warning/errors // because Concat/Union/StructuredOutputConfig use EBNF/JSONSchema/etc. which are not defined yet. auto structured_output_config = py::class_(m, "StructuredOutputConfig", structured_output_config_docstring); - auto concat = py::class_>(structured_output_config, "Concat"); - auto union_ = py::class_>(structured_output_config, "Union"); - - auto regex = py::class_(structured_output_config, "Regex") - .def(py::init(), "Regex building block for compound grammar configuration.") + auto concat = py::class_>(structured_output_config, "Concat", concat_docstring); + auto union_ = py::class_>(structured_output_config, "Union", union_docstring); + auto tag = py::class_>(structured_output_config, "Tag", tag_docstring); + auto triggered_tags = py::class_>(structured_output_config, "TriggeredTags", triggered_tags_docstring); + auto tags_with_separator = py::class_>(structured_output_config, "TagsWithSeparator", tags_with_separator_docstring); + + auto regex = py::class_(structured_output_config, "Regex", regex_docstring) + .def(py::init()) .def_readwrite("value", &StructuredOutputConfig::Regex::value) .def("__repr__", [](const StructuredOutputConfig::Regex& self) { return self.to_string(); }); 
add_grammar_operators(regex); - auto json_schema = py::class_(structured_output_config, "JSONSchema") - .def(py::init(), "JSON schema building block for compound grammar configuration.") + auto json_schema = py::class_(structured_output_config, "JSONSchema", jsonschema_docstring) + .def(py::init()) .def_readwrite("value", &StructuredOutputConfig::JSONSchema::value) .def("__repr__", [](const StructuredOutputConfig::JSONSchema& self) { return self.to_string(); }); add_grammar_operators(json_schema); - auto ebnf = py::class_(structured_output_config, "EBNF") - .def(py::init(), "EBNF grammar building block for compound grammar configuration.") + auto ebnf = py::class_(structured_output_config, "EBNF", ebnf_docstring) + .def(py::init()) .def_readwrite("value", &StructuredOutputConfig::EBNF::value) .def("__repr__", [](const StructuredOutputConfig::EBNF& self) { return self.to_string(); }); add_grammar_operators(ebnf); + auto const_string = py::class_(structured_output_config, "ConstString", conststring_docstring) + .def(py::init()) + .def_readwrite("value", &StructuredOutputConfig::ConstString::value) + .def("__repr__", [](const StructuredOutputConfig::ConstString& self) { return self.to_string(); }); + add_grammar_operators(const_string); + + auto any_text = py::class_(structured_output_config, "AnyText", anytext_docstring) + .def(py::init<>()) + .def("__repr__", [](const StructuredOutputConfig::AnyText& self) { return self.to_string(); }); + add_grammar_operators(any_text); + + auto qwen_xml = py::class_(structured_output_config, "QwenXMLParametersFormat", qwenxml_docstring) + .def(py::init()) + .def_readwrite("json_schema", &StructuredOutputConfig::QwenXMLParametersFormat::json_schema) + .def("__repr__", [](const StructuredOutputConfig::QwenXMLParametersFormat& self) { return self.to_string(); }); + add_grammar_operators(qwen_xml); + concat - .def_static("__new__", [](py::object cls, py::object left, py::object right) { + .def(py::init([](py::iterable elements) { + 
return std::make_shared( + collect_structural_tags(elements, "StructuredOutputConfig.Concat")); + }), py::arg("elements")) + .def(py::init([](py::args args) { return std::make_shared( - pyutils::py_obj_to_compound_grammar(left), - pyutils::py_obj_to_compound_grammar(right) - ); - }, "Concat combines two grammars sequentially, e.g. \"A B\" means A followed by B") - .def_readwrite("left", &StructuredOutputConfig::Concat::left) - .def_readwrite("right", &StructuredOutputConfig::Concat::right) + collect_structural_tags(py::reinterpret_borrow(args), + "StructuredOutputConfig.Concat")); + })) + .def_readwrite("elements", &StructuredOutputConfig::Concat::elements) .def("__repr__", [](const StructuredOutputConfig::Concat& self) { return self.to_string(); }); add_grammar_operators(concat); union_ - .def_static("__new__", [](py::object cls, py::object left, py::object right) { + .def(py::init([](py::iterable elements) { return std::make_shared( - pyutils::py_obj_to_compound_grammar(left), - pyutils::py_obj_to_compound_grammar(right) - ); - }, "Union combines two grammars in parallel, e.g. 
\"A | B\" means either A or B") - .def_readwrite("left", &StructuredOutputConfig::Union::left) - .def_readwrite("right", &StructuredOutputConfig::Union::right) + collect_structural_tags(elements, "StructuredOutputConfig.Union")); + }), py::arg("elements")) + .def(py::init([](py::args args) { + return std::make_shared( + collect_structural_tags(py::reinterpret_borrow(args), + "StructuredOutputConfig.Union")); + })) + .def_readwrite("elements", &StructuredOutputConfig::Union::elements) .def("__repr__", [](const StructuredOutputConfig::Union& self) { return self.to_string(); }); add_grammar_operators(union_); + tag + .def(py::init(), + py::arg("begin"), py::arg("content"), py::arg("end")) + .def_readwrite("begin", &StructuredOutputConfig::Tag::begin) + .def_readwrite("content", &StructuredOutputConfig::Tag::content) + .def_readwrite("end", &StructuredOutputConfig::Tag::end) + .def("__repr__", [](const StructuredOutputConfig::Tag& self) { return self.to_string(); }); + add_grammar_operators(tag); + + triggered_tags + .def(py::init&, const std::vector&, bool, bool>(), + py::arg("triggers"), py::arg("tags"), py::arg("at_least_one") = false, py::arg("stop_after_first") = false) + .def_readwrite("triggers", &StructuredOutputConfig::TriggeredTags::triggers) + .def_readwrite("tags", &StructuredOutputConfig::TriggeredTags::tags) + .def_readwrite("at_least_one", &StructuredOutputConfig::TriggeredTags::at_least_one) + .def_readwrite("stop_after_first", &StructuredOutputConfig::TriggeredTags::stop_after_first) + .def("__repr__", [](const StructuredOutputConfig::TriggeredTags& self) { return self.to_string(); }); + add_grammar_operators(triggered_tags); + + tags_with_separator + .def(py::init&, const std::string&, bool, bool>(), + py::arg("tags"), py::arg("separator"), py::arg("at_least_one") = false, py::arg("stop_after_first") = false) + .def_readwrite("tags", &StructuredOutputConfig::TagsWithSeparator::tags) + .def_readwrite("separator", 
&StructuredOutputConfig::TagsWithSeparator::separator) + .def_readwrite("at_least_one", &StructuredOutputConfig::TagsWithSeparator::at_least_one) + .def_readwrite("stop_after_first", &StructuredOutputConfig::TagsWithSeparator::stop_after_first) + .def("__repr__", [](const StructuredOutputConfig::TagsWithSeparator& self) { return self.to_string(); }); + add_grammar_operators(tags_with_separator); + structured_output_config - .def(py::init<>(), "Default constructor for StructuredOutputConfig") + .def(py::init<>()) .def(py::init([](py::kwargs kwargs) { return StructuredOutputConfig(pyutils::kwargs_to_any_map(kwargs)); - }), "Constructor that initializes the structured output configuration with kwargs.") + })) .def_readwrite("json_schema", &StructuredOutputConfig::json_schema, "JSON schema for structured output generation") .def_readwrite("regex", &StructuredOutputConfig::regex, "Regular expression for structured output generation") .def_readwrite("grammar", &StructuredOutputConfig::grammar, "Grammar for structured output generation") - .def_readwrite("structural_tags_config", &StructuredOutputConfig::structural_tags_config, "Configuration for structural tags in structured output generation") + .def_property("structural_tags_config", + [](const StructuredOutputConfig& self) -> py::object { + if (!self.structural_tags_config.has_value()) { + return py::none(); + } + return std::visit([](const auto& config) -> py::object { + return py::cast(config); + }, *self.structural_tags_config); + }, + [](StructuredOutputConfig& self, py::object value) { + if (value.is_none()) { + self.structural_tags_config = std::nullopt; + } else if (py::isinstance(value)) { + self.structural_tags_config = py::cast(value); + } else if (py::isinstance(value) + || py::isinstance(value) + || py::isinstance(value) + || py::isinstance(value) + || py::isinstance(value) + || py::isinstance(value) + || py::isinstance(value) + || py::isinstance(value) + || py::isinstance(value) + || py::isinstance(value) 
+ || py::isinstance(value) + || py::isinstance(value)) { + self.structural_tags_config = pyutils::py_obj_to_structural_tag(value); + } else { + throw py::type_error("structural_tags_config must be either StructuralTagsConfig or a StructuralTag (Regex, JSONSchema, EBNF, ConstString, AnyText, QwenXMLParametersFormat, Union, Concat, Tag, TriggeredTags, TagsWithSeparator or plain str)"); + } + }, + "Configuration for structural tags in structured output generation (can be StructuralTagsConfig or StructuralTag)") .def_readwrite("compound_grammar", &StructuredOutputConfig::compound_grammar, "Compound grammar for structured output generation") .def("__repr__", [](const StructuredOutputConfig &self) { diff --git a/src/python/py_utils.cpp b/src/python/py_utils.cpp index 15f5af5977..ab1b8e21f0 100644 --- a/src/python/py_utils.cpp +++ b/src/python/py_utils.cpp @@ -90,6 +90,11 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) { py::object float_32_type = py::module_::import("numpy").attr("float32"); if (py::isinstance(py_obj)) { + if (property_name == "structural_tags_config") { + std::variant variant_value = + py_obj.cast(); + return variant_value; + } return py_obj.cast(); } else if (py::isinstance(py_obj)) { return py_obj.cast(); @@ -309,14 +314,32 @@ ov::Any py_object_to_any(const py::object& py_obj, std::string property_name) { } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { + // For structural_tags_config property, wrap in variant + if (property_name == "structural_tags_config") { + std::variant variant_value = + py::cast(py_obj); + return variant_value; + } return py::cast(py_obj); } else if (py::isinstance(py_obj) || py::isinstance(py_obj) || py::isinstance(py_obj) + || py::isinstance(py_obj) + || py::isinstance(py_obj) + || py::isinstance(py_obj) // python does not use std::shared_ptr to obj || py::isinstance(py_obj) - || py::isinstance(py_obj)) { - return 
py_obj_to_compound_grammar(py_obj); + || py::isinstance(py_obj) + || py::isinstance(py_obj) + || py::isinstance(py_obj) + || py::isinstance(py_obj)) { + // For structural_tags_config property, wrap in variant + if (property_name == "structural_tags_config") { + std::variant variant_value = + py_obj_to_structural_tag(py_obj); + return variant_value; + } + return py_obj_to_structural_tag(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { @@ -421,19 +444,33 @@ ov::genai::StreamerVariant pystreamer_to_streamer(const PyBindStreamerVariant& p return streamer; } -StructuredOutputConfig::CompoundGrammar py_obj_to_compound_grammar(const py::object& py_obj) { +StructuredOutputConfig::StructuralTag py_obj_to_structural_tag(const py::object& py_obj) { if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast(py_obj); } else if (py::isinstance(py_obj)) { return py::cast>(py_obj); } else if (py::isinstance(py_obj)) { return py::cast>(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast>(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast>(py_obj); + } else if (py::isinstance(py_obj)) { + return py::cast>(py_obj); } else { - OPENVINO_THROW(py_obj.get_type(), " type isn't supported for StructuredOutputConfig compound grammar: ", py::str(py_obj)); + OPENVINO_THROW(py_obj.get_type(), " type isn't supported for StructuralTag: ", py::str(py_obj)); } } diff --git a/src/python/py_utils.hpp b/src/python/py_utils.hpp index 7786afcc60..346e44967e 100644 --- a/src/python/py_utils.hpp +++ b/src/python/py_utils.hpp @@ -22,7 +22,7 @@ using 
PyBindStreamerVariant = std::variant< std::shared_ptr, std::monostate>; -ov::genai::StructuredOutputConfig::CompoundGrammar py_obj_to_compound_grammar(const py::object& py_obj); +ov::genai::StructuredOutputConfig::StructuralTag py_obj_to_structural_tag(const py::object& py_obj); template struct overloaded : Ts... { diff --git a/tests/python_tests/test_structured_output.py b/tests/python_tests/test_structured_output.py index 7926f3ebff..98b704095e 100644 --- a/tests/python_tests/test_structured_output.py +++ b/tests/python_tests/test_structured_output.py @@ -1,53 +1,61 @@ -import pytest import json -import openvino_genai as ov_genai +import re +from typing import Literal +import openvino_genai as ov_genai +import pytest +from openvino_genai import StructuredOutputConfig as SOC from pydantic import BaseModel, Field -from typing import Literal from utils.hugging_face import download_and_convert_model from utils.ov_genai_pipelines import create_ov_pipeline -import re + @pytest.fixture(scope="module") def ov_pipe(request): _, _, models_path = download_and_convert_model(request.param) return create_ov_pipeline(models_path) + class Person(BaseModel): name: str = Field(pattern=r"^[A-Z][a-z]{1,20}$") age: int = Field(ge=0, le=128) city: Literal["Dublin", "Dubai", "Munich"] + class Transaction(BaseModel): id: int = Field(ge=0, le=2**14) amount: float = Field(ge=0.0, le=1e6) currency: Literal["USD", "EUR", "GBP"] + class RESTAPIResponse(BaseModel): status: Literal["success", "error"] data: str = Field(pattern=r"^[A-Z][a-z]{1,20}$") + structured_id_models = [ - 'TinyLlama/TinyLlama-1.1B-Chat-v1.0', - 'katuni4ka/tiny-random-phi3', + "TinyLlama/TinyLlama-1.1B-Chat-v1.0", + "katuni4ka/tiny-random-phi3", ] + @pytest.mark.precommit @pytest.mark.parametrize("ov_pipe", structured_id_models, indirect=True) -@pytest.mark.parametrize("prompt_and_scheme", [ - ("Generate a json about a person.", Person), - ("Generate a json about a transaction.", Transaction), - ("Generate a json about
a REST API response.", RESTAPIResponse) -]) +@pytest.mark.parametrize( + "prompt_and_scheme", + [ + ("Generate a json about a person.", Person), + ("Generate a json about a transaction.", Transaction), + ("Generate a json about a REST API response.", RESTAPIResponse), + ], +) @pytest.mark.parametrize("use_compound_grammar", [True, False]) -def test_structured_output_generation(ov_pipe, prompt_and_scheme, use_compound_grammar): +def test_structured_json(ov_pipe, prompt_and_scheme, use_compound_grammar, capfd): prompt, SchemeType = prompt_and_scheme structured_output_config = ov_genai.StructuredOutputConfig() if use_compound_grammar: - structured_output_config.compound_grammar = structured_output_config.JSONSchema( - json.dumps(SchemeType.model_json_schema()) - ) + structured_output_config.compound_grammar = SOC.JSONSchema(json.dumps(SchemeType.model_json_schema())) else: structured_output_config.json_schema = json.dumps(SchemeType.model_json_schema()) @@ -56,27 +64,29 @@ def test_structured_output_generation(ov_pipe, prompt_and_scheme, use_compound_g gen_config.structured_output_config = structured_output_config res_str = ov_pipe.generate(prompt, generation_config=gen_config) - - # If it's invalid it will raise an error. 
- SchemeType.model_validate_json(res_str) + try: + SchemeType.model_validate_json(res_str) + except Exception as e: + pytest.fail(f"Output {res_str} is not valid json schema {SchemeType.model_json_schema()}: {e}") @pytest.mark.precommit @pytest.mark.parametrize("ov_pipe", structured_id_models, indirect=True) -@pytest.mark.parametrize("prompt_and_regex", [ - ("Generate a json about a person.", r'^\{"city":"(Dublin|Dubai|Munich)"\}$'), - # without regex constraint it generates email letter content, but with the regex it generates an email address string - ("Generate an email.", r'^[a-zA-Z0-9._%+-]{1,64}@[a-z]{1,64}\.[a-z]{1,10}$'), - ("Generate a json about a REST API response.", r'^\{"status":"(success|error)"\}$'), -]) +@pytest.mark.parametrize( + "prompt_and_regex", + [ + ("Generate a json about a person.", r'^\{"city":"(Dublin|Dubai|Munich)"\}$'), + # without regex constraint it generates email letter content, but with the regex it generates an email address string + ("Generate an email.", r"^[a-zA-Z0-9._%+-]{1,64}@[a-z]{1,64}\.[a-z]{1,10}$"), + ("Generate a json about a REST API response.", r'^\{"status":"(success|error)"\}$'), + ], +) @pytest.mark.parametrize("use_compound_grammar", [True, False]) def test_structured_regex(ov_pipe, prompt_and_regex, use_compound_grammar): prompt, regex_str = prompt_and_regex structured_output_config = ov_genai.StructuredOutputConfig() if use_compound_grammar: - structured_output_config.compound_grammar = structured_output_config.Regex( - regex_str - ) + structured_output_config.compound_grammar = structured_output_config.Regex(regex_str) else: structured_output_config.regex = regex_str @@ -84,33 +94,35 @@ def test_structured_regex(ov_pipe, prompt_and_regex, use_compound_grammar): gen_config.max_new_tokens = 100 gen_config.structured_output_config = structured_output_config res_str = ov_pipe.generate(prompt, generation_config=gen_config) - + assert re.match(regex_str, res_str), f"Output {res_str} does not match regex 
{regex_str}" - + + @pytest.mark.precommit @pytest.mark.parametrize("ov_pipe", structured_id_models, indirect=True) -@pytest.mark.parametrize("prompt_and_ebnf", [ - # EBNF grammar for generating a date in the format YYYY-MM-DD - ( - "Generate a date", - """ +@pytest.mark.parametrize( + "prompt_and_ebnf", + [ + # EBNF grammar for generating a date in the format YYYY-MM-DD + ( + "Generate a date", + """ root ::= date date ::= year "-" month "-" day year ::= digit digit digit digit month ::= digit digit day ::= digit digit digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" - """ - ), -]) + """, + ), + ], +) @pytest.mark.parametrize("use_compound_grammar", [True, False]) def test_structured_ebnf(ov_pipe, prompt_and_ebnf, use_compound_grammar): prompt, ebnf_grammar = prompt_and_ebnf structured_output_config = ov_genai.StructuredOutputConfig() if use_compound_grammar: - structured_output_config.compound_grammar = structured_output_config.EBNF( - ebnf_grammar - ) + structured_output_config.compound_grammar = structured_output_config.EBNF(ebnf_grammar) else: structured_output_config.grammar = ebnf_grammar @@ -130,17 +142,18 @@ def test_structured_ebnf(ov_pipe, prompt_and_ebnf, use_compound_grammar): @pytest.mark.parametrize( "ov_pipe", [model_id for model_id in structured_id_models if "random" not in model_id], indirect=True ) -@pytest.mark.parametrize("prompt_and_structural_tag", [ - ( - "Repeat the word 'function'", - ov_genai.StructuralTagItem( - begin="function", - schema=json.dumps(RESTAPIResponse.model_json_schema()), - end="" - ) - ), -]) -def test_structural_tags(ov_pipe, prompt_and_structural_tag): +@pytest.mark.parametrize( + "prompt_and_structural_tag", + [ + ( + "Repeat the word 'function'", + ov_genai.StructuralTagItem( + begin="function", schema=json.dumps(RESTAPIResponse.model_json_schema()), end="" + ), + ), + ], +) +def test_structural_tags_old(ov_pipe, prompt_and_structural_tag): prompt, structural_tag = prompt_and_structural_tag 
structured_output_config = ov_genai.StructuredOutputConfig( structural_tags_config=ov_genai.StructuralTagsConfig( @@ -158,3 +171,110 @@ def test_structural_tags(ov_pipe, prompt_and_structural_tag): match = re.search(rf"{structural_tag.begin}(.*?){structural_tag.end}", res_str) assert match, f"Output `{res_str}` does not contain structural tag {structural_tag.begin}...{structural_tag.end}" RESTAPIResponse.model_validate_json(match.group(1)) + + +@pytest.mark.precommit +# use only non-random model for stable output in TriggeredTags test +@pytest.mark.parametrize("ov_pipe", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"], indirect=True) +@pytest.mark.parametrize( + "prompt,tag,validate", + [ + pytest.param( + "", + """ + { + "type": "structural_tag", + "format": { + "type": "const_string", + "value": "abc" + } + }""", + lambda x: x == "abc", + id="Raw string structural tag", + ), + pytest.param("", SOC.Regex("a*"), lambda x: re.match(r"^a*$", x) is not None, id="Regex"), + pytest.param( + "", + SOC.JSONSchema(json.dumps(RESTAPIResponse.model_json_schema())), + RESTAPIResponse.model_validate_json, + id="JSONSchema", + ), + pytest.param( + "", + SOC.EBNF( + """ + root ::= date + date ::= year "-" month "-" day + year ::= digit digit digit digit + month ::= digit digit + day ::= digit digit + digit ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + """ + ), + lambda x: re.match(r"^\d{4}-\d{2}-\d{2}$", x) is not None, + id="EBNF", + ), + pytest.param("", SOC.ConstString("constant_string"), lambda x: x == "constant_string", id="ConstantString"), + pytest.param("", SOC.AnyText(), lambda x: len(x) > 0, id="AnyText"), + pytest.param( + "", + SOC.Tag(begin="function", content=SOC.ConstString("..."), end=""), + lambda x: x == "function...", + id="Tag", + ), + pytest.param( + "", SOC.ConstString("a") + SOC.ConstString("b") + SOC.ConstString("c"), lambda x: x == "abc", id="Concat" + ), + pytest.param( + "", + SOC.ConstString("a") | SOC.ConstString("b") | SOC.ConstString("c"),
+ lambda x: x in ["a", "b", "c"], + id="Union", + ), + pytest.param( + "", + SOC.QwenXMLParametersFormat(json.dumps(RESTAPIResponse.model_json_schema())), + lambda x: ( + # enum values are placed in double quotes for some reason + re.search(r"\"(success|error)\"", x) is not None + and re.search(r"[A-Z][a-z]{1,20}", x) is not None + ), + id="QwenXMLParametersFormat", + ), + pytest.param( + "TriggeredTags. Repeat word 'function'", + SOC.TriggeredTags( + triggers=["function"], + tags=[ + SOC.Tag(begin="function", content=SOC.ConstString("A"), end=""), + SOC.Tag(begin="function", content=SOC.ConstString("B"), end=""), + ], + at_least_one=True, + stop_after_first=True, + ), + lambda x: re.match(r"(function(A|B))", x) is not None, + id="TriggeredTags", + ), + pytest.param( + "", + SOC.TagsWithSeparator( + tags=[ + SOC.Tag(begin="", content=SOC.ConstString("A"), end=""), + SOC.Tag(begin="", content=SOC.ConstString("B"), end=""), + ], + separator=";", + at_least_one=True, + stop_after_first=False, + ), + lambda x: re.match(r"(A|B)(;(A|B))*", x) is not None, + id="TagsWithSeparator", + ), + ], +) +def test_structural_tags(ov_pipe, prompt, tag, validate): + gen_config = ov_genai.GenerationConfig() + gen_config.max_new_tokens = 3 if isinstance(tag, SOC.AnyText) else 100 + gen_config.do_sample = False + gen_config.structured_output_config = SOC(structural_tags_config=tag) + res_str = ov_pipe.generate(prompt, generation_config=gen_config) + assert validate(res_str), f"Output `{res_str}` does not match structural tag {tag}"