37 commits
03bbea3
Fix: Permission issue for EMT helm
14pankaj Aug 4, 2025
1628cb2
update: changed to opt
14pankaj Aug 4, 2025
033e15f
[ChatQnA-Core] Ollama integration
hteeyeoh Aug 6, 2025
3e69d76
[ChatQnA-Core] Helm Chart for Ollama integration
hteeyeoh Aug 14, 2025
60dfe07
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Aug 20, 2025
5be48af
[ChatQnA-Core] Unit test for both openvino and ollama
hteeyeoh Aug 20, 2025
73b429e
[ChatQnA-Core]: Update documentation
hteeyeoh Aug 21, 2025
30ab643
[ChatQnA-Core] Update ollama model endpoint
hteeyeoh Aug 22, 2025
1cdbbfe
[ChatQnA-Core] Update ollama to latest version
hteeyeoh Aug 26, 2025
7c5b689
[ChatQnA Core] Include trust-remote-code flag during conversion
hteeyeoh Aug 26, 2025
abd3e02
fix: ITEP-71084 updated nginx image for sample apps
14pankaj Aug 26, 2025
af7b358
Update: nginx alpine image for stability
14pankaj Aug 28, 2025
74ac839
[chatqna-core] Update dependencies packages
hteeyeoh Aug 28, 2025
9787324
[chatqna-core-ui] Update dependencies and fix unittest
hteeyeoh Aug 28, 2025
fd4da5b
[chatqna-core-ui] Update UI image tag
hteeyeoh Sep 3, 2025
7b83a7c
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Sep 3, 2025
bd36f39
host path to OPT folder
14pankaj Sep 4, 2025
acf48ed
Feat: Upgraded OVMS and deployment step changes
14pankaj Sep 4, 2025
88abdf8
fix: updated folder names
14pankaj Sep 4, 2025
de68af6
feat: Debian nginx images for sample apps
14pankaj Sep 4, 2025
e497e02
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Sep 8, 2025
d8abf0c
[chatqna-ui] Bump NGINX version to 1.29.1
hteeyeoh Sep 8, 2025
63646b9
feat: nginx Image update
14pankaj Sep 9, 2025
7a2edd1
fix: minor updates
14pankaj Sep 9, 2025
3780832
fix: Updated helm with ovms image & script updates.
14pankaj Sep 9, 2025
d79d059
minor updates
14pankaj Sep 9, 2025
ddc0f0f
Merge branch 'ollama' into PenTestFxies
hteeyeoh Sep 9, 2025
59aed07
Merge pull request #3 from 14pankaj/PenTestFxies
hteeyeoh Sep 9, 2025
13c6016
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Sep 9, 2025
23992cc
[chatqna-core] Rename config variable name and create validator class
hteeyeoh Sep 10, 2025
aa83c9b
updated ovms to latest 2025.3
14pankaj Sep 11, 2025
2ff26e5
Merge remote-tracking branch 'origin/main' into ollama
hteeyeoh Sep 17, 2025
4c5ffca
Merge branch 'main' into ollama
hteeyeoh Sep 23, 2025
9011358
[chatqna-core]: Update helm deployment md
hteeyeoh Sep 23, 2025
3efa9b0
Merge branch 'main' into ollama
hteeyeoh Sep 26, 2025
a829c59
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Sep 30, 2025
fb7ed12
Merge branch 'main' into ollama
hteeyeoh Oct 3, 2025
@@ -10,3 +10,4 @@ coverage
 *.lock
 !poetry.lock
 .vscode
+nginx_config/*.conf
81 changes: 23 additions & 58 deletions sample-applications/chat-question-and-answer-core/app/chain.py
@@ -1,17 +1,14 @@
 from .config import config
-from .utils import login_to_huggingface, download_huggingface_model, convert_model
 from .document import load_file_document
 from .logger import logger
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import OpenVINOBgeEmbeddings
-from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker
 from langchain.retrievers import ContextualCompressionRetriever
-from langchain_huggingface import HuggingFacePipeline
+from langchain_community.vectorstores import FAISS
 from langchain_core.runnables import RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
-from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 import os
+import importlib
 import pandas as pd
 
 vectorstore = None
@@ -20,56 +17,23 @@
 # If RUN_TEST is set to "True", the model download and conversion steps are skipped.
 # This flag is set in the conftest.py file before running the tests.
 if os.getenv("RUN_TEST", "").lower() != "true":
-    # login huggingface
-    login_to_huggingface(config.HF_ACCESS_TOKEN)
-
-    # Download convert the model to openvino optimized
-    download_huggingface_model(config.EMBEDDING_MODEL_ID, config._CACHE_DIR)
-    download_huggingface_model(config.RERANKER_MODEL_ID, config._CACHE_DIR)
-    download_huggingface_model(config.LLM_MODEL_ID, config._CACHE_DIR)
-
-    # Convert to openvino IR
-    convert_model(config.EMBEDDING_MODEL_ID, config._CACHE_DIR, "embedding")
-    convert_model(config.RERANKER_MODEL_ID, config._CACHE_DIR, "reranker")
-    convert_model(config.LLM_MODEL_ID, config._CACHE_DIR, "llm")
+    if config.MODEL_RUNTIME == "openvino":
+        runtime_module = importlib.import_module("app.openvino_backend")
+        runtime_instance = runtime_module.OpenVINOBackend()
+
+    elif config.MODEL_RUNTIME == "ollama":
+        runtime_module = importlib.import_module("app.ollama_backend")
+        runtime_instance = runtime_module.OllamaBackend()
+
+    else:
+        raise ValueError(f"Unsupported model runtime: {config.MODEL_RUNTIME}")
+
+    embedding, llm, reranker = runtime_instance.init_models()
 
     template = config.PROMPT_TEMPLATE
 
     prompt = ChatPromptTemplate.from_template(template)
 
-    # Initialize Embedding Model
-    embedding = OpenVINOBgeEmbeddings(
-        model_name_or_path=f"{config._CACHE_DIR}/{config.EMBEDDING_MODEL_ID}",
-        model_kwargs={"device": config.EMBEDDING_DEVICE, "compile": False},
-    )
-    embedding.ov_model.compile()
-
-    # Initialize Reranker Model
-    reranker = OpenVINOReranker(
-        model_name_or_path=f"{config._CACHE_DIR}/{config.RERANKER_MODEL_ID}",
-        model_kwargs={"device": config.RERANKER_DEVICE},
-        top_n=2,
-    )
-
-    # Initialize LLM
-    llm = HuggingFacePipeline.from_model_id(
-        model_id=f"{config._CACHE_DIR}/{config.LLM_MODEL_ID}",
-        task="text-generation",
-        backend="openvino",
-        model_kwargs={
-            "device": config.LLM_DEVICE,
-            "ov_config": {
-                "PERFORMANCE_HINT": "LATENCY",
-                "NUM_STREAMS": "1",
-                "CACHE_DIR": f"{config._CACHE_DIR}/{config.LLM_MODEL_ID}/model_cache",
-            },
-            "trust_remote_code": True,
-        },
-        pipeline_kwargs={"max_new_tokens": config.MAX_TOKENS},
-    )
-    if llm.pipeline.tokenizer.eos_token_id:
-        llm.pipeline.tokenizer.pad_token_id = llm.pipeline.tokenizer.eos_token_id
 else:
     logger.info("Bypassing to mock these functions because RUN_TEST is set to 'True' to run pytest unit test.")
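Note for readers: the new app.openvino_backend and app.ollama_backend modules are not part of this diff, so the sketch below only illustrates the dynamic-import pattern chain.py now relies on. The class names match the hunk above, but the registry helper and the backend body are illustrative assumptions, not the actual implementations.

import importlib

class ExampleBackend:
    """Hypothetical backend: each runtime module exposes a class with init_models()."""

    def init_models(self):
        # In the real backends this would construct the embedding model,
        # LLM, and reranker for the chosen runtime.
        embedding = llm = reranker = None
        return embedding, llm, reranker

def load_backend(runtime: str):
    # Map a runtime name to its module and class, mirroring the if/elif above.
    registry = {
        "openvino": ("app.openvino_backend", "OpenVINOBackend"),
        "ollama": ("app.ollama_backend", "OllamaBackend"),
    }
    if runtime not in registry:
        raise ValueError(f"Unsupported model runtime: {runtime}")
    module_name, class_name = registry[runtime]
    module = importlib.import_module(module_name)  # imported only when selected
    return getattr(module, class_name)()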
@@ -88,35 +52,36 @@ def default_context(docs):
     return ""
 
 
-def get_retriever(enable_rerank=True, search_method="similarity_score_threshold"):
+def get_retriever():
     """
     Creates and returns a retriever object with optional reranking capability.
 
-    Args:
-        enable_rerank (bool): If True, enables the reranker to improve retrieval results. Defaults to True.
-        search_method (str): The method used for searching within the vector store. Defaults to "similarity_score_threshold".
-
     Returns:
         retriever: A retriever object, optionally wrapped with a contextual compression reranker.
 
     """
 
+    enable_rerank = config._ENABLE_RERANK
+    logger.info(f"Reranker enabled: {enable_rerank}")
+    search_method = config._SEARCH_METHOD
+    fetch_k = config._FETCH_K
+
     if vectorstore == None:
         return None
 
     else:
         retriever = vectorstore.as_retriever(
-            search_kwargs={"k": 3, "score_threshold": 0.5}, search_type=search_method
+            search_kwargs={
+                "k": 3,
+                "fetch_k": fetch_k,
+            },
+            search_type=search_method
         )
         if enable_rerank:
             logger.info("Enable reranker")
 
             return ContextualCompressionRetriever(
                 base_compressor=reranker, base_retriever=retriever
            )
         else:
             logger.info("Disable reranker")
 
             return retriever

Inline review thread on the new "fetch_k": fetch_k line:

Contributor: Do we have any empirical number on the quality of retrieval with threshold vs. top-k strategy?

Contributor (author): Both are fine; here I am aligning with what the modular version does, to keep the two designs the same.
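On the reviewer's threshold vs. top-k question, the two strategies differ only in the as_retriever() configuration. A minimal sketch contrasting the old and new settings, assuming the module-level FAISS vectorstore already exists (the query string and numbers are illustrative):

# Old behaviour: similarity search that drops results scoring below a threshold.
threshold_retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.5},
)

# New behaviour: MMR fetches a larger candidate pool (fetch_k), then selects
# k documents balancing relevance to the query against diversity between them.
mmr_retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "fetch_k": 10},
)

docs = mmr_retriever.invoke("What does the reranker do?")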
92 changes: 55 additions & 37 deletions sample-applications/chat-question-and-answer-core/app/config.py
@@ -1,41 +1,54 @@
+from pydantic import PrivateAttr
 from pydantic_settings import BaseSettings
+from typing import Union
 from os.path import dirname, abspath
 from .prompt import get_prompt_template
+from .runtime_validators import OpenVINOValidator, OllamaValidator
 import os
 import yaml
 
 class Settings(BaseSettings):
     """
     Settings class for configuring the Chatqna-Core application.
-    This class manages application-wide configuration, including model settings, device preferences,
-    supported file formats, and paths for caching and configuration files. It loads additional
-    configuration from a YAML file if provided, and updates its attributes accordingly.
+    This class manages application settings, including model backend runtime selection,
+    model IDs, device configurations, prompt templates, and various internal paths.
+    It loads configuration from a YAML file, validates backend-specific requirements,
+    and ensures prompt templates contain required placeholders.
 
     Attributes:
         APP_DISPLAY_NAME (str): Display name of the application.
         BASE_DIR (str): Base directory of the application.
-        SUPPORTED_FORMATS (set): Supported document file formats.
-        DEBUG (bool): Flag to enable or disable debug mode.
-        HF_ACCESS_TOKEN (str): Hugging Face access token for model downloads.
-        EMBEDDING_MODEL_ID (str): Model ID for embeddings.
-        RERANKER_MODEL_ID (str): Model ID for reranker.
-        LLM_MODEL_ID (str): Model ID for large language model.
-        PROMPT_TEMPLATE (str): Prompt template for the LLM.
-        EMBEDDING_DEVICE (str): Device to run embedding model on.
-        RERANKER_DEVICE (str): Device to run reranker model on.
-        LLM_DEVICE (str): Device to run LLM on.
+        SUPPORTED_FORMATS (set): Supported file formats for input documents.
+        DEBUG (bool): Debug mode flag.
+        HF_ACCESS_TOKEN (str): Hugging Face access token.
+        MODEL_RUNTIME (str): Backend runtime to use for models ('openvino' or 'ollama').
+        EMBEDDING_MODEL_ID (str): Identifier for the embedding model.
+        RERANKER_MODEL_ID (str): Identifier for the reranker model.
+        LLM_MODEL_ID (str): Identifier for the large language model.
+        PROMPT_TEMPLATE (str): Prompt template string for the LLM.
+        EMBEDDING_DEVICE (str): Device for embedding model ('CPU', etc.).
+        RERANKER_DEVICE (str): Device for reranker model ('CPU', etc.).
+        LLM_DEVICE (str): Device for LLM ('CPU', etc.).
         MAX_TOKENS (int): Maximum number of tokens for LLM responses.
-        ENABLE_RERANK (bool): Flag to enable or disable reranking.
-        _CACHE_DIR (str): Directory for model cache (private).
-        _HF_DATASETS_CACHE (str): Directory for Hugging Face datasets cache (private).
-        _TMP_FILE_PATH (str): Temporary file path for documents (private).
-        _DEFAULT_MODEL_CONFIG (str): Path to default model configuration YAML (private).
-        _MODEL_CONFIG_PATH (str): Path to user-provided model configuration YAML (private).
+        KEEP_ALIVE (Union[str, int, None]): Keep-alive setting for the application.
+
+    Private Attributes:
+        _ENABLE_RERANK (bool): Whether reranking is enabled.
+        _SEARCH_METHOD (str): Search method used for retrieval.
+        _FETCH_K (int): Number of documents to fetch during retrieval.
+        _CACHE_DIR (str): Directory for model cache.
+        _HF_DATASETS_CACHE (str): Directory for Hugging Face datasets cache.
+        _TMP_FILE_PATH (str): Temporary file path for documents.
+        _DEFAULT_MODEL_CONFIG (str): Path to the default model configuration YAML.
+        _MODEL_CONFIG_PATH (str): Path to the user-provided model configuration YAML.
 
     Methods:
-        __init__(**kwargs): Initializes the Settings object, loads configuration from YAML file,
-        and updates attributes accordingly.
+        __init__(**kwargs): Initializes settings, loads configuration from YAML, and validates settings.
+        _validate_runtime_settings(): Validates backend-specific settings and required model IDs.
+        _check_and_validate_prompt_template(): Ensures the prompt template is set and contains required placeholders.
 
+    Raises:
+        ValueError: If required settings are missing or invalid, or if an unsupported backend is specified.
     """
 
     APP_DISPLAY_NAME: str = "Chatqna-Core"
@@ -44,6 +57,7 @@ class Settings(BaseSettings):
     DEBUG: bool = False
 
     HF_ACCESS_TOKEN: str = ""
+    MODEL_RUNTIME: str = ""
     EMBEDDING_MODEL_ID: str = ""
     RERANKER_MODEL_ID: str = ""
     LLM_MODEL_ID: str = ""
@@ -52,9 +66,12 @@ class Settings(BaseSettings):
     RERANKER_DEVICE: str = "CPU"
     LLM_DEVICE: str = "CPU"
     MAX_TOKENS: int = 1024
-    ENABLE_RERANK: bool = True
+    KEEP_ALIVE: Union[str, int, None] = None
 
     # These fields will not be affected by environment variables
+    _ENABLE_RERANK: bool = PrivateAttr(True)
+    _SEARCH_METHOD: str = PrivateAttr("mmr")
+    _FETCH_K: int = PrivateAttr(10)
     _CACHE_DIR: str = PrivateAttr("/tmp/model_cache")
     _HF_DATASETS_CACHE: str = PrivateAttr("/tmp/model_cache")
    _TMP_FILE_PATH: str = PrivateAttr("/tmp/chatqna/documents")
@@ -65,13 +82,6 @@ class Settings(BaseSettings):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
-        # The RUN_TEST flag is used to bypass the model config loading during pytest unit testing.
-        # If RUN_TEST is set to "True", the model config loading is skipped.
-        # This flag is set in the conftest.py file before running the tests.
-        if os.getenv("RUN_TEST", "").lower() == "true":
-            print("INFO - Skipping model config loading in test mode.")
-            return
-
         config_file = self._MODEL_CONFIG_PATH if os.path.isfile(self._MODEL_CONFIG_PATH) else self._DEFAULT_MODEL_CONFIG
 
         if config_file == self._MODEL_CONFIG_PATH:
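For context, the configuration load that follows is a plain YAML-to-attributes copy: config_file falls back to the bundled default when no user config is mounted, and matching keys are then set on the settings object (see the loop in the next hunk). A minimal sketch of that mapping under an assumed schema; the real YAML layout is not shown in this excerpt:

import yaml

class DemoCfg:
    MODEL_RUNTIME = ""
    LLM_MODEL_ID = ""

raw = yaml.safe_load("""
model_settings:
  MODEL_RUNTIME: ollama
  LLM_MODEL_ID: llama3.2
""")

cfg = DemoCfg()
for key, value in raw["model_settings"].items():
    if hasattr(cfg, key):  # unknown keys are silently ignored
        setattr(cfg, key, value)

assert cfg.MODEL_RUNTIME == "ollama"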
@@ -89,15 +99,23 @@ def __init__(self, **kwargs):
             if hasattr(self, key):
                 setattr(self, key, value)
 
-        self._validate_model_ids()
-
+        self._validate_runtime_settings()
         self._check_and_validate_prompt_template()
 
-    def _validate_model_ids(self):
-        for model_name in ["EMBEDDING_MODEL_ID", "RERANKER_MODEL_ID", "LLM_MODEL_ID"]:
-            model_id = getattr(self, model_name)
-            if not model_id:
-                raise ValueError(f"{model_name} must not be an empty string.")
+    def _validate_runtime_settings(self):
+        validators = {
+            "openvino": OpenVINOValidator,
+            "ollama": OllamaValidator,
+        }
+
+        runtime = self.MODEL_RUNTIME.lower()
+        validator_cls = validators.get(runtime)
+
+        if not validator_cls:
+            raise ValueError(f"Unsupported model runtime: {self.MODEL_RUNTIME}. Supported runtimes are: {', '.join(validators.keys())}")
+
+        validator = validator_cls(self)
+        validator.validate()
 
     def _check_and_validate_prompt_template(self):
         if not self.PROMPT_TEMPLATE:
 
             raise ValueError(f"PROMPT_TEMPLATE must include the placeholder {placeholder}.")
 
 
-config = Settings()
\ No newline at end of file
+config = Settings()
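Two things in this file are easy to miss: pydantic PrivateAttr fields are never populated from environment variables (hence the move from ENABLE_RERANK to _ENABLE_RERANK), and the validator classes live in the new runtime_validators module, which is not part of this excerpt. A minimal sketch of both behaviours, with the validator shape assumed rather than taken from the PR:

import os
from pydantic import PrivateAttr
from pydantic_settings import BaseSettings

class DemoSettings(BaseSettings):
    LLM_MODEL_ID: str = ""           # regular field: settable via the LLM_MODEL_ID env var
    _FETCH_K: int = PrivateAttr(10)  # private attr: ignored by env/.env sources

os.environ["LLM_MODEL_ID"] = "some-org/some-model"
s = DemoSettings()
assert s.LLM_MODEL_ID == "some-org/some-model"
assert s._FETCH_K == 10  # unchanged even if a FETCH_K env var is set

class DemoRuntimeValidator:
    """Assumed shape of OpenVINOValidator / OllamaValidator (not shown in this diff)."""

    REQUIRED = ("EMBEDDING_MODEL_ID", "RERANKER_MODEL_ID", "LLM_MODEL_ID")

    def __init__(self, settings):
        self.settings = settings

    def validate(self):
        for name in self.REQUIRED:
            if not getattr(self.settings, name, ""):
                raise ValueError(f"{name} must not be an empty string.")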
@@ -57,7 +57,7 @@ async def save_document(file_object: UploadFile):
         return tmp_path, None
 
     except Exception as err:
-        logger.exception("Error saving file.", error=err)
+        logger.exception("Error saving file.")
         return None, err


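The one-line fix above matters because the standard library's Logger.exception() does not accept an error= keyword (that style is a structlog convention) and already records the active traceback on its own. A minimal sketch, assuming the app's logger wraps the standard logging module:

import logging

logger = logging.getLogger(__name__)

try:
    raise OSError("disk full")
except Exception:
    # exception() logs at ERROR level and attaches the current traceback
    # automatically, so the exception object never needs to be passed in.
    logger.exception("Error saving file.")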