37 commits
03bbea3
Fix: Permission issue for EMT helm
14pankaj Aug 4, 2025
1628cb2
update: changed to opt
14pankaj Aug 4, 2025
033e15f
[ChatQnA-Core] Ollama integration
hteeyeoh Aug 6, 2025
3e69d76
[ChatQnA-Core] Helm Chart for Ollama integration
hteeyeoh Aug 14, 2025
60dfe07
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Aug 20, 2025
5be48af
[ChatQnA-Core] Unit test for both openvino and ollama
hteeyeoh Aug 20, 2025
73b429e
[ChatQnA-Core]: Update documentation
hteeyeoh Aug 21, 2025
30ab643
[ChatQnA-Core] Update ollama model endpoint
hteeyeoh Aug 22, 2025
1cdbbfe
[ChatQnA-Core] Update ollama to latest version
hteeyeoh Aug 26, 2025
7c5b689
[ChatQnA Core] Include trust-remote-code flag during conversion
hteeyeoh Aug 26, 2025
abd3e02
fix: ITEP-71084 updated nginx image for sample apps
14pankaj Aug 26, 2025
af7b358
Update: nginx alpine image for stability
14pankaj Aug 28, 2025
74ac839
[chatqna-core] Update dependencies packages
hteeyeoh Aug 28, 2025
9787324
[chatqna-core-ui] Update dependencies and fix unittest
hteeyeoh Aug 28, 2025
fd4da5b
[chatqna-core-ui] Update UI image tag
hteeyeoh Sep 3, 2025
7b83a7c
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Sep 3, 2025
bd36f39
host path to OPT folder
14pankaj Sep 4, 2025
acf48ed
Feat: Upgraded OVMS and deployment step changes
14pankaj Sep 4, 2025
88abdf8
fix: updated folder names
14pankaj Sep 4, 2025
de68af6
feat: Debian nginx images for sample apps
14pankaj Sep 4, 2025
e497e02
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Sep 8, 2025
d8abf0c
[chatqna-ui] Bump NGINX version to 1.29.1
hteeyeoh Sep 8, 2025
63646b9
feat: nginx Image update
14pankaj Sep 9, 2025
7a2edd1
fix: minor updates
14pankaj Sep 9, 2025
3780832
fix: Updated helm with ovms image & script updates.
14pankaj Sep 9, 2025
d79d059
minor updates
14pankaj Sep 9, 2025
ddc0f0f
Merge branch 'ollama' into PenTestFxies
hteeyeoh Sep 9, 2025
59aed07
Merge pull request #3 from 14pankaj/PenTestFxies
hteeyeoh Sep 9, 2025
13c6016
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Sep 9, 2025
23992cc
[chatqna-core] Rename config variable name and create validator class
hteeyeoh Sep 10, 2025
aa83c9b
updated ovms to latest 2025.3
14pankaj Sep 11, 2025
2ff26e5
Merge remote-tracking branch 'origin/main' into ollama
hteeyeoh Sep 17, 2025
4c5ffca
Merge branch 'main' into ollama
hteeyeoh Sep 23, 2025
9011358
[chatqna-core]: Update helm deployment md
hteeyeoh Sep 23, 2025
3efa9b0
Merge branch 'main' into ollama
hteeyeoh Sep 26, 2025
a829c59
Merge branch 'open-edge-platform:main' into ollama
hteeyeoh Sep 30, 2025
fb7ed12
Merge branch 'main' into ollama
hteeyeoh Oct 3, 2025
@@ -10,3 +10,4 @@ coverage
 *.lock
 !poetry.lock
 .vscode
+nginx_config/*.conf
81 changes: 23 additions & 58 deletions sample-applications/chat-question-and-answer-core/app/chain.py
@@ -1,17 +1,14 @@
 from .config import config
-from .utils import login_to_huggingface, download_huggingface_model, convert_model
 from .document import load_file_document
 from .logger import logger
-from langchain_community.vectorstores import FAISS
-from langchain_community.embeddings import OpenVINOBgeEmbeddings
-from langchain_community.document_compressors.openvino_rerank import OpenVINOReranker
 from langchain.retrievers import ContextualCompressionRetriever
-from langchain_huggingface import HuggingFacePipeline
+from langchain_community.vectorstores import FAISS
 from langchain_core.runnables import RunnablePassthrough
 from langchain_core.output_parsers import StrOutputParser
-from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 import os
+import importlib
 import pandas as pd
 
 vectorstore = None
@@ -20,56 +17,23 @@
 # If RUN_TEST is set to "True", the model download and conversion steps are skipped.
 # This flag is set in the conftest.py file before running the tests.
 if os.getenv("RUN_TEST", "").lower() != "true":
-    # login huggingface
-    login_to_huggingface(config.HF_ACCESS_TOKEN)
-
-    # Download convert the model to openvino optimized
-    download_huggingface_model(config.EMBEDDING_MODEL_ID, config._CACHE_DIR)
-    download_huggingface_model(config.RERANKER_MODEL_ID, config._CACHE_DIR)
-    download_huggingface_model(config.LLM_MODEL_ID, config._CACHE_DIR)
-
-    # Convert to openvino IR
-    convert_model(config.EMBEDDING_MODEL_ID, config._CACHE_DIR, "embedding")
-    convert_model(config.RERANKER_MODEL_ID, config._CACHE_DIR, "reranker")
-    convert_model(config.LLM_MODEL_ID, config._CACHE_DIR, "llm")
+    if config.MODEL_RUNTIME == "openvino":
+        runtime_module = importlib.import_module("app.openvino_backend")
+        runtime_instance = runtime_module.OpenVINOBackend()
+
+    elif config.MODEL_RUNTIME == "ollama":
+        runtime_module = importlib.import_module("app.ollama_backend")
+        runtime_instance = runtime_module.OllamaBackend()
+
+    else:
+        raise ValueError(f"Unsupported model runtime: {config.MODEL_RUNTIME}")
+
+    embedding, llm, reranker = runtime_instance.init_models()
 
     template = config.PROMPT_TEMPLATE
 
     prompt = ChatPromptTemplate.from_template(template)
 
-    # Initialize Embedding Model
-    embedding = OpenVINOBgeEmbeddings(
-        model_name_or_path=f"{config._CACHE_DIR}/{config.EMBEDDING_MODEL_ID}",
-        model_kwargs={"device": config.EMBEDDING_DEVICE, "compile": False},
-    )
-    embedding.ov_model.compile()
-
-    # Initialize Reranker Model
-    reranker = OpenVINOReranker(
-        model_name_or_path=f"{config._CACHE_DIR}/{config.RERANKER_MODEL_ID}",
-        model_kwargs={"device": config.RERANKER_DEVICE},
-        top_n=2,
-    )
-
-    # Initialize LLM
-    llm = HuggingFacePipeline.from_model_id(
-        model_id=f"{config._CACHE_DIR}/{config.LLM_MODEL_ID}",
-        task="text-generation",
-        backend="openvino",
-        model_kwargs={
-            "device": config.LLM_DEVICE,
-            "ov_config": {
-                "PERFORMANCE_HINT": "LATENCY",
-                "NUM_STREAMS": "1",
-                "CACHE_DIR": f"{config._CACHE_DIR}/{config.LLM_MODEL_ID}/model_cache",
-            },
-            "trust_remote_code": True,
-        },
-        pipeline_kwargs={"max_new_tokens": config.MAX_TOKENS},
-    )
-    if llm.pipeline.tokenizer.eos_token_id:
-        llm.pipeline.tokenizer.pad_token_id = llm.pipeline.tokenizer.eos_token_id
 else:
     logger.info("Bypassing to mock these functions because RUN_TEST is set to 'True' to run pytest unit test.")
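Note for readers: the new app.openvino_backend and app.ollama_backend modules are not part of this diff, so the sketch below only illustrates the dynamic-import pattern chain.py now relies on. The class names match the hunk above, but the registry helper and the backend body are illustrative assumptions, not the actual implementations.

import importlib

class ExampleBackend:
    """Hypothetical backend: each runtime module exposes a class with init_models()."""

    def init_models(self):
        # In the real backends this would construct the embedding model,
        # LLM, and reranker for the chosen runtime.
        embedding = llm = reranker = None
        return embedding, llm, reranker

def load_backend(runtime: str):
    # Map a runtime name to its module and class, mirroring the if/elif above.
    registry = {
        "openvino": ("app.openvino_backend", "OpenVINOBackend"),
        "ollama": ("app.ollama_backend", "OllamaBackend"),
    }
    if runtime not in registry:
        raise ValueError(f"Unsupported model runtime: {runtime}")
    module_name, class_name = registry[runtime]
    module = importlib.import_module(module_name)  # imported only when selected
    return getattr(module, class_name)()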
@@ -88,35 +52,36 @@ def default_context(docs):
     return ""
 
 
-def get_retriever(enable_rerank=True, search_method="similarity_score_threshold"):
+def get_retriever():
     """
     Creates and returns a retriever object with optional reranking capability.
 
-    Args:
-        enable_rerank (bool): If True, enables the reranker to improve retrieval results. Defaults to True.
-        search_method (str): The method used for searching within the vector store. Defaults to "similarity_score_threshold".
-
     Returns:
         retriever: A retriever object, optionally wrapped with a contextual compression reranker.
 
     """
 
+    enable_rerank = config._ENABLE_RERANK
+    logger.info(f"Reranker enabled: {enable_rerank}")
+    search_method = config._SEARCH_METHOD
+    fetch_k = config._FETCH_K
+
     if vectorstore == None:
         return None
 
     else:
         retriever = vectorstore.as_retriever(
-            search_kwargs={"k": 3, "score_threshold": 0.5}, search_type=search_method
+            search_kwargs={
+                "k": 3,
+                "fetch_k": fetch_k,
+            },
+            search_type=search_method
         )
         if enable_rerank:
             logger.info("Enable reranker")
 
             return ContextualCompressionRetriever(
                 base_compressor=reranker, base_retriever=retriever
            )
         else:
             logger.info("Disable reranker")
 
             return retriever

Inline review thread on the new "fetch_k": fetch_k line:

Contributor: Do we have any empirical number on the quality of retrieval with threshold vs. top-k strategy?

Contributor (author): Both are fine; here I am aligning with what the modular version does, to keep the two designs the same.
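On the reviewer's threshold vs. top-k question, the two strategies differ only in the as_retriever() configuration. A minimal sketch contrasting the old and new settings, assuming the module-level FAISS vectorstore already exists (the query string and numbers are illustrative):

# Old behaviour: similarity search that drops results scoring below a threshold.
threshold_retriever = vectorstore.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.5},
)

# New behaviour: MMR fetches a larger candidate pool (fetch_k), then selects
# k documents balancing relevance to the query against diversity between them.
mmr_retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 3, "fetch_k": 10},
)

docs = mmr_retriever.invoke("What does the reranker do?")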
92 changes: 55 additions & 37 deletions sample-applications/chat-question-and-answer-core/app/config.py
@@ -1,41 +1,54 @@
+from pydantic import PrivateAttr
 from pydantic_settings import BaseSettings
+from typing import Union
 from os.path import dirname, abspath
 from .prompt import get_prompt_template
+from .runtime_validators import OpenVINOValidator, OllamaValidator
 import os
 import yaml
 
 class Settings(BaseSettings):
     """
     Settings class for configuring the Chatqna-Core application.
-    This class manages application-wide configuration, including model settings, device preferences,
-    supported file formats, and paths for caching and configuration files. It loads additional
-    configuration from a YAML file if provided, and updates its attributes accordingly.
+    This class manages application settings, including model backend runtime selection,
+    model IDs, device configurations, prompt templates, and various internal paths.
+    It loads configuration from a YAML file, validates backend-specific requirements,
+    and ensures prompt templates contain required placeholders.
 
     Attributes:
         APP_DISPLAY_NAME (str): Display name of the application.
         BASE_DIR (str): Base directory of the application.
-        SUPPORTED_FORMATS (set): Supported document file formats.
-        DEBUG (bool): Flag to enable or disable debug mode.
-        HF_ACCESS_TOKEN (str): Hugging Face access token for model downloads.
-        EMBEDDING_MODEL_ID (str): Model ID for embeddings.
-        RERANKER_MODEL_ID (str): Model ID for reranker.
-        LLM_MODEL_ID (str): Model ID for large language model.
-        PROMPT_TEMPLATE (str): Prompt template for the LLM.
-        EMBEDDING_DEVICE (str): Device to run embedding model on.
-        RERANKER_DEVICE (str): Device to run reranker model on.
-        LLM_DEVICE (str): Device to run LLM on.
+        SUPPORTED_FORMATS (set): Supported file formats for input documents.
+        DEBUG (bool): Debug mode flag.
+        HF_ACCESS_TOKEN (str): Hugging Face access token.
+        MODEL_RUNTIME (str): Backend runtime to use for models ('openvino' or 'ollama').
+        EMBEDDING_MODEL_ID (str): Identifier for the embedding model.
+        RERANKER_MODEL_ID (str): Identifier for the reranker model.
+        LLM_MODEL_ID (str): Identifier for the large language model.
+        PROMPT_TEMPLATE (str): Prompt template string for the LLM.
+        EMBEDDING_DEVICE (str): Device for embedding model ('CPU', etc.).
+        RERANKER_DEVICE (str): Device for reranker model ('CPU', etc.).
+        LLM_DEVICE (str): Device for LLM ('CPU', etc.).
         MAX_TOKENS (int): Maximum number of tokens for LLM responses.
-        ENABLE_RERANK (bool): Flag to enable or disable reranking.
-        _CACHE_DIR (str): Directory for model cache (private).
-        _HF_DATASETS_CACHE (str): Directory for Hugging Face datasets cache (private).
-        _TMP_FILE_PATH (str): Temporary file path for documents (private).
-        _DEFAULT_MODEL_CONFIG (str): Path to default model configuration YAML (private).
-        _MODEL_CONFIG_PATH (str): Path to user-provided model configuration YAML (private).
+        KEEP_ALIVE (Union[str, int, None]): Keep-alive setting for the application.
+
+    Private Attributes:
+        _ENABLE_RERANK (bool): Whether reranking is enabled.
+        _SEARCH_METHOD (str): Search method used for retrieval.
+        _FETCH_K (int): Number of documents to fetch during retrieval.
+        _CACHE_DIR (str): Directory for model cache.
+        _HF_DATASETS_CACHE (str): Directory for Hugging Face datasets cache.
+        _TMP_FILE_PATH (str): Temporary file path for documents.
+        _DEFAULT_MODEL_CONFIG (str): Path to the default model configuration YAML.
+        _MODEL_CONFIG_PATH (str): Path to the user-provided model configuration YAML.
 
     Methods:
-        __init__(**kwargs): Initializes the Settings object, loads configuration from YAML file,
-        and updates attributes accordingly.
+        __init__(**kwargs): Initializes settings, loads configuration from YAML, and validates settings.
+        _validate_runtime_settings(): Validates backend-specific settings and required model IDs.
+        _check_and_validate_prompt_template(): Ensures the prompt template is set and contains required placeholders.
 
+    Raises:
+        ValueError: If required settings are missing or invalid, or if an unsupported backend is specified.
     """
 
     APP_DISPLAY_NAME: str = "Chatqna-Core"
@@ -44,6 +57,7 @@ class Settings(BaseSettings):
     DEBUG: bool = False
 
     HF_ACCESS_TOKEN: str = ""
+    MODEL_RUNTIME: str = ""
     EMBEDDING_MODEL_ID: str = ""
     RERANKER_MODEL_ID: str = ""
     LLM_MODEL_ID: str = ""
@@ -52,9 +66,12 @@ class Settings(BaseSettings):
     RERANKER_DEVICE: str = "CPU"
     LLM_DEVICE: str = "CPU"
     MAX_TOKENS: int = 1024
-    ENABLE_RERANK: bool = True
+    KEEP_ALIVE: Union[str, int, None] = None
 
     # These fields will not be affected by environment variables
+    _ENABLE_RERANK: bool = PrivateAttr(True)
+    _SEARCH_METHOD: str = PrivateAttr("mmr")
+    _FETCH_K: int = PrivateAttr(10)
     _CACHE_DIR: str = PrivateAttr("/tmp/model_cache")
     _HF_DATASETS_CACHE: str = PrivateAttr("/tmp/model_cache")
    _TMP_FILE_PATH: str = PrivateAttr("/tmp/chatqna/documents")
@@ -65,13 +82,6 @@ class Settings(BaseSettings):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
 
-        # The RUN_TEST flag is used to bypass the model config loading during pytest unit testing.
-        # If RUN_TEST is set to "True", the model config loading is skipped.
-        # This flag is set in the conftest.py file before running the tests.
-        if os.getenv("RUN_TEST", "").lower() == "true":
-            print("INFO - Skipping model config loading in test mode.")
-            return
-
         config_file = self._MODEL_CONFIG_PATH if os.path.isfile(self._MODEL_CONFIG_PATH) else self._DEFAULT_MODEL_CONFIG
 
         if config_file == self._MODEL_CONFIG_PATH:
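For context, the configuration load that follows is a plain YAML-to-attributes copy: config_file falls back to the bundled default when no user config is mounted, and matching keys are then set on the settings object (see the loop in the next hunk). A minimal sketch of that mapping under an assumed schema; the real YAML layout is not shown in this excerpt:

import yaml

class DemoCfg:
    MODEL_RUNTIME = ""
    LLM_MODEL_ID = ""

raw = yaml.safe_load("""
model_settings:
  MODEL_RUNTIME: ollama
  LLM_MODEL_ID: llama3.2
""")

cfg = DemoCfg()
for key, value in raw["model_settings"].items():
    if hasattr(cfg, key):  # unknown keys are silently ignored
        setattr(cfg, key, value)

assert cfg.MODEL_RUNTIME == "ollama"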
@@ -89,15 +99,23 @@ def __init__(self, **kwargs):
             if hasattr(self, key):
                 setattr(self, key, value)
 
-        self._validate_model_ids()
-
+        self._validate_runtime_settings()
         self._check_and_validate_prompt_template()
 
-    def _validate_model_ids(self):
-        for model_name in ["EMBEDDING_MODEL_ID", "RERANKER_MODEL_ID", "LLM_MODEL_ID"]:
-            model_id = getattr(self, model_name)
-            if not model_id:
-                raise ValueError(f"{model_name} must not be an empty string.")
+    def _validate_runtime_settings(self):
+        validators = {
+            "openvino": OpenVINOValidator,
+            "ollama": OllamaValidator,
+        }
+
+        runtime = self.MODEL_RUNTIME.lower()
+        validator_cls = validators.get(runtime)
+
+        if not validator_cls:
+            raise ValueError(f"Unsupported model runtime: {self.MODEL_RUNTIME}. Supported runtimes are: {', '.join(validators.keys())}")
+
+        validator = validator_cls(self)
+        validator.validate()
 
     def _check_and_validate_prompt_template(self):
         if not self.PROMPT_TEMPLATE:
 
             raise ValueError(f"PROMPT_TEMPLATE must include the placeholder {placeholder}.")
 
 
-config = Settings()
\ No newline at end of file
+config = Settings()
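Two things in this file are easy to miss: pydantic PrivateAttr fields are never populated from environment variables (hence the move from ENABLE_RERANK to _ENABLE_RERANK), and the validator classes live in the new runtime_validators module, which is not part of this excerpt. A minimal sketch of both behaviours, with the validator shape assumed rather than taken from the PR:

import os
from pydantic import PrivateAttr
from pydantic_settings import BaseSettings

class DemoSettings(BaseSettings):
    LLM_MODEL_ID: str = ""           # regular field: settable via the LLM_MODEL_ID env var
    _FETCH_K: int = PrivateAttr(10)  # private attr: ignored by env/.env sources

os.environ["LLM_MODEL_ID"] = "some-org/some-model"
s = DemoSettings()
assert s.LLM_MODEL_ID == "some-org/some-model"
assert s._FETCH_K == 10  # unchanged even if a FETCH_K env var is set

class DemoRuntimeValidator:
    """Assumed shape of OpenVINOValidator / OllamaValidator (not shown in this diff)."""

    REQUIRED = ("EMBEDDING_MODEL_ID", "RERANKER_MODEL_ID", "LLM_MODEL_ID")

    def __init__(self, settings):
        self.settings = settings

    def validate(self):
        for name in self.REQUIRED:
            if not getattr(self.settings, name, ""):
                raise ValueError(f"{name} must not be an empty string.")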
@@ -57,7 +57,7 @@ async def save_document(file_object: UploadFile):
         return tmp_path, None
 
     except Exception as err:
-        logger.exception("Error saving file.", error=err)
+        logger.exception("Error saving file.")
         return None, err


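The one-line fix above matters because the standard library's Logger.exception() does not accept an error= keyword (that style is a structlog convention) and already records the active traceback on its own. A minimal sketch, assuming the app's logger wraps the standard logging module:

import logging

logger = logging.getLogger(__name__)

try:
    raise OSError("disk full")
except Exception:
    # exception() logs at ERROR level and attaches the current traceback
    # automatically, so the exception object never needs to be passed in.
    logger.exception("Error saving file.")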