Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion common/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import rag.utils
import rag.utils.es_conn
import rag.utils.infinity_conn
import rag.utils.ob_conn
import rag.utils.opensearch_conn
from rag.utils.azure_sas_conn import RAGFlowAzureSasBlob
from rag.utils.azure_spn_conn import RAGFlowAzureSpnBlob
Expand Down Expand Up @@ -103,6 +104,7 @@
AZURE = {}
S3 = {}
MINIO = {}
OB = {}
OSS = {}
OS = {}

Expand Down Expand Up @@ -227,7 +229,7 @@ def init_settings():
FEISHU_OAUTH = get_base_config("oauth", {}).get("feishu")
OAUTH_CONFIG = get_base_config("oauth", {})

global DOC_ENGINE, docStoreConn, ES, OS, INFINITY
global DOC_ENGINE, docStoreConn, ES, OB, OS, INFINITY
DOC_ENGINE = os.environ.get("DOC_ENGINE", "elasticsearch")
# DOC_ENGINE = os.environ.get('DOC_ENGINE', "opensearch")
lower_case_doc_engine = DOC_ENGINE.lower()
Expand All @@ -240,6 +242,9 @@ def init_settings():
elif lower_case_doc_engine == "opensearch":
OS = get_base_config("os", {})
docStoreConn = rag.utils.opensearch_conn.OSConnection()
elif lower_case_doc_engine == "oceanbase":
OB = get_base_config("oceanbase", {})
docStoreConn = rag.utils.ob_conn.OBConnection()
else:
raise Exception(f"Not supported doc engine: {DOC_ENGINE}")

Expand Down
8 changes: 8 additions & 0 deletions conf/service_conf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ os:
infinity:
uri: 'localhost:23817'
db_name: 'default_db'
oceanbase:
scheme: 'oceanbase' # set to 'mysql' to create the connection from the mysql config instead of the settings below
config:
db_name: 'test'
user: 'root@ragflow'
password: 'infini_rag_flow'
host: 'localhost'
port: 2881
redis:
db: 1
password: 'infini_rag_flow'
Expand Down
22 changes: 22 additions & 0 deletions docker/.env
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# Available options:
# - `elasticsearch` (default)
# - `infinity` (https://github.com/infiniflow/infinity)
# - `oceanbase` (https://github.com/oceanbase/oceanbase)
# - `opensearch` (https://github.com/opensearch-project/OpenSearch)
DOC_ENGINE=${DOC_ENGINE:-elasticsearch}

Expand Down Expand Up @@ -62,6 +63,27 @@ INFINITY_THRIFT_PORT=23817
INFINITY_HTTP_PORT=23820
INFINITY_PSQL_PORT=5432

# The hostname where the OceanBase service is exposed
OCEANBASE_HOST=oceanbase
# The port used to expose the OceanBase service
OCEANBASE_PORT=2881
# The username for OceanBase
OCEANBASE_USER=root@ragflow
# The password for OceanBase
OCEANBASE_PASSWORD=infini_rag_flow
# The name of the OceanBase database that RAGFlow uses as its document store
OCEANBASE_DOC_DBNAME=ragflow_doc

# OceanBase container configuration
OB_CLUSTER_NAME=${OB_CLUSTER_NAME:-ragflow}
OB_TENANT_NAME=${OB_TENANT_NAME:-ragflow}
OB_SYS_PASSWORD=${OCEANBASE_PASSWORD:-infini_rag_flow}
OB_TENANT_PASSWORD=${OCEANBASE_PASSWORD:-infini_rag_flow}
OB_MEMORY_LIMIT=${OB_MEMORY_LIMIT:-10G}
OB_SYSTEM_MEMORY=${OB_SYSTEM_MEMORY:-2G}
OB_DATAFILE_SIZE=${OB_DATAFILE_SIZE:-20G}
OB_LOG_DISK_SIZE=${OB_LOG_DISK_SIZE:-20G}

# The password for MySQL.
MYSQL_PASSWORD=infini_rag_flow
# The hostname where the MySQL service is exposed
Expand Down
9 changes: 9 additions & 0 deletions docker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,15 @@ The [.env](./.env) file contains important environment variables for Docker.
- `password`: The password for MinIO.
- `host`: The MinIO serving IP *and* port inside the Docker container. Defaults to `minio:9000`.

- `oceanbase`
- `scheme`: The connection scheme. Set to `mysql` to reuse the MySQL connection settings; any other value (e.g. `oceanbase`) uses the `config` section below.
- `config`:
- `db_name`: The name of the OceanBase database to use.
- `user`: The username for OceanBase.
- `password`: The password for OceanBase.
- `host`: The hostname where the OceanBase service is exposed.
- `port`: The serving port of the OceanBase service. Defaults to `2881`.

- `oss`
- `access_key`: The access key ID used to authenticate requests to the OSS service.
- `secret_key`: The secret access key used to authenticate requests to the OSS service.
Expand Down
27 changes: 27 additions & 0 deletions docker/docker-compose-base.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,31 @@ services:
retries: 120
restart: on-failure

oceanbase:
profiles:
- oceanbase
image: oceanbase/oceanbase-ce:4.4.1.0-100000032025101610
volumes:
- ./oceanbase/data:/root/ob
- ./oceanbase/conf:/root/.obd/cluster
- ./oceanbase/init.d:/root/boot/init.d
ports:
- ${OCEANBASE_PORT:-2881}:2881
env_file: .env
environment:
- MODE=normal
- OB_SERVER_IP=127.0.0.1
mem_limit: ${MEM_LIMIT}
healthcheck:
test: [ 'CMD-SHELL', 'obclient -h127.0.0.1 -P2881 -uroot@${OB_TENANT_NAME:-ragflow} -p${OB_TENANT_PASSWORD:-infini_rag_flow} -e "CREATE DATABASE IF NOT EXISTS ${OCEANBASE_DOC_DBNAME:-ragflow_doc};"' ]
interval: 10s
retries: 30
start_period: 30s
timeout: 10s
networks:
- ragflow
restart: on-failure

sandbox-executor-manager:
profiles:
- sandbox
Expand Down Expand Up @@ -256,6 +281,8 @@ volumes:
driver: local
infinity_data:
driver: local
# NOTE(review): ob_data is declared but appears unused — the oceanbase service mounts ./oceanbase/* bind paths, not this named volume; confirm
ob_data:
driver: local
mysql_data:
driver: local
minio_data:
Expand Down
1 change: 1 addition & 0 deletions docker/oceanbase/init.d/vec_memory.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
-- NOTE(review): grants vector indexes up to 30% of tenant memory; presumably required
-- before building vector indexes on OceanBase — confirm against OceanBase vector docs.
ALTER SYSTEM SET ob_vector_memory_limit_percentage = 30;
8 changes: 8 additions & 0 deletions docker/service_conf.yaml.template
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ os:
infinity:
uri: '${INFINITY_HOST:-infinity}:23817'
db_name: 'default_db'
oceanbase:
scheme: 'oceanbase' # set to 'mysql' to create the connection from the mysql config instead of the settings below
config:
db_name: '${OCEANBASE_DOC_DBNAME:-test}' # NOTE(review): fallback 'test' disagrees with the 'ragflow_doc' default in docker/.env and the compose healthcheck — confirm intended default
user: '${OCEANBASE_USER:-root@ragflow}'
password: '${OCEANBASE_PASSWORD:-infini_rag_flow}'
host: '${OCEANBASE_HOST:-oceanbase}'
port: ${OCEANBASE_PORT:-2881}
redis:
db: 1
password: '${REDIS_PASSWORD:-infini_rag_flow}'
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ dependencies = [
"captcha>=0.7.1",
"pip>=25.2",
"pypandoc>=1.16",
"pyobvector==0.2.18",
]

[dependency-groups]
Expand Down
8 changes: 5 additions & 3 deletions rag/nlp/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ def add_space_between_eng_zh(txt):
return txt

def question(self, txt, tbl="qa", min_match: float = 0.6):
original_query = txt
txt = FulltextQueryer.add_space_between_eng_zh(txt)
txt = re.sub(
r"[ :|\r\n\t,,。??/`!!&^%%()\[\]{}<>]+",
Expand Down Expand Up @@ -127,7 +128,7 @@ def question(self, txt, tbl="qa", min_match: float = 0.6):
q.append(txt)
query = " ".join(q)
return MatchTextExpr(
self.query_fields, query, 100
self.query_fields, query, 100, {"original_query": original_query}
), keywords

def need_fine_grained_tokenize(tk):
Expand Down Expand Up @@ -212,7 +213,7 @@ def need_fine_grained_tokenize(tk):
if not query:
query = otxt
return MatchTextExpr(
self.query_fields, query, 100, {"minimum_should_match": min_match}
self.query_fields, query, 100, {"minimum_should_match": min_match, "original_query": original_query}
), keywords
return None, keywords

Expand Down Expand Up @@ -259,6 +260,7 @@ def paragraph(self, content_tks: str, keywords: list = [], keywords_topn=30):
content_tks = [c.strip() for c in content_tks.strip() if c.strip()]
tks_w = self.tw.weights(content_tks, preprocess=False)

origin_keywords = keywords.copy()
keywords = [f'"{k.strip()}"' for k in keywords]
for tk, w in sorted(tks_w, key=lambda x: x[1] * -1)[:keywords_topn]:
tk_syns = self.syn.lookup(tk)
Expand All @@ -274,4 +276,4 @@ def paragraph(self, content_tks: str, keywords: list = [], keywords_topn=30):
keywords.append(f"{tk}^{w}")

return MatchTextExpr(self.query_fields, " ".join(keywords), 100,
{"minimum_should_match": min(3, len(keywords) // 10)})
{"minimum_should_match": min(3, len(keywords) / 10), "original_query": " ".join(origin_keywords)})
Loading