Skip to content

Commit 862ad08

Browse files
committed
sync
1 parent 6818230 commit 862ad08

File tree

10 files changed

+1624
-22
lines changed

10 files changed

+1624
-22
lines changed

.github/workflows/auto_answer.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
enable-cache: true
2424

2525
- name: Install the project
26-
run: uv pip install -r requirements.txt
26+
run: uv run pip install -r requirements.txt
2727

2828
- name: Run the answer bot with uv run
2929
env:

KnowledgeBaseBot/all_texts.json

Lines changed: 324 additions & 0 deletions
Large diffs are not rendered by default.

KnowledgeBaseBot/all_texts.pkl

-519 KB
Binary file not shown.

KnowledgeBaseBot/auto_answer.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ jobs:
2323
enable-cache: true
2424

2525
- name: Install the project
26-
run: uv pip install -r requirements.txt
26+
run: uv run pip install -r requirements.txt
2727

2828
- name: Run the answer bot with uv run
2929
env:

KnowledgeBaseBot/build_knowledge_base.py

Lines changed: 6 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import os
2+
import json
23
import faiss
34
import pickle
45
import numpy as np
@@ -17,13 +18,13 @@
1718

1819
# --- File Paths for State ---
1920
INDEX_FILE = "unified_index.faiss"
20-
METADATA_FILE = "metadata.pkl"
21-
TEXTS_FILE = "all_texts.pkl"
22-
STATE_FILE = "kb_state.pkl" # New file to store the last update time
21+
METADATA_FILE = "metadata.json"
22+
TEXTS_FILE = "all_texts.json"
23+
STATE_FILE = "kb_state.json"
2324

2425
# init pandoc
25-
from pypandoc.pandoc_download import download_pandoc
26-
download_pandoc()
26+
# from pypandoc.pandoc_download import download_pandoc
27+
# download_pandoc()
2728

2829
# --- Initialization ---
2930
g = Github(GITHUB_TOKEN)
@@ -60,20 +61,6 @@
6061
all_texts.append(chunk.page_content)
6162
metadata.append({'source': 'documentation', 'file': chunk.metadata.get('source', 'N/A')})
6263

63-
# --- Process Documentation ---
64-
print("Processing documentation...")
65-
# Use DirectoryLoader to load all .md files from the directory
66-
loader = DirectoryLoader(DOCS_PATH, glob="**/*.rst")
67-
docs = loader.load()
68-
69-
# Split the documents into smaller, manageable chunks
70-
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
71-
doc_chunks = text_splitter.split_documents(docs)
72-
73-
for chunk in doc_chunks:
74-
all_texts.append(chunk.page_content)
75-
metadata.append({'source': 'documentation', 'file': chunk.metadata.get('source', 'N/A')})
76-
7764
# --- Process GitHub Issues ---
7865
# --- Fetch New Issues from GitHub ---
7966
print(f"Fetching issues updated since {last_update_time.isoformat()}...")

KnowledgeBaseBot/kb_state.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"1970-01-01T00:00:00+00:00"

KnowledgeBaseBot/kb_state.pkl

-98 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)