Commit 8fbbf72
fix: only use regex search for the mcp server example (#148)
### TL;DR

Improved search functionality in the docs-server, with better regex handling, structured logging, and enhanced error handling.

### What changed?

This adopts the changes Brandon proposed for our own MCP server: we now use only the regex search. I had also incorrectly assumed the server didn't need the models; that is not correct, since store_learnings requires them, so I've added them back to the configuration and README.md. I also tested retrieving information from the learnings (using the full query string as well as individual terms); this wasn't working before because store_learnings failed without the model configuration.
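
The regex-only path comes down to rejecting invalid patterns up front and then matching case-insensitively. A minimal sketch of that idea in plain `re` (the committed search.py applies the same pattern to fenic columns via rlike):

```python
# Sketch of the regex-only flow, using plain re rather than fenic columns.
import re

def compile_query(query: str) -> re.Pattern:
    """Reject invalid patterns up front, then match case-insensitively."""
    try:
        # "(?i)" mirrors the case-insensitive prefix used with rlike in search.py.
        return re.compile(f"(?i){query}")
    except re.error as exc:
        raise ValueError("Invalid regex query") from exc

pattern = compile_query("semantic.*join")
assert pattern.search("Semantic JOIN of two DataFrames")
```
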
1 parent d3842b0 commit 8fbbf72

8 files changed, +424 −165 lines changed

examples/mcp/docs-server/README.md

Lines changed: 5 additions & 1 deletion
@@ -31,6 +31,7 @@ General models are great at Typescript and Python, not your private APIs. This s
 1. **(Optional) Set environment variables:**

    ```bash
+   export GOOGLE_API_KEY="your-google-developer-api-key"
    # Optional: Set custom data directory (defaults to ~/.fenic)
    export FENIC_WORK_DIR="/path/to/custom/directory"
    ```

@@ -49,7 +50,10 @@ General models are great at Typescript and Python, not your private APIs. This s
   "mcpServers": {
     "fenic-docs": {
       "command": "/path/to/fenic/examples/mcp/docs-server/.venv/bin/python",
-      "args": ["/path/to/fenic/examples/mcp/docs-server/server.py"]
+      "args": ["/path/to/fenic/examples/mcp/docs-server/server.py"],
+      "env": {
+        "GOOGLE_API_KEY": "your-google-developer-api-key"
+      }
     }
   }
 }

examples/mcp/docs-server/pyproject.toml

Lines changed: 1 addition & 0 deletions
@@ -8,6 +8,7 @@ dependencies = [
     "fastmcp>=0.1.0",
     "griffe>=0.42.0",
     "fenic[anthropic,google]>=0.3.0",
+    "structlog>=25.4.0",
 ]

 [project.scripts]
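
The new structlog dependency backs the structured logging mentioned in the TL;DR. The actual wiring lives in server files not shown above; a rough sketch of a typical setup (the level and event names here are assumptions):

```python
# Minimal structlog setup sketch; only the structlog>=25.4.0 dependency
# comes from this commit, the configuration below is illustrative.
import logging

import structlog

structlog.configure(
    # Drop events below DEBUG; DEBUG matches the level used in search.py.
    wrapper_class=structlog.make_filtering_bound_logger(logging.DEBUG),
)

log = structlog.get_logger()
# Key-value pairs are emitted as structured fields rather than interpolated text.
log.debug("search_api_docs", query="semantic.*join", backend="regex")
```
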

examples/mcp/docs-server/search.py

Lines changed: 175 additions & 0 deletions
@@ -0,0 +1,175 @@
import logging
import re
from typing import Callable

import fenic as fc

logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

class FenicAPIDocQuerySearch:
    """Search for queries to the Fenic API.
    Supports both keyword and regex search.
    """

    @classmethod
    def _is_valid_regex(cls, query: str) -> bool:
        """Heuristic check to see if the query is a regex."""
        try:
            re.compile(query)
            return True
        except re.error:
            return False

    @classmethod
    def _search_api_docs_regex(cls, df: fc.DataFrame, query: str) -> fc.DataFrame:
        """Search API documentation using regex."""
        return df.filter(
            fc.col("name").rlike(f"(?i){query}")
            | fc.col("qualified_name").rlike(f"(?i){query}")
            | (
                fc.col("docstring").is_not_null()
                & fc.col("docstring").rlike(f"(?i){query}")
            )
            | (
                fc.col("annotation").is_not_null()
                & fc.col("annotation").rlike(f"(?i){query}")
            )
            | (
                fc.col("returns").is_not_null()
                & fc.col("returns").rlike(f"(?i){query}")
            )
        )

    @classmethod
    def _search_learnings_regex(cls, df: fc.DataFrame, query: str) -> fc.DataFrame:
        """Search learnings using regex."""
        return df.filter(
            fc.col("question").rlike(f"(?i){query}")
            | fc.col("answer").rlike(f"(?i){query}")
            | fc.array_contains(fc.col("keywords"), query)
        )

    @classmethod
    def _search_learnings_keyword(cls, df: fc.DataFrame, term: str) -> fc.DataFrame:
        """Search learnings using keyword."""
        return df.filter(
            fc.col("question").contains(term)
            | fc.col("answer").contains(term)
            | fc.array_contains(fc.col("keywords"), term)
        )

    @classmethod
    def _search_terms(
        cls,
        df: fc.DataFrame,
        query: str,
        search_func: Callable[[fc.DataFrame, str], fc.DataFrame],
    ) -> fc.DataFrame:
        """Search using multiple terms."""
        # First search the query as a whole.
        result_df = search_func(df, query)
        logger.debug(f"result_df - {query}: {result_df.count()}")

        # look for each individual term as well.
        terms = query.lower().split()
        terms_data_frames = []
        for term in terms:
            terms_data_frames.append(search_func(df, term))
        result_df = result_df.union(terms_data_frames[0])
        for df in terms_data_frames[1:]:
            result_df = result_df.union(df)

        logger.debug(f"learnings results: {result_df.to_pydict()}")

        return result_df

    @classmethod
    def search_learnings(cls, session: fc.Session, query: str) -> fc.DataFrame:
        """Search learnings using keyword."""
        if session.catalog.does_table_exist("learnings"):
            try:
                learnings_df = session.table("learnings")

                logger.debug(f"Searching learnings with regex: {query}")
                learnings_search = cls._search_terms(
                    learnings_df, query, cls._search_learnings_regex
                )

                # Add relevance scoring for learnings
                learnings_scored = learnings_search.select(
                    "question",
                    "answer",
                    "learning_type",
                    "keywords",
                    "related_functions",
                    fc.when(fc.col("question").rlike(f"(?i){query}"), fc.lit(10))
                    .otherwise(fc.lit(0))
                    .alias("question_score"),
                    fc.when(fc.col("answer").rlike(f"(?i){query}"), fc.lit(5))
                    .otherwise(fc.lit(0))
                    .alias("answer_score"),
                    fc.when(fc.array_contains(fc.col("keywords"), query), fc.lit(3))
                    .otherwise(fc.lit(0))
                    .alias("keywords_score"),
                )

                # Calculate total score with correction boost
                learnings_scored = learnings_scored.select(
                    "*",
                    (
                        fc.col("question_score")
                        + fc.col("answer_score")
                        + fc.col("keywords_score")
                    ).alias("base_score"),
                ).select(
                    "*",
                    fc.when(
                        fc.col("learning_type") == "correction",
                        fc.col("base_score") * 1.5,
                    )
                    .otherwise(fc.col("base_score"))
                    .alias("score"),
                )

                # Sort and limit learnings (max 7 results)
                return learnings_scored.order_by(fc.col("score").desc()).limit(7)
            except Exception as e:
                logger.error(f"Warning: Learnings search failed: {e}")
        return None

    @classmethod
    def search_api_docs(cls, session: fc.Session, query: str) -> fc.DataFrame:
        # Search API documentation
        df = session.table("api_df")

        # Filter only public API elements
        df = df.filter(
            (fc.col("is_public")) & (~fc.col("qualified_name").contains("._"))
        )

        if not cls._is_valid_regex(query):
            raise ValueError("Invalid regex query")
        logger.debug(f"Searching API docs with regex: {query}")
        search_df = cls._search_api_docs_regex(df, query)

        # Add relevance scoring
        search_df = search_df.select(
            "type",
            "name",
            "qualified_name",
            "docstring",
            fc.when(fc.col("name").rlike(f"(?i){query}"), fc.lit(10))
            .otherwise(fc.lit(0))
            .alias("name_score"),
            fc.when(fc.col("qualified_name").rlike(f"(?i){query}"), fc.lit(5))
            .otherwise(fc.lit(0))
            .alias("path_score"),
        )

        # Calculate total score and sort
        search_df = search_df.select(
            "*", (fc.col("name_score") + fc.col("path_score")).alias("score")
        )

        return search_df
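
For reference, a minimal sketch of how the new class might be exercised. The session setup and the queries below are assumptions based on the fenic API and are not part of this commit; it also assumes the docs-server has already populated the api_df and learnings tables.

```python
# Hypothetical usage sketch (not part of the commit). Assumes a fenic session
# whose catalog already holds the "api_df" and "learnings" tables, with models
# configured as described in README.md.
import fenic as fc

from search import FenicAPIDocQuerySearch

session = fc.Session.get_or_create(fc.SessionConfig(app_name="fenic_docs"))

# Regex search over the API docs; raises ValueError for an invalid pattern.
api_hits = FenicAPIDocQuerySearch.search_api_docs(session, "semantic.*join")
api_hits.order_by(fc.col("score").desc()).show()

# Learnings search returns None when the table is missing or the search fails.
learnings = FenicAPIDocQuerySearch.search_learnings(session, "how do I join dataframes")
if learnings is not None:
    learnings.show()
```
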
