Skip to content

Commit 4260ef1

Browse files
Merge branch 'main' into 07-17-fix_use_the_cloud_catalog_when_doing_save_as_table_in_a_cloud_session
2 parents f451b4f + 8fbbf72 commit 4260ef1

File tree

8 files changed

+424
-165
lines changed

8 files changed

+424
-165
lines changed

examples/mcp/docs-server/README.md

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ General models are great at Typescript and Python, not your private APIs. This s
3131
1. **(Optional) Set environment variables:**
3232

3333
```bash
34+
export GOOGLE_API_KEY="your-google-developer-api-key"
3435
# Optional: Set custom data directory (defaults to ~/.fenic)
3536
export FENIC_WORK_DIR="/path/to/custom/directory"
3637
```
@@ -49,7 +50,10 @@ General models are great at Typescript and Python, not your private APIs. This s
4950
"mcpServers": {
5051
"fenic-docs": {
5152
"command": "/path/to/fenic/examples/mcp/docs-server/.venv/bin/python",
52-
"args": ["/path/to/fenic/examples/mcp/docs-server/server.py"]
53+
"args": ["/path/to/fenic/examples/mcp/docs-server/server.py"],
54+
"env": {
55+
"GOOGLE_API_KEY": "your-google-developer-api-key"
56+
}
5357
}
5458
}
5559
}

examples/mcp/docs-server/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ dependencies = [
88
"fastmcp>=0.1.0",
99
"griffe>=0.42.0",
1010
"fenic[anthropic,google]>=0.3.0",
11+
"structlog>=25.4.0",
1112
]
1213

1314
[project.scripts]

examples/mcp/docs-server/search.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
import logging
2+
import re
3+
from typing import Callable
4+
5+
import fenic as fc
6+
7+
logger = logging.getLogger(__name__)
8+
logger.setLevel(logging.DEBUG)
9+
10+
class FenicAPIDocQuerySearch:
    """Regex-based search over the Fenic API docs and stored learnings.

    All public entry points are classmethods, so the class is used as a
    namespace and never needs to be instantiated. Queries are treated as
    case-insensitive regular expressions (``(?i)`` is prepended to every
    pattern handed to ``rlike``).
    """

    @classmethod
    def _is_valid_regex(cls, query: str) -> bool:
        """Return True when ``query`` compiles as a Python regular expression.

        NOTE(review): validity is checked against Python's ``re`` dialect;
        the engine behind ``rlike`` may accept a slightly different syntax --
        confirm against the fenic documentation.
        """
        try:
            re.compile(query)
        except re.error:
            return False
        return True

    @classmethod
    def _search_api_docs_regex(cls, df: fc.DataFrame, query: str) -> fc.DataFrame:
        """Keep the API-doc rows in which any searchable column matches ``query``.

        The columns inspected are ``name``, ``qualified_name``, ``docstring``,
        ``annotation`` and ``returns``; the last three are only tested when
        they are not null.
        """
        pattern = f"(?i){query}"
        return df.filter(
            fc.col("name").rlike(pattern)
            | fc.col("qualified_name").rlike(pattern)
            | (
                fc.col("docstring").is_not_null()
                & fc.col("docstring").rlike(pattern)
            )
            | (
                fc.col("annotation").is_not_null()
                & fc.col("annotation").rlike(pattern)
            )
            | (
                fc.col("returns").is_not_null()
                & fc.col("returns").rlike(pattern)
            )
        )

    @classmethod
    def _search_learnings_regex(cls, df: fc.DataFrame, query: str) -> fc.DataFrame:
        """Keep learnings whose question or answer matches ``query`` as a regex.

        The ``keywords`` array is compared against the raw ``query`` string
        with ``array_contains`` (no regex, no ``(?i)`` prefix).
        """
        pattern = f"(?i){query}"
        return df.filter(
            fc.col("question").rlike(pattern)
            | fc.col("answer").rlike(pattern)
            | fc.array_contains(fc.col("keywords"), query)
        )

    @classmethod
    def _search_learnings_keyword(cls, df: fc.DataFrame, term: str) -> fc.DataFrame:
        """Keep learnings that contain ``term`` as a plain substring or keyword."""
        return df.filter(
            fc.col("question").contains(term)
            | fc.col("answer").contains(term)
            | fc.array_contains(fc.col("keywords"), term)
        )

    @classmethod
    def _search_terms(
        cls,
        df: fc.DataFrame,
        query: str,
        search_func: Callable[[fc.DataFrame, str], fc.DataFrame],
    ) -> fc.DataFrame:
        """Search for ``query`` as a whole and for each of its words.

        Args:
            df: Frame to search.
            query: Full query string; it is also lower-cased and split on
                whitespace to obtain the individual terms.
            search_func: Function applied to ``df`` for the full query and for
                every individual term.

        Returns:
            The full-query matches unioned with the matches of each term.
            A blank query (no terms) yields just the full-query matches
            instead of failing.
        """
        # First search the query as a whole.
        result_df = search_func(df, query)
        # count() executes the query, so only pay for it when the message
        # would actually be emitted.
        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f"result_df - {query}: {result_df.count()}")

        # Then look for each individual term. Repeated terms, and a term that
        # is identical to the whole query, would only re-run the same search,
        # so they are skipped.
        # NOTE(review): a row matching several different terms can still
        # appear more than once if ``union`` keeps duplicates -- confirm.
        unique_terms = dict.fromkeys(query.lower().split())
        for term in unique_terms:
            if term == query:
                continue
            result_df = result_df.union(search_func(df, term))

        if logger.isEnabledFor(logging.DEBUG):
            logger.debug(f"learnings results: {result_df.to_pydict()}")

        return result_df

    @classmethod
    def search_learnings(
        cls, session: fc.Session, query: str
    ) -> "fc.DataFrame | None":
        """Search the ``learnings`` table with a regex query and score the hits.

        Args:
            session: Session whose catalog may contain a ``learnings`` table.
            query: Regular expression (matched case-insensitively).

        Returns:
            Up to 7 rows ordered by descending ``score``, or ``None`` when the
            query is blank, the ``learnings`` table does not exist, or the
            search fails.
        """
        # A blank query has nothing to search for.
        if not query or not query.strip():
            return None
        if not session.catalog.does_table_exist("learnings"):
            return None

        try:
            learnings_df = session.table("learnings")

            logger.debug(f"Searching learnings with regex: {query}")
            learnings_search = cls._search_terms(
                learnings_df, query, cls._search_learnings_regex
            )

            # Relevance scoring is based on the full query only: question hits
            # weigh 10, answer hits 5, keyword hits 3.
            pattern = f"(?i){query}"
            learnings_scored = learnings_search.select(
                "question",
                "answer",
                "learning_type",
                "keywords",
                "related_functions",
                fc.when(fc.col("question").rlike(pattern), fc.lit(10))
                .otherwise(fc.lit(0))
                .alias("question_score"),
                fc.when(fc.col("answer").rlike(pattern), fc.lit(5))
                .otherwise(fc.lit(0))
                .alias("answer_score"),
                fc.when(fc.array_contains(fc.col("keywords"), query), fc.lit(3))
                .otherwise(fc.lit(0))
                .alias("keywords_score"),
            )

            # Total score, with a 1.5x boost for learnings of type "correction".
            learnings_scored = learnings_scored.select(
                "*",
                (
                    fc.col("question_score")
                    + fc.col("answer_score")
                    + fc.col("keywords_score")
                ).alias("base_score"),
            ).select(
                "*",
                fc.when(
                    fc.col("learning_type") == "correction",
                    fc.col("base_score") * 1.5,
                )
                .otherwise(fc.col("base_score"))
                .alias("score"),
            )

            # Sort and limit learnings (max 7 results).
            return learnings_scored.order_by(fc.col("score").desc()).limit(7)
        except Exception as e:
            # Best effort: a failing learnings search is logged and reported
            # as "no result" rather than propagated to the caller.
            logger.error(f"Warning: Learnings search failed: {e}")
            return None

    @classmethod
    def search_api_docs(cls, session: fc.Session, query: str) -> fc.DataFrame:
        """Search the public entries of the ``api_df`` table with a regex query.

        Args:
            session: Session providing the ``api_df`` table.
            query: Regular expression (matched case-insensitively).

        Returns:
            Matching rows with ``type``, ``name``, ``qualified_name``,
            ``docstring``, ``name_score``, ``path_score`` and their sum
            ``score``. The result is not sorted; ordering is left to the
            caller.

        Raises:
            ValueError: If ``query`` is not a valid regular expression.
        """
        # Reject a bad pattern before touching the session.
        if not cls._is_valid_regex(query):
            raise ValueError("Invalid regex query")

        df = session.table("api_df")

        # Filter only public API elements.
        df = df.filter(
            (fc.col("is_public")) & (~fc.col("qualified_name").contains("._"))
        )

        logger.debug(f"Searching API docs with regex: {query}")
        search_df = cls._search_api_docs_regex(df, query)

        # Relevance scoring: a name hit weighs 10, a qualified-name hit 5.
        pattern = f"(?i){query}"
        search_df = search_df.select(
            "type",
            "name",
            "qualified_name",
            "docstring",
            fc.when(fc.col("name").rlike(pattern), fc.lit(10))
            .otherwise(fc.lit(0))
            .alias("name_score"),
            fc.when(fc.col("qualified_name").rlike(pattern), fc.lit(5))
            .otherwise(fc.lit(0))
            .alias("path_score"),
        )

        # Calculate the total score.
        search_df = search_df.select(
            "*", (fc.col("name_score") + fc.col("path_score")).alias("score")
        )

        return search_df

0 commit comments

Comments
 (0)