Skip to content

Commit 9e7cb49

Browse files
committed
address additional comments, add some shortcuts, better documentation, additional testing, better naming
1 parent f97806d commit 9e7cb49

File tree

13 files changed

+193
-134
lines changed

13 files changed

+193
-134
lines changed

docs/topics/fenic-mcp.md

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,15 @@ session.catalog.drop_tool("users_by_name_regex", ignore_if_not_exists=True)
136136

137137
### Step 2a: Auto-generate system tools from catalog tables
138138

139-
You can generate a suite of reusable data tools (Schema, Profile, Read, Search Summary, Search Content, Analyze) directly from catalog tables and their descriptions. This is helpful for quickly exposing exploratory and read/query capabilities to MCP.
139+
You can generate a suite of reusable data tools (Schema, Profile, Read, Search Summary, Search Content, Analyze) directly from catalog tables and their descriptions.
140+
This is helpful for quickly exposing exploratory and read/query capabilities to MCP. Available tools include:
141+
142+
- Schema: list columns/types for any or all tables
143+
- Profile: column statistics (counts, basic numeric analysis [min, max, mean, etc.], contextual information for text columns [average_length, etc.])
144+
- Read: read a selection of rows from a single table. Rows can be paged, filtered, and restricted to selected columns (column projection).
145+
- Search Summary: regex search across all text columns in all tables -- returns dataframe names with result counts.
146+
- Search Content: regex search across a single table, specifying one or more text columns to search across -- returns the rows that match the query.
147+
- Analyze: write raw SQL to perform complex analysis on one or more tables.
140148

141149
Requirements:
142150

@@ -147,37 +155,34 @@ Example:
147155
```python
148156
from fenic import Session
149157
from fenic.api.mcp.server import create_mcp_server
150-
from fenic.api.mcp.tools import ToolGenerationConfig
158+
from fenic.api.mcp.tools import SystemToolConfig
151159

152160
session = Session.get_or_create(...)
153161
server = create_mcp_server(
154162
session,
155163
server_name="Fenic MCP",
156-
system_tools=ToolGenerationConfig(
157-
table_names=["orders", "users"],
158-
tool_group_name="Dataset Exploration",
164+
system_tools=SystemToolConfig(
165+
table_names=session.catalog.list_tables(),
166+
tool_namespace="Dataset Exploration",
159167
max_result_rows=200,
160168
),
161169
)
162170
```
163171

164172
## Step 3a: Serve tools programmatically
165173

166-
Use the MCP server helpers to serve existing catalog tools. If you want all registered tools, call `list_tools()`. If you want a subset, fetch by name.
174+
Use the MCP server helpers to serve existing catalog tools. To use all catalog tools in the MCP server,
175+
pass the result of `session.catalog.list_tools()` to `create_mcp_server` as `user_defined_tools`:
167176

168177
```python
169178
from fenic import Session,SessionConfig
170-
from fenic.api.mcp.server import create_mcp_server, run_mcp_server_sync, run_mcp_server_async, run_mcp_server_asgi,
179+
from fenic.api.mcp.server import create_mcp_server, run_mcp_server_sync, run_mcp_server_async, run_mcp_server_asgi
171180

172181
session = Session.get_or_create(SessionConfig(
173182
app_name="mcp_example",
174183
...
175184
))
176-
177-
# Load all catalog tools
178-
tools = session.catalog.list_tools()
179-
180-
server = create_mcp_server(session, server_name="Fenic MCP", user_defined_tools=tools)
185+
server = create_mcp_server(session, server_name="Fenic MCP", user_defined_tools=session.catalog.list_tools())
181186

182187
# Run HTTP server (defaults shown); if additional configuration is required, any argument that can be passed to FastMCP `run` can be passed here
183188
#

src/fenic/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
SemanticExtensions,
2424
Session,
2525
SessionConfig,
26-
ToolGenerationConfig,
26+
SystemToolConfig,
2727
array,
2828
array_agg,
2929
array_contains,
@@ -239,7 +239,7 @@
239239
"BoundToolParam",
240240
"UserDefinedTool",
241241
"SystemTool",
242-
"ToolGenerationConfig",
242+
"SystemToolConfig",
243243
"create_mcp_server",
244244
"run_mcp_server_asgi",
245245
"run_mcp_server_async",

src/fenic/api/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from fenic.api.io import DataFrameReader, DataFrameWriter
4545
from fenic.api.lineage import Lineage
4646
from fenic.api.mcp import (
47-
ToolGenerationConfig,
47+
SystemToolConfig,
4848
create_mcp_server,
4949
run_mcp_server_asgi,
5050
run_mcp_server_async,
@@ -137,5 +137,5 @@
137137
"run_mcp_server_sync",
138138
"run_mcp_server_async",
139139
"run_mcp_server_asgi",
140-
"ToolGenerationConfig",
140+
"SystemToolConfig",
141141
]

src/fenic/api/mcp/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""MCP Tool Creation/Server Management API."""
22

33
from fenic.api.mcp.server import (
4-
ToolGenerationConfig,
4+
SystemToolConfig,
55
create_mcp_server,
66
run_mcp_server_asgi,
77
run_mcp_server_async,
@@ -13,5 +13,5 @@
1313
"run_mcp_server_sync",
1414
"run_mcp_server_async",
1515
"run_mcp_server_asgi",
16-
"ToolGenerationConfig",
16+
"SystemToolConfig",
1717
]

src/fenic/api/mcp/_tool_generation_utils.py

Lines changed: 47 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,23 @@
88
import polars as pl
99
from typing_extensions import Annotated
1010

11-
from fenic import (
11+
from fenic.api import col
12+
from fenic.api.dataframe import DataFrame
13+
from fenic.api.functions import avg, stddev
14+
from fenic.api.functions import max as max_
15+
from fenic.api.functions import min as min_
16+
from fenic.api.session import Session
17+
from fenic.core._logical_plan import LogicalPlan
18+
from fenic.core._logical_plan.plans import InMemorySource
19+
from fenic.core.error import ConfigurationError, ValidationError
20+
from fenic.core.mcp.types import SystemTool
21+
from fenic.core.types.datatypes import (
1222
BooleanType,
13-
DataFrame,
1423
DoubleType,
1524
FloatType,
1625
IntegerType,
17-
Session,
1826
StringType,
19-
SystemTool,
20-
avg,
21-
col,
22-
stddev,
2327
)
24-
from fenic import max as max_
25-
from fenic import min as min_
26-
from fenic.core._logical_plan import LogicalPlan
27-
from fenic.core._logical_plan.plans import InMemorySource
28-
from fenic.core._utils.schema import convert_custom_dtype_to_polars
29-
from fenic.core.error import ConfigurationError, ValidationError
3028

3129
PROFILE_MAX_SAMPLE_SIZE = 10_000
3230

@@ -44,11 +42,11 @@ class DatasetSpec:
4442
description: str
4543
df: DataFrame
4644

47-
def auto_generate_system_tools(
45+
def _auto_generate_system_tools(
4846
datasets: List[DatasetSpec],
4947
session: Session,
5048
*,
51-
tool_group_name: str,
49+
tool_namespace: Optional[str],
5250
max_result_limit: int = 100,
5351
) -> List[SystemTool]:
5452
"""Generate core tools spanning all datasets: Schema, Profile, Analyze.
@@ -67,7 +65,7 @@ def auto_generate_system_tools(
6765
schema_tool = _auto_generate_schema_tool(
6866
datasets,
6967
session,
70-
tool_name=f"{tool_group_name} - Schema",
68+
tool_name=f"{tool_namespace} - Schema" if tool_namespace else "Schema",
7169
tool_description="\n\n".join([
7270
"Show the schema (column names and types) for any or all of the datasets listed below. This call should be the first step in exploring the available datasets.",
7371
group_desc,
@@ -77,7 +75,7 @@ def auto_generate_system_tools(
7775
profile_tool = _auto_generate_profile_tool(
7876
datasets,
7977
session,
80-
tool_name=f"{tool_group_name} - Profile",
78+
tool_name=f"{tool_namespace} - Profile" if tool_namespace else "Profile",
8179
tool_description="\n".join([
8280
"Return dataset data profile: row_count and per-column stats for any or all of the datasets listed below.",
8381
"This call should be used as a follow up after calling the `Schema` tool."
@@ -91,8 +89,8 @@ def auto_generate_system_tools(
9189
read_tool = _auto_generate_read_tool(
9290
datasets,
9391
session,
94-
tool_name=f"{tool_group_name} - Read",
95-
tool_description="\n\n".join([
92+
tool_name=f"{tool_namespace} - Read" if tool_namespace else "Read",
93+
tool_description="\n".join([
9694
"Read rows from a single dataset. Use to sample data, or to execute simple queries over the data that do not require filtering or grouping.",
9795
"Use `include_columns` and `exclude_columns` to filter columns by name -- this is important to conserve token usage. Use the `Profile` tool to understand the columns and their sizes.",
9896
"Available datasets:",
@@ -104,8 +102,8 @@ def auto_generate_system_tools(
104102
search_summary_tool = _auto_generate_search_summary_tool(
105103
datasets,
106104
session,
107-
tool_name=f"{tool_group_name} - Search Summary",
108-
tool_description="\n\n".join([
105+
tool_name=f"{tool_namespace} - Search Summary" if tool_namespace else "Search Summary",
106+
tool_description="\n".join([
109107
"Perform a substring/regex search across all datasets and return a summary of the number of matches per dataset.",
110108
"Available datasets:",
111109
group_desc,
@@ -114,8 +112,8 @@ def auto_generate_system_tools(
114112
search_content_tool = _auto_generate_search_content_tool(
115113
datasets,
116114
session,
117-
tool_name=f"{tool_group_name} - Search Content",
118-
tool_description="\n\n".join([
115+
tool_name=f"{tool_namespace} - Search Content" if tool_namespace else "Search Content",
116+
tool_description="\n".join([
119117
"Return matching rows from a single dataset using substring/regex across string columns.",
120118
"Available datasets:",
121119
group_desc,
@@ -126,8 +124,8 @@ def auto_generate_system_tools(
126124
analyze_tool = _auto_generate_sql_tool(
127125
datasets,
128126
session,
129-
tool_name=f"{tool_group_name} - Analyze",
130-
tool_description="\n\n".join([
127+
tool_name=f"{tool_namespace} - Analyze" if tool_namespace else "Analyze",
128+
tool_description="\n".join([
131129
"Execute Read-Only (SELECT) SQL over the provided datasets using fenic's SQL support.",
132130
"DDL/DML and multiple top-level queries are not allowed.",
133131
"For text search, prefer regular expressions (REGEXP_MATCHES()/REGEXP_EXTRACT()).",
@@ -142,14 +140,11 @@ def auto_generate_system_tools(
142140
return [schema_tool, profile_tool, read_tool, search_summary_tool, search_content_tool, analyze_tool]
143141

144142

145-
def build_datasets_from_tables(table_names: List[str], session: Session) -> List[DatasetSpec]:
143+
def _build_datasets_from_tables(table_names: List[str], session: Session) -> List[DatasetSpec]:
146144
"""Resolve catalog table names into DatasetSpec list with validated descriptions.
147145
148146
Raises ConfigurationError if any table is missing or lacks a non-empty description.
149147
"""
150-
if len(table_names) == 0:
151-
raise ConfigurationError("No tables provided for tool generation.")
152-
153148
missing_desc: List[str] = []
154149
missing_tables: List[str] = []
155150
specs: List[DatasetSpec] = []
@@ -398,8 +393,7 @@ async def schema_func(
398393

399394
for name, d in selected.items():
400395
# Build a single-row DataFrame with a common list<struct{column,type}> schema column
401-
schema_entries = [{"column": f.name, "type": str(convert_custom_dtype_to_polars(f.data_type))} for f in
402-
d.schema.column_fields]
396+
schema_entries = [{"column": f.name, "type": str(f.data_type)} for f in d.schema.column_fields]
403397
dataset_names.append(name)
404398
dataset_schemas.append(schema_entries)
405399

@@ -768,3 +762,25 @@ async def _compute_profile_rows(
768762
stats.string_example_values = sampled
769763
rows_list.append(stats)
770764
return rows_list
765+
766+
767+
def auto_generate_system_tools_from_tables(
768+
table_names: list[str],
769+
session: Session,
770+
*,
771+
tool_namespace: Optional[str],
772+
max_result_limit: int = 100,
773+
) -> List[SystemTool]:
774+
"""Generate Schema/Profile/Read/Search [Content/Summary]/Analyze tools from catalog tables.
775+
776+
Validates that each table exists and has a non-empty description in catalog metadata.
777+
"""
778+
if not table_names:
779+
raise ConfigurationError("At least one table name must be specified for automated system tool creation.")
780+
datasets = _build_datasets_from_tables(table_names, session)
781+
return _auto_generate_system_tools(
782+
datasets,
783+
session,
784+
tool_namespace=tool_namespace,
785+
max_result_limit=max_result_limit,
786+
)

src/fenic/api/mcp/server.py

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
"""
77
from typing import List, Optional
88

9+
from fenic.api.mcp._tool_generation_utils import auto_generate_system_tools_from_tables
910
from fenic.api.mcp.tools import (
10-
ToolGenerationConfig,
11-
auto_generate_system_tools_from_tables,
11+
SystemToolConfig,
1212
)
1313
from fenic.api.session.session import Session
1414
from fenic.core.error import ConfigurationError
@@ -21,27 +21,28 @@ def create_mcp_server(
2121
server_name: str,
2222
*,
2323
user_defined_tools: Optional[List[UserDefinedTool]] = None,
24-
system_tools: Optional[ToolGenerationConfig] = None,
24+
system_tools: Optional[SystemToolConfig] = None,
2525
concurrency_limit: int = 8,
2626
) -> FenicMCPServer:
2727
"""Create an MCP server from datasets and tools.
2828
2929
Args:
3030
session: Fenic session used to execute tools.
3131
server_name: Name of the MCP server.
32-
system_tools: List of system tools to register (optional).
33-
user_defined_tools: Tools to register (optional).
34-
system_tools: Generate system tools for one or more Dataframes.
32+
user_defined_tools: User defined tools to register with the MCP server.
33+
system_tools: Configuration for automatically created system tools.
3534
concurrency_limit: Maximum number of concurrent tool executions.
3635
"""
3736
generated_system_tools = []
3837
user_defined_tools = user_defined_tools or []
3938
if system_tools:
40-
generated_system_tools.extend(auto_generate_system_tools_from_tables(
41-
system_tools.table_names,
42-
session,
43-
tool_group_name=system_tools.tool_group_name,
44-
max_result_limit=system_tools.max_result_rows)
39+
generated_system_tools.extend(
40+
auto_generate_system_tools_from_tables(
41+
system_tools.table_names,
42+
session,
43+
tool_namespace=system_tools.tool_namespace,
44+
max_result_limit=system_tools.max_result_rows
45+
)
4546
)
4647
if not (user_defined_tools or system_tools):
4748
raise ConfigurationError("No tools provided. Either provide tools or set generate_automated_tools=True and provide datasets.")

0 commit comments

Comments
 (0)