You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
"""Decorator to bind a DataFrame to a user-authored tool function.
101
105
106
+
Args:
107
+
tool_name: The name of the tool.
108
+
tool_description: The description of the tool.
109
+
max_result_limit: The maximum number of results to return.
110
+
default_table_format: The default table format to return.
111
+
read_only: A hint to provide to the model that the tool does not modify its environment.
112
+
idempotent: A hint to provide to the model that calling the tool multiple times with the same input will always return the same result (redundant if read_only is True).
113
+
destructive: A hint to provide to the model that the tool may destructively modify its environment.
114
+
open_world: A hint to provide to the model that the tool may interact with an "open world" of external entities outside of the MCP server's environment.
pred = fc.semantic.predicate("Matches: {{q}} Data: {{bio}}", q=query, bio=fc.col("bio"))
121
+
pred = fc.semantic.predicate("Matches: {{q}} Data: {{bio}}", q=fc.lit(query), bio=fc.col("bio"))
108
122
return df.filter(pred)
109
123
110
124
mcp_server = fc.create_mcp_server(
@@ -114,6 +128,14 @@ def find_rust(
114
128
)
115
129
fc.run_mcp_server_sync(mcp_server)
116
130
131
+
Example: Creating an open-world tool that reaches out to an external API. The open_world flag indicates to the model that the tool may interact with an "open world" of external entities
search_columns: Annotated[Optional[str], "Comma separated list of column names to search within; if omitted, matches in any string column will be returned. Use this to query only specific columns in the search as needed."] =None,
"- For text search, prefer regular expressions using REGEXP_MATCHES().",
437
-
"- Paging: use ORDER BY to define row order, then LIMIT and OFFSET for pages.",
438
-
"",
439
-
"Examples:", # nosec B608 - example text only
440
-
f"- SELECT * FROM {{{example_name}}} WHERE REGEXP_MATCHES(message, '(?i)error|fail') LIMIT 100", # nosec B608 - example text only
441
-
f"- SELECT dept, COUNT(*) AS n FROM {{{example_name}}} WHERE status = 'active' GROUP BY dept HAVING n > 10 ORDER BY n DESC LIMIT 100", # nosec B608 - example text only
442
-
f"- -- Paging: page 2 of size 50\n SELECT * FROM {{{example_name}}} ORDER BY created_at DESC LIMIT 50 OFFSET 50", # nosec B608 - example text only
442
+
"\n\nNotes:\n",
443
+
"- SQL dialect: DuckDB.\n",
444
+
"- For text search, prefer regular expressions using REGEXP_MATCHES().\n",
445
+
"- Paging: use ORDER BY to define row order, then LIMIT and OFFSET for pages.\n",
446
+
f"- Returns a maximum of {result_limit} rows.\n",
447
+
"Examples:\n", # nosec B608 - example text only
448
+
f"- SELECT * FROM {example_name} WHERE REGEXP_MATCHES(message, '(?i)error|fail') LIMIT {result_limit}", # nosec B608 - example text only
449
+
f"- SELECT dept, COUNT(*) AS n FROM {example_name} WHERE status = 'active' GROUP BY dept HAVING n > 10 ORDER BY n DESC LIMIT {result_limit}", # nosec B608 - example text only
450
+
f"- Paging: page 2 of size {result_limit}\n SELECT * FROM {example_name} ORDER BY created_at DESC LIMIT {result_limit} OFFSET {result_limit}", # nosec B608 - example text only
443
451
]
444
452
)
445
453
enhanced_description="\n".join(lines)
@@ -449,6 +457,7 @@ def analyze_func(
449
457
description=enhanced_description,
450
458
_func=analyze_func,
451
459
max_result_limit=result_limit,
460
+
add_limit_parameter=False,
452
461
)
453
462
returntool
454
463
@@ -473,35 +482,41 @@ def _apply_paging(
473
482
order_by: list[str] |None,
474
483
sort_ascending: bool|None,
475
484
) ->LogicalPlan:
476
-
"""Apply deterministic paging semantics: ORDER BY + LIMIT/OFFSET via SQL fallback.
485
+
"""Apply ordering, limit, and offset via a single SQL statement.
477
486
478
-
- If offset is provided, order_by must also be provided; performs SQL-based ORDER BY LIMIT OFFSET.
479
-
- If only limit is provided, uses DataFrame.limit.
480
-
- Otherwise, returns the original plan.
487
+
- If offset is provided, order_by must also be provided to ensure deterministic paging.
488
+
- Validates that all order_by columns exist.
489
+
- Builds: SELECT * FROM {src} [ORDER BY ...] [LIMIT N] [OFFSET M]
490
+
- When no ordering/limit/offset are provided, returns the original plan.
481
491
"""
482
-
ifoffsetisnotNone:
483
-
ifnotorder_by:
484
-
raiseValidationError("offset requires order_by to ensure deterministic paging.")
base_sql="SELECT * FROM {src} ORDER BY "+safe_order_by+f" {direction}"#nosec B608: little to no SQL injection risk as this is running on limited user-provided dataframe.
495
-
iflim_valisnotNone:
496
-
base_sql+=f" LIMIT {lim_val}"
512
+
base_sql+=" ORDER BY "+safe_order_by+f" {direction}"#nosec B608
0 commit comments