Skip to content

Commit cbf16eb

Browse files
authored
feat: Make timeouts configurable in semantic LLM operations (#270)
### TL;DR Added a configurable request timeout for all semantic operators in Fenic. ### What changed? - Added an optional `request_timeout` parameter to all semantic operators and extensions (map, extract, predicate, reduce, classify, analyze_sentiment, summarize, parse_pdf, join, sim_join) - Added validation that `request_timeout` is greater than 0 and does not exceed the system maximum - Updated all LLM provider clients to use HTTP clients with a large 10-minute timeout - Modified the inference layer to respect this timeout parameter when making LLM requests - Updated the model client to use the request-specific timeout, falling back to the old 120-second default - Propagated the timeout parameter through all relevant classes in the execution path - Added a tool for testing long LLM requests - Note: embedding request objects do not have a timeout ### How to test? 1. Test with a standard request: ```python import fenic as fn df = fn.DataFrame({"text": ["This is a long document..."]}) result = df.select(fn.semantic.map("Summarize this text", text=fn.col("text"))) ``` 2. Test with a custom timeout: ```python # Set a longer timeout for complex operations result = df.select(fn.semantic.map("Summarize this text", text=fn.col("text"), request_timeout=300.0)) # Set a shorter timeout for quick operations result = df.select(fn.semantic.analyze_sentiment(fn.col("text"), request_timeout=30.0)) ``` 3. Verify timeout behavior by intentionally setting a very short timeout: ```python # This should trigger a timeout and retry result = df.select(fn.semantic.extract(fn.col("text"), MySchema, request_timeout=0.1)) ``` ### Why make this change? 
Different semantic operations may require different timeout thresholds: - Complex operations, such as parsing large PDFs or extracting structured data from long documents, may need longer timeouts - Simple operations, such as sentiment analysis, might benefit from shorter timeouts - Users with specific latency requirements can now customize timeouts to match their needs. This change improves flexibility and user control over request handling, especially for workloads with varying complexity and time sensitivity.
1 parent 00f5489 commit cbf16eb

File tree

25 files changed

+490
-29
lines changed

25 files changed

+490
-29
lines changed

src/fenic/_backends/local/semantic_operators/analyze_sentiment.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,7 @@ def __init__(
134134
model: LanguageModel,
135135
temperature: float,
136136
model_alias: Optional[ResolvedModelAlias] = None,
137+
request_timeout: Optional[float] = None,
137138
):
138139
super().__init__(
139140
input,
@@ -145,6 +146,7 @@ def __init__(
145146
temperature=temperature,
146147
response_format=SENTIMENT_ANALYSIS_FORMAT,
147148
model_profile=model_alias.profile if model_alias else None,
149+
request_timeout=request_timeout,
148150
),
149151
),
150152
EXAMPLES,

src/fenic/_backends/local/semantic_operators/base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ def send_requests(
7878
response_format=self.inference_config.response_format,
7979
top_logprobs=self.inference_config.top_logprobs,
8080
model_profile=self.inference_config.model_profile,
81+
request_timeout=self.inference_config.request_timeout,
8182
)
8283

8384
completions = [

src/fenic/_backends/local/semantic_operators/classify.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def __init__(
4343
temperature: float,
4444
examples: Optional[ClassifyExampleCollection] = None,
4545
model_alias: Optional[ResolvedModelAlias] = None,
46+
request_timeout: Optional[float] = None,
4647
):
4748
self.classes = classes
4849
self.valid_labels = {class_def.label for class_def in classes}
@@ -59,6 +60,7 @@ def __init__(
5960
temperature=temperature,
6061
response_format=ResolvedResponseFormat.from_pydantic_model(self.output_model, generate_struct_type=False),
6162
model_profile=model_alias.profile if model_alias else None,
63+
request_timeout=request_timeout,
6264
),
6365
),
6466
examples,

src/fenic/_backends/local/semantic_operators/extract.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __init__(
5050
max_output_tokens: int,
5151
temperature: float,
5252
model_alias: Optional[ResolvedModelAlias] = None,
53+
request_timeout: Optional[float] = None,
5354
):
5455
self.resolved_format = response_format
5556
super().__init__(
@@ -61,6 +62,7 @@ def __init__(
6162
temperature=temperature,
6263
response_format=response_format,
6364
model_profile=model_alias.profile if model_alias else None,
65+
request_timeout=request_timeout,
6466
),
6567
model=model,
6668
),

src/fenic/_backends/local/semantic_operators/map.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ def __init__(
5353
model_alias: Optional[ResolvedModelAlias] = None,
5454
response_format: Optional[ResolvedResponseFormat] = None,
5555
examples: Optional[MapExampleCollection] = None,
56+
request_timeout: Optional[float] = None,
5657
):
5758
super().__init__(
5859
input,
@@ -64,6 +65,7 @@ def __init__(
6465
response_format=response_format,
6566
temperature=temperature,
6667
model_profile=model_alias.profile if model_alias else None,
68+
request_timeout=request_timeout,
6769
),
6870
),
6971
jinja_template=jinja2.Template(jinja_template),

src/fenic/_backends/local/semantic_operators/parse_pdf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def __init__(
5050
describe_images: bool = False,
5151
model_alias: Optional[ResolvedModelAlias] = None,
5252
max_output_tokens: Optional[int] = None,
53+
request_timeout: Optional[float] = None,
5354
):
5455
self.page_separator = page_separator
5556
self.describe_images = describe_images
@@ -68,6 +69,7 @@ def __init__(
6869
max_output_tokens=max_output_tokens,
6970
temperature=1.0, # Use a higher temperature so gemini flash models can handle complex table formatting. For more info see the conversation here: https://discuss.ai.google.dev/t/gemini-2-0-flash-has-a-weird-bug/65119/26
7071
model_profile=model_alias.profile if model_alias else None,
72+
request_timeout=request_timeout,
7173
),
7274
),
7375
examples=None, # PDF parsing doesn't use examples

src/fenic/_backends/local/semantic_operators/predicate.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ def __init__(
4646
temperature: float,
4747
examples: Optional[PredicateExampleCollection] = None,
4848
model_alias: Optional[ResolvedModelAlias] = None,
49+
request_timeout: Optional[float] = None,
4950
):
5051
super().__init__(
5152
input,
@@ -56,6 +57,7 @@ def __init__(
5657
response_format=PREDICATE_FORMAT,
5758
temperature=temperature,
5859
model_profile=model_alias.profile if model_alias else None,
60+
request_timeout=request_timeout,
5961
),
6062
model=model,
6163
),

src/fenic/_backends/local/semantic_operators/summarize.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ def __init__(
4545
temperature: float,
4646
model: LanguageModel,
4747
model_alias: Optional[ResolvedModelAlias] = None,
48+
request_timeout: Optional[float] = None,
4849
):
4950
self.format = format
5051

@@ -56,6 +57,7 @@ def __init__(
5657
max_output_tokens=self.get_max_tokens(),
5758
temperature=temperature,
5859
model_profile=model_alias.profile if model_alias else None,
60+
request_timeout=request_timeout,
5961
),
6062
model=model,
6163
),

src/fenic/_backends/local/transpiler/expr_converter.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,7 @@ def sem_map_fn(batch: pl.Series) -> pl.Series:
581581
temperature=logical.temperature,
582582
response_format=logical.response_format,
583583
model_alias=logical.model_alias,
584+
request_timeout=logical.request_timeout,
584585
).execute()
585586

586587
column_exprs = [self._convert_expr(expr) for expr in logical.exprs]
@@ -680,6 +681,7 @@ def sem_ext_fn(batch: pl.Series) -> pl.Series:
680681
max_output_tokens=logical.max_tokens,
681682
temperature=logical.temperature,
682683
model_alias=logical.model_alias,
684+
request_timeout=logical.request_timeout,
683685
).execute()
684686

685687
return self._convert_expr(logical.expr).map_batches(
@@ -700,6 +702,7 @@ def sem_pred_fn(batch: pl.Series) -> pl.Series:
700702
examples=logical.examples,
701703
temperature=logical.temperature,
702704
model_alias=logical.model_alias,
705+
request_timeout=logical.request_timeout,
703706
).execute()
704707

705708
column_exprs = [self._convert_expr(expr) for expr in logical.exprs]
@@ -722,6 +725,7 @@ def sem_classify_fn(batch: pl.Series) -> pl.Series:
722725
temperature=logical.temperature,
723726
examples=logical.examples,
724727
model_alias=logical.model_alias,
728+
request_timeout=logical.request_timeout,
725729
).execute()
726730

727731
return self._convert_expr(logical.expr).map_batches(
@@ -737,6 +741,7 @@ def sem_sentiment_fn(batch: pl.Series) -> pl.Series:
737741
model=self.session_state.get_language_model(logical.model_alias),
738742
temperature=logical.temperature,
739743
model_alias=logical.model_alias,
744+
request_timeout=logical.request_timeout,
740745
).execute()
741746

742747
return self._convert_expr(logical.expr).map_batches(
@@ -753,7 +758,7 @@ def sem_summarize_fn(batch: pl.Series) -> pl.Series:
753758
format=logical.format,
754759
temperature=logical.temperature,
755760
model=self.session_state.get_language_model(logical.model_alias),
756-
761+
request_timeout=logical.request_timeout,
757762
).execute()
758763

759764
return self._convert_expr(logical.expr).map_batches(
@@ -770,6 +775,7 @@ def parse_pdf_fn(batch: pl.Series) -> pl.Series:
770775
describe_images=logical.describe_images,
771776
model_alias=logical.model_alias,
772777
max_output_tokens=logical.max_output_tokens,
778+
request_timeout=logical.request_timeout,
773779
).execute()
774780

775781
return self._convert_expr(logical.expr).map_batches(

src/fenic/_constants.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
DEFAULT_MAX_TOKENS = 512
2121
DEFAULT_TEMPERATURE: float = 0
2222

23+
# Model client timeout constants
24+
MAX_MODEL_CLIENT_TIMEOUT = 600 # 10 minutes
25+
DEFAULT_MODEL_CLIENT_TIMEOUT = 120 # 2 minutes
26+
2327
# If the output type is known to us before runtime, this is a rough upper bound.
2428
# Higher than expected because Anthropic estimates output tokens differently than other providers
2529
# during tool use.

0 commit comments

Comments
 (0)