
Commit 8e53f1d

feat: tweak pdf parser for corner cases and add 120s demo

1 parent 3ed6592 commit 8e53f1d

File tree

7 files changed: +773 −20 lines

examples/fenic_in_120_seconds/18_pdf_processing.ipynb

Lines changed: 752 additions & 0 deletions
Large diffs are not rendered by default.

src/fenic/_backends/local/semantic_operators/parse_pdf.py

Lines changed: 3 additions & 2 deletions

@@ -24,8 +24,9 @@ class ParsePDF(BaseSingleColumnFilePathOperator[str, str]):
     """Operator for parsing PDF files using language models with PDF parsing capabilities."""
     SYSTEM_PROMPT = jinja2.Template(dedent("""\
         Transcribe the main content of this PDF document to clean, well-formatted markdown.
-        - Output should be raw markdown, don't surround in code fences or backticks.
-        - Preserve the structure, formatting, headings, lists, and any tables to the best of your ability
+        - Output should be raw markdown, don't surround the whole output in code fences or backticks.
+        - For each topic, create a markdown heading. For key terms, use bold text.
+        - Preserve the structure, formatting, headings, lists, table of contents, and any tables using markdown syntax.
         - Format tables as github markdown tables, however:
           - for table headings, immediately add ' |' after the table heading
         {% if multiple_pages %}
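For orientation, the prompt is a jinja2 template whose rendering depends on a `multiple_pages` flag. Below is a minimal sketch of how such a template renders, using the fragment visible in the diff; the multi-page branch body is hypothetical, since the diff does not show it.

```python
import jinja2
from textwrap import dedent

# Mirror of the updated SYSTEM_PROMPT fragment from the diff above.
prompt_template = jinja2.Template(dedent("""\
    Transcribe the main content of this PDF document to clean, well-formatted markdown.
    - Output should be raw markdown, don't surround the whole output in code fences or backticks.
    - For each topic, create a markdown heading. For key terms, use bold text.
    - Preserve the structure, formatting, headings, lists, table of contents, and any tables using markdown syntax.
    {% if multiple_pages %}
    - Separate each page with a horizontal rule.  {# hypothetical branch body; not shown in the diff #}
    {% endif %}"""))

# The same template yields a single-page or multi-page system prompt.
print(prompt_template.render(multiple_pages=True))
```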

src/fenic/_inference/common_openai/openai_chat_completions_core.py

Lines changed: 1 addition & 2 deletions

@@ -90,10 +90,9 @@ async def make_single_request(
         common_params: dict[str, Any] = {
             "model": self._model,
             "messages": convert_messages(request.messages),
+            "max_completion_tokens": self._get_max_output_token_request_limit(request),
             "n": 1,
         }
-        if request.max_completion_tokens:
-            common_params.update({"max_completion_tokens": request.max_completion_tokens + profile_configuration.expected_additional_reasoning_tokens})
         if request.temperature:
             common_params.update({"temperature": request.temperature})
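The effect of this hunk is that the output-token cap is now set unconditionally from a helper, rather than only when the caller supplied `max_completion_tokens`. A minimal sketch of the new shape, with stand-in names rather than fenic's real types:

```python
from typing import Any, Optional

def build_common_params(
    model: str,
    messages: list[dict[str, str]],
    max_output_limit: int,
    temperature: Optional[float] = None,
) -> dict[str, Any]:
    """Mirrors the new construction: the cap is always present."""
    common_params: dict[str, Any] = {
        "model": model,
        "messages": messages,
        # Previously only set when the request carried max_completion_tokens;
        # now the helper always supplies a limit (guardrail for file parsing).
        "max_completion_tokens": max_output_limit,
        "n": 1,
    }
    if temperature:
        common_params["temperature"] = temperature
    return common_params

params = build_common_params("gpt-4o-mini", [{"role": "user", "content": "hi"}], 8192)
assert params["max_completion_tokens"] == 8192
```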

src/fenic/_inference/google/gemini_native_chat_completions_client.py

Lines changed: 6 additions & 6 deletions

@@ -27,6 +27,7 @@
     TransientException,
 )
 from fenic._inference.rate_limit_strategy import (
+    MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST,
     TokenEstimate,
     UnifiedTokenRateLimitStrategy,
 )
@@ -176,16 +177,14 @@ async def make_single_request(
         """
 
         profile_config = self._profile_manager.get_profile_by_name(request.model_profile)
-        max_output_tokens = self._get_max_output_token_request_limit(request)
-
         generation_config: GenerateContentConfigDict = {
             "temperature": request.temperature,
             "response_logprobs": request.top_logprobs is not None,
             "logprobs": request.top_logprobs,
+            "max_output_tokens": self._get_max_output_token_request_limit(request),
             "system_instruction": request.messages.system,
         }
-        if max_output_tokens is not None:
-            generation_config["max_output_tokens"] = max_output_tokens
+
         generation_config.update(profile_config.additional_generation_config)
         if request.structured_output is not None:
             generation_config.update(
@@ -342,14 +341,15 @@ def _estimate_output_tokens(self, request: FenicCompletionsRequest) -> int:
     def _get_max_output_token_request_limit(self, request: FenicCompletionsRequest) -> Optional[int]:
         """Get the upper limit of output tokens for a request.
 
-        If max_completion_tokens is not set, don't apply a limit and return None.
+        For file parsing requests, use a guardrail limit of 8192 tokens (the lowest output limit of a VLM model we support).
 
         Include the thinking token budget with a safety margin."""
         max_output_tokens = request.max_completion_tokens or 0
         if request.max_completion_tokens is None and request.messages.user_file:
             # Guardrail to ensure the model uses a sane amount of output tokens.
+            # Note: we can't use our token estimation because the pdf could be empty, or have only images (scans).
             # TODO(DY): the semantic operator should dictate how the file affects the token estimate
-            max_output_tokens = self.token_counter.count_file_output_tokens(request.messages) * 2
+            max_output_tokens = MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST
         return max_output_tokens + self._get_expected_additional_reasoning_tokens(request)
 
     def _get_expected_additional_reasoning_tokens(self, request: FenicCompletionsRequest) -> int:
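Taken together, the new limit logic reduces to a small pure function. Here is a sketch of the behavior under assumed inputs; the 8192 value comes from MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST in this commit, while the reasoning-token margin is a stand-in:

```python
from typing import Optional

MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST = 8192  # from rate_limit_strategy.py in this commit

def max_output_token_limit(
    max_completion_tokens: Optional[int],
    has_user_file: bool,
    expected_reasoning_tokens: int,
) -> int:
    """Simplified mirror of _get_max_output_token_request_limit."""
    max_output_tokens = max_completion_tokens or 0
    if max_completion_tokens is None and has_user_file:
        # Guardrail: token estimation can't be trusted for PDFs that are
        # empty or purely scanned images, so use a fixed cap instead.
        max_output_tokens = MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST
    return max_output_tokens + expected_reasoning_tokens

assert max_output_token_limit(None, True, 1024) == 8192 + 1024   # parse-PDF guardrail
assert max_output_token_limit(2048, True, 0) == 2048             # explicit cap wins
assert max_output_token_limit(None, False, 0) == 0               # no file, no cap requested
```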

src/fenic/_inference/openai/openai_batch_chat_completions_client.py

Lines changed: 3 additions & 1 deletion

@@ -15,6 +15,7 @@
 )
 from fenic._inference.openai.openai_provider import OpenAIModelProvider
 from fenic._inference.rate_limit_strategy import (
+    MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST,
     RateLimitStrategy,
     TokenEstimate,
 )
@@ -137,8 +138,9 @@ def _get_max_output_token_request_limit(self, request: FenicCompletionsRequest)
         max_output_tokens = request.max_completion_tokens or 0
         if request.max_completion_tokens is None and request.messages.user_file:
             # Guardrail to ensure the model uses a sane amount of output tokens.
+            # Note: we can't use our token estimation because the pdf could be empty, or have only images (scans).
             # TODO(DY): the semantic operator should dictate how the file affects the token estimate
-            max_output_tokens = self.token_counter.count_file_output_tokens(request.messages) * 2
+            max_output_tokens = MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST
         return max_output_tokens + self._get_expected_additional_reasoning_tokens(request)
 
     def _get_expected_additional_reasoning_tokens(self, request: FenicCompletionsRequest) -> int:
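The motivation for dropping the `count_file_output_tokens(...) * 2` estimate is easiest to see with a degenerate input: a scanned, image-only PDF has almost no extractable text, so a text-based estimate collapses toward zero and would starve the transcription. A toy illustration, where the token counter is a fake chosen to show the failure mode:

```python
def fake_count_file_output_tokens(extracted_text: str) -> int:
    # Stand-in for a text-based estimator: scanned pages extract as "".
    return len(extracted_text.split())

scanned_pdf_text = ""          # image-only scan: nothing to count
old_limit = fake_count_file_output_tokens(scanned_pdf_text) * 2
new_limit = 8192               # MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST

print(old_limit)  # 0 -> the model would be cut off before transcribing anything
print(new_limit)  # fixed guardrail leaves room for the VLM's transcription
```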

src/fenic/_inference/openrouter/openrouter_batch_chat_completions_client.py

Lines changed: 7 additions & 8 deletions

@@ -21,6 +21,7 @@
 )
 from fenic._inference.openrouter.openrouter_provider import OpenRouterModelProvider
 from fenic._inference.rate_limit_strategy import (
+    MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST,
     AdaptiveBackoffRateLimitStrategy,
     RateLimitStrategy,
     TokenEstimate,
@@ -268,17 +269,15 @@ def _estimate_output_tokens(self, request: FenicCompletionsRequest) -> int:
     def _get_max_output_token_request_limit(self, request: FenicCompletionsRequest) -> int:
         """Get the upper limit of output tokens for a request.
 
-        If max_completion_tokens is not set, don't apply a limit and return None.
+        For file parsing requests, use a guardrail limit of 8192 tokens (the lowest output limit of a VLM model we support).
 
         Include the thinking token budget with a safety margin."""
-        if request.max_completion_tokens:
-            max_output_tokens = request.max_completion_tokens
-        elif request.messages.user_file:
+        max_output_tokens = request.max_completion_tokens
+        if request.max_completion_tokens is None and request.messages.user_file:
             # Guardrail to ensure the model uses a sane amount of output tokens.
-            if self._google_token_counter:
-                max_output_tokens = self._google_token_counter.count_file_output_tokens(messages=request.messages) * 2
-            else:
-                max_output_tokens = self.token_counter.count_file_output_tokens(messages=request.messages) * 2
+            # Note: we can't use our token estimation because the pdf could be empty, or have only images (scans).
+            # TODO(DY): the semantic operator should dictate how the file affects the token estimate
+            max_output_tokens = MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST
         return max_output_tokens + self._get_expected_additional_reasoning_tokens(request)
 
     def _estimate_input_tokens(self, request: FenicCompletionsRequest) -> int:
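One nuance in this file: unlike the Gemini and OpenAI variants, the rewritten body assigns `request.max_completion_tokens` directly rather than `... or 0`, so a request with neither a cap nor a file would carry `None` into the final addition; presumably that combination does not reach this client. A condensed sketch of the rewritten control flow, with stubbed names, mirroring the diff:

```python
from typing import Optional

MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST = 8192

def openrouter_limit(
    max_completion_tokens: Optional[int],
    has_user_file: bool,
    expected_reasoning_tokens: int,
) -> int:
    # Mirrors the rewritten body: no more branching between the Google
    # and default token counters; one fixed guardrail for file requests.
    max_output_tokens = max_completion_tokens
    if max_completion_tokens is None and has_user_file:
        max_output_tokens = MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST
    # Note: if both the cap and the file are absent, max_output_tokens is
    # still None here and the addition below would fail; presumably that
    # combination doesn't occur for this client.
    return max_output_tokens + expected_reasoning_tokens

assert openrouter_limit(None, True, 512) == 8192 + 512
```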

src/fenic/_inference/rate_limit_strategy.py

Lines changed: 1 addition & 1 deletion

@@ -10,7 +10,7 @@
 
 logger = logging.getLogger(__name__)
 
-
+MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST = 8192
 @dataclass
 class TokenEstimate:
     input_tokens: int = 0
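Each client touched above imports this constant and adds its reasoning-token margin on top, so the effective per-request ceiling for a parse-PDF call is the constant plus the model's expected thinking budget. A small sanity check under assumed numbers:

```python
MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST = 8192  # value introduced in this commit

expected_reasoning_tokens = 2048  # illustrative thinking budget, not a fenic value
effective_ceiling = MAX_OUTPUT_TOKENS_PER_PARSE_PDF_REQUEST + expected_reasoning_tokens
assert effective_ceiling == 10240
```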
