
Commit b42c044

Make prompt/score methods capture exceptions by default (#51)
1 parent: e056513

12 files changed (+194 −566 lines)

CHANGELOG.md

Lines changed: 11 additions & 1 deletion
```diff
@@ -7,8 +7,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.1.0] - 2025-04-21
+
+### Changed
+
+- All `.prompt()` / `.get_trustworthiness_score()` / `.generate()` / `.score()` methods now catch any errors and return `None` values alongside a log of why the exception occurred
+- `try_` methods are deprecated and share the same functionality as the "non-try" methods
+
 ## [1.0.22] - 2025-04-18
 
+### Added
+
 - Update `response_helpfulness` default criteria
 
 ## [1.0.21] - 2025-04-17
@@ -125,7 +134,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Release of the Cleanlab TLM Python client.
 
-[Unreleased]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.0.23...HEAD
+[Unreleased]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.0...HEAD
+[1.1.0]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.0.23...v1.1.0
 [1.0.23]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.0.22...v1.0.23
 [1.0.22]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.0.21...v1.0.22
 [1.0.21]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.0.20...v1.0.21
```
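
In practice this means a failure on one item of a batch no longer raises and discards the rest. A minimal sketch of the new default behavior (`TLM` and `prompt()` are the library's real API; the exact keys of the per-item error log are an assumption based on the `error_log` structure in `exception_handling.py` below):

```python
from cleanlab_tlm import TLM

tlm = TLM()  # reads the Cleanlab API key from the environment

# As of 1.1.0, errors are captured by default: a failing prompt yields
# None values plus an error log instead of aborting the whole batch.
results = tlm.prompt(["What is 1 + 1?", "Summarize this document: ..."])

for item in results:
    if item["trustworthiness_score"] is None:
        # Assumed log shape, mirroring error_log in exception_handling.py:
        # {"error": {"message": ..., "retryable": ...}}
        err = item.get("log", {}).get("error", {})
        print("failed:", err.get("message"), "| retryable:", err.get("retryable"))
    else:
        print(item["response"], item["trustworthiness_score"])
```

The deprecated `TLM.try_prompt()` / `TLM.try_get_trustworthiness_score()` now behave identically to their non-`try` counterparts.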

src/cleanlab_tlm/__about__.py

Lines changed: 1 addition & 1 deletion
```diff
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: MIT
-__version__ = "1.0.23"
+__version__ = "1.1.0"
```

src/cleanlab_tlm/internal/exception_handling.py

Lines changed: 4 additions & 7 deletions
```diff
@@ -58,7 +58,7 @@ def decorator(
        async def wrapper(*args: Any, **kwargs: Any) -> ResponseT:
            capture_exceptions = kwargs.get("capture_exceptions", False)
            batch_index = kwargs.get("batch_index")
-           evals = kwargs.get("evals", [])
+           evals = getattr(args[0], "_evals", [])
            try:
                return await func(*args, **kwargs)
            except asyncio.TimeoutError:
@@ -162,7 +162,9 @@ def _handle_exception(
    error_log = {"error": {"message": error_message, "retryable": retryable}}
 
    # Helper function to create evaluation metrics dictionary
-   def create_eval_metrics(include_response: bool = False) -> dict[str, Union[dict[str, Any], None]]:
+   def create_eval_metrics(
+       include_response: bool = False,
+   ) -> dict[str, Union[dict[str, Any], None]]:
        result: dict[str, Union[dict[str, Any], None]] = {
            "trustworthiness": {
                "score": None,
@@ -206,9 +208,4 @@ def create_eval_metrics(include_response: bool = False) -> dict[str, Union[dict[
 
    raise ValueError(f"Unsupported response type: {response_type}")
 
-   if len(e.args) > 0 and response_type != RAG_GENERATE_RESPONSE_TYPE and response_type != RAG_SCORE_RESPONSE_TYPE:
-       additional_message = "Consider using `TLM.try_prompt()` or `TLM.try_get_trustworthiness_score()` to gracefully handle errors and preserve partial results. For large datasets, consider also running it on multiple smaller batches."
-       new_args = (str(e.args[0]) + "\n" + additional_message,) + e.args[1:]
-       raise type(e)(*new_args)
-
    raise e  # in the case where the error has no message/args
```
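
Two things to note in this diff: the message steering users toward the `try_` variants is removed, since exception capture is now the default path, and `wrapper` now reads `evals` from the instance (`args[0]` is `self`) via `getattr(args[0], "_evals", [])` rather than expecting an `evals` keyword argument. Below is a condensed, hypothetical sketch of the decorator pattern this file implements; the real `handle_tlm_exceptions` also threads through `capture_exceptions`, `batch_index`, and per-eval metrics, which are omitted here, and `_null_response` is an illustrative helper, not the library's code:

```python
import asyncio
from functools import wraps
from typing import Any, Awaitable, Callable


def handle_tlm_exceptions(response_type: str) -> Callable[[Callable[..., Awaitable[Any]]], Callable[..., Awaitable[Any]]]:
    """Turn exceptions raised by an async TLM call into a null result plus an error log."""

    def decorator(func: Callable[..., Awaitable[Any]]) -> Callable[..., Awaitable[Any]]:
        @wraps(func)
        async def wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                return await func(*args, **kwargs)
            except asyncio.TimeoutError:
                # Timeouts are transient, so mark them retryable.
                return _null_response("Timeout while waiting for prediction.", retryable=True)
            except Exception as e:
                return _null_response(str(e), retryable=False)

        return wrapper

    return decorator


def _null_response(message: str, retryable: bool) -> dict[str, Any]:
    # Mirrors the error_log structure visible in the diff above.
    return {
        "response": None,
        "trustworthiness_score": None,
        "log": {"error": {"message": message, "retryable": retryable}},
    }
```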

src/cleanlab_tlm/internal/validation.py

Lines changed: 0 additions & 28 deletions
```diff
@@ -35,16 +35,6 @@ def validate_tlm_prompt(prompt: Union[str, Sequence[str]]) -> None:
        )
 
 
-def validate_tlm_try_prompt(prompt: Sequence[str]) -> None:
-   if isinstance(prompt, str):
-       raise ValidationError("Invalid type str, prompt must be a list/iterable of strings.")
-
-   if isinstance(prompt, Sequence) and any(not isinstance(p, str) for p in prompt):
-       raise ValidationError(
-           "Some items in prompt are of invalid types, all items in the prompt list must be of type str."
-       )
-
-
 def validate_tlm_prompt_response(prompt: Union[str, Sequence[str]], response: Union[str, Sequence[str]]) -> None:
    if isinstance(prompt, str):
        if not isinstance(response, str):
@@ -74,24 +64,6 @@ def validate_tlm_prompt_response(prompt: Union[str, Sequence[str]], response: Un
        )
 
 
-def validate_try_tlm_prompt_response(prompt: Sequence[str], response: Sequence[str]) -> None:
-   if isinstance(prompt, str):
-       raise ValidationError("Invalid type str, prompt must be a list/iterable of strings.")
-
-   if isinstance(prompt, Sequence):
-       if len(prompt) != len(response):
-           raise ValidationError("Length of the prompt and response lists must match.")
-
-       if any(not isinstance(p, str) for p in prompt):
-           raise ValidationError(
-               "Some items in prompt are of invalid types, all items in the prompt list must be of type str."
-           )
-       if any(not isinstance(r, str) for r in response):
-           raise ValidationError(
-               "Some items in response are of invalid types, all items in the response list must be of type str."
-           )
-
-
 def validate_tlm_options(options: Any, support_custom_eval_criteria: bool = True) -> None:
    from cleanlab_tlm.tlm import TLMOptions
```
