
Commit 5d812ab

added score_async (#125)
* added score_async
* fmt
* update changelog
1 parent 96fa4cc commit 5d812ab

File tree

3 files changed: +68 -23 lines


CHANGELOG.md

Lines changed: 4 additions & 0 deletions
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Added
+
+- Add `score_async` API for TLMChatCompletion
+
 ## [1.1.34] - 2025-09-24
 
 ### Added

src/cleanlab_tlm/utils/chat_completions.py

Lines changed: 30 additions & 10 deletions
@@ -78,6 +78,28 @@ def score(
     ) -> TLMScore:
         """Score the trustworthiness of an OpenAI ChatCompletion response.
 
+        Args:
+            response (ChatCompletion): The OpenAI ChatCompletion response object to evaluate
+            **openai_kwargs (Any): The original kwargs passed to OpenAI's create() method, must include 'messages'
+
+        Returns:
+            TLMScore: A dict containing the trustworthiness score and optional logs
+        """
+        return self._event_loop.run_until_complete(self.score_async(response=response, **openai_kwargs))
+
+    async def score_async(
+        self,
+        *,
+        response: "ChatCompletion",
+        **openai_kwargs: Any,
+    ) -> TLMScore:
+        """Asynchronously score the trustworthiness of an OpenAI ChatCompletion response.
+        This method is similar to the [`score()`](#method-score) method but operates asynchronously,
+        allowing for non-blocking concurrent operations.
+
+        Use this method if you want to score multiple ChatCompletion responses concurrently
+        without blocking the execution of other operations.
+
         Args:
             response (ChatCompletion): The OpenAI ChatCompletion response object to evaluate
             **openai_kwargs (Any): The original kwargs passed to OpenAI's create() method, must include 'messages'

@@ -113,15 +135,13 @@ def score(
             combined_kwargs["response_format"] = type_to_response_format_param(combined_kwargs["response_format"])
         return cast(
             TLMScore,
-            self._event_loop.run_until_complete(
-                asyncio.wait_for(
-                    tlm_chat_completions_score(
-                        api_key=self._api_key,
-                        response=response,
-                        **combined_kwargs,
-                    ),
-                    timeout=self._timeout,
-                )
+            await asyncio.wait_for(
+                tlm_chat_completions_score(
+                    api_key=self._api_key,
+                    response=response,
+                    **combined_kwargs,
+                ),
+                timeout=self._timeout,
             ),
         )

@@ -131,7 +151,7 @@ def score(
         prompt_text = _form_prompt_chat_completions_api(messages, tools)
         response_text = form_response_string_chat_completions(response=response)
 
-        return cast(TLMScore, self._tlm.get_trustworthiness_score(prompt_text, response_text))
+        return cast(TLMScore, await self._tlm.get_trustworthiness_score_async(prompt_text, response_text))
 
     def get_explanation(
         self,
tests/test_chat_completions.py

Lines changed: 34 additions & 13 deletions
@@ -1,5 +1,6 @@
+import asyncio
 import json
-from typing import Callable
+from typing import Any, Callable
 
 import pytest
 from openai.types.chat import ChatCompletion, ChatCompletionMessage

@@ -23,6 +24,18 @@
 test_response = make_text_unique(TEST_RESPONSE)
 
 
+def _run_score_sync_or_async(
+    tlm_chat: TLMChatCompletion,
+    response: ChatCompletion,
+    is_async: bool,
+    **openai_kwargs: Any,
+) -> TLMScore:
+    """Runs either sync or async score method based on is_async parameter."""
+    if is_async:
+        return asyncio.run(tlm_chat.score_async(response=response, **openai_kwargs))
+    return tlm_chat.score(response=response, **openai_kwargs)
+
+
 def test_get_model_name() -> None:
     tlm = TLMChatCompletion()
     model_name = tlm.get_model_name()

@@ -35,7 +48,8 @@ def test_get_model_name() -> None:
     "quality_preset",
     ["base", "low", "medium", "high", "best"],
 )
-def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset) -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset, is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(quality_preset=quality_preset)
     openai_kwargs = {
         "model": "gpt-4.1-mini",

@@ -55,13 +69,14 @@ def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset) -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_options() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_options(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(options={"log": ["explanation", "perplexity"]})
     openai_kwargs = {
         "model": "gpt-4.1-mini",

@@ -81,13 +96,14 @@ def test_tlm_chat_completion_score_with_options() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_tools() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_tools(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion()
     openai_kwargs = {
         "model": "gpt-4.1-mini",

@@ -126,13 +142,14 @@ def test_tlm_chat_completion_score_with_tools() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_structured_output() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_structured_output(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion()
     openai_kwargs = {
         "model": "gpt-4.1-mini",

@@ -200,13 +217,14 @@ def test_tlm_chat_completion_score_with_structured_output() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_structured_output_per_field_scoring() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_structured_output_per_field_scoring(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(options={"log": ["per_field_score"]})
 
     openai_kwargs = {

@@ -275,7 +293,7 @@ def test_tlm_chat_completion_structured_output_per_field_scoring() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)

@@ -339,7 +357,10 @@ def test_tlm_chat_completion_score_missing_messages() -> None:
     ],
     ids=["bad_arguments", "good_arguments"],
 )
-def test_tlm_chat_completion_score_tool_calls(arguments: str, condition: Callable[[TLMScore], bool]) -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_tool_calls(
+    arguments: str, condition: Callable[[TLMScore], bool], is_async: bool
+) -> None:
     tlm_chat = TLMChatCompletion()
 
     openai_kwargs = {

@@ -390,7 +411,7 @@ def test_tlm_chat_completion_score_tool_calls(arguments: str, condition: Callabl
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert condition(score)
