@@ -1,5 +1,6 @@
+import asyncio
 import json
-from typing import Callable
+from typing import Any, Callable
 
 import pytest
 from openai.types.chat import ChatCompletion, ChatCompletionMessage
@@ -23,6 +24,18 @@
 test_response = make_text_unique(TEST_RESPONSE)
 
 
+def _run_score_sync_or_async(
+    tlm_chat: TLMChatCompletion,
+    response: ChatCompletion,
+    is_async: bool,
+    **openai_kwargs: Any,
+) -> TLMScore:
+    """Runs either sync or async score method based on is_async parameter."""
+    if is_async:
+        return asyncio.run(tlm_chat.score_async(response=response, **openai_kwargs))
+    return tlm_chat.score(response=response, **openai_kwargs)
+
+
 def test_get_model_name() -> None:
     tlm = TLMChatCompletion()
     model_name = tlm.get_model_name()
@@ -35,7 +48,8 @@ def test_get_model_name() -> None:
     "quality_preset",
     ["base", "low", "medium", "high", "best"],
 )
-def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset) -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset, is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(quality_preset=quality_preset)
     openai_kwargs = {
         "model": "gpt-4.1-mini",
@@ -55,13 +69,14 @@ def test_tlm_chat_completion_score(quality_preset: TLMQualityPreset) -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_options() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_options(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(options={"log": ["explanation", "perplexity"]})
     openai_kwargs = {
         "model": "gpt-4.1-mini",
@@ -81,13 +96,14 @@ def test_tlm_chat_completion_score_with_options() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_tools() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_tools(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion()
     openai_kwargs = {
         "model": "gpt-4.1-mini",
@@ -126,13 +142,14 @@ def test_tlm_chat_completion_score_with_tools() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_score_with_structured_output() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_with_structured_output(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion()
     openai_kwargs = {
         "model": "gpt-4.1-mini",
@@ -200,13 +217,14 @@ def test_tlm_chat_completion_score_with_structured_output() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
 
 
-def test_tlm_chat_completion_structured_output_per_field_scoring() -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_structured_output_per_field_scoring(is_async: bool) -> None:
     tlm_chat = TLMChatCompletion(options={"log": ["per_field_score"]})
 
     openai_kwargs = {
@@ -275,7 +293,7 @@ def test_tlm_chat_completion_structured_output_per_field_scoring() -> None:
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert is_trustworthiness_score_json_format(score)
@@ -339,7 +357,10 @@ def test_tlm_chat_completion_score_missing_messages() -> None:
     ],
     ids=["bad_arguments", "good_arguments"],
 )
-def test_tlm_chat_completion_score_tool_calls(arguments: str, condition: Callable[[TLMScore], bool]) -> None:
+@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
+def test_tlm_chat_completion_score_tool_calls(
+    arguments: str, condition: Callable[[TLMScore], bool], is_async: bool
+) -> None:
     tlm_chat = TLMChatCompletion()
 
     openai_kwargs = {
@@ -390,7 +411,7 @@ def test_tlm_chat_completion_score_tool_calls(arguments: str, condition: Callabl
         object="chat.completion",
     )
 
-    score = tlm_chat.score(response=response, **openai_kwargs)
+    score = _run_score_sync_or_async(tlm_chat, response, is_async, **openai_kwargs)
 
     assert score is not None
     assert condition(score)
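
The change above folds async coverage into the existing sync tests: each test body now calls _run_score_sync_or_async, which either invokes score directly or wraps score_async in asyncio.run, and a parametrize marker with ids=["sync", "async"] doubles every case. A minimal, self-contained sketch of the same pattern follows; the Scorer class and its methods are hypothetical stand-ins used only for illustration, not part of the library under test.

import asyncio

import pytest


class Scorer:
    # Hypothetical stand-in exposing a sync method and its async twin.
    def score(self, value: int) -> int:
        return value * 2

    async def score_async(self, value: int) -> int:
        return value * 2


def _run_sync_or_async(scorer: Scorer, value: int, is_async: bool) -> int:
    # Dispatch to the async variant via asyncio.run so the test body stays synchronous.
    if is_async:
        return asyncio.run(scorer.score_async(value))
    return scorer.score(value)


@pytest.mark.parametrize("is_async", [False, True], ids=["sync", "async"])
def test_score_both_paths(is_async: bool) -> None:
    # One test body exercises both code paths; pytest generates [sync] and [async] ids.
    assert _run_sync_or_async(Scorer(), 21, is_async) == 42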