cleanlab
diff --git a/CHANGELOG.md b/CHANGELOG.md
Lines changed: 8 additions & 1 deletion
diff --git a/src/cleanlab_tlm/__about__.py b/src/cleanlab_tlm/__about__.py
Lines changed: 1 addition & 1 deletion
diff --git a/src/cleanlab_tlm/internal/api/api.py b/src/cleanlab_tlm/internal/api/api.py
Lines changed: 43 additions & 0 deletions
diff --git a/src/cleanlab_tlm/internal/validation.py b/src/cleanlab_tlm/internal/validation.py
Lines changed: 161 additions & 3 deletions
@@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.1.30] - 2025-09-09
+
+### Added
+
+- Add `get_explanation()` API for TLM, TrustworthyRAG and TLMChatCompletions
+
 ## [1.1.29] - 2025-09-03
 
 ### Added
@@ -341,7 +347,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Release of the Cleanlab TLM Python client.
 
 
-[Unreleased]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.29...HEAD
+[Unreleased]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.30...HEAD
+[1.1.30]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.29...v1.1.30
 [1.1.29]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.28...v1.1.29
 [1.1.28]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.27...v1.1.28
 [1.1.27]: https://github.com/cleanlab/cleanlab-tlm/compare/v1.1.26...v1.1.27
 
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: MIT
-__version__ = "1.1.29"
+__version__ = "1.1.30"
@@ -55,6 +55,7 @@
 tlm_base_url = f"{base_url}/v0/trustworthy_llm"
 tlm_rag_base_url = f"{base_url}/v1/rag_trustworthy_llm"
 tlm_openai_base_url = f"{base_url}/v1/openai_trustworthy_llm"
+# root of the explanation routes; tlm_get_explanation() appends "/get_explanation" to it
+tlm_explanation_base_url = f"{base_url}/v1/tlm_explanation"
 
 
 def _construct_headers(api_key: Optional[str], content_type: Optional[str] = "application/json") -> JSONDict:
@@ -577,3 +578,45 @@ async def tlm_chat_completions_score(
             await client_session.close()
 
     return cast(JSONDict, res_json)
+
+
+@tlm_retry
+async def tlm_get_explanation(
+    api_key: str,
+    prompt: str,
+    formatted_tlm_result: dict[str, Any],
+    options: Optional[JSONDict],
+    rate_handler: TlmRateHandler,
+    client_session: Optional[aiohttp.ClientSession] = None,
+    batch_index: Optional[int] = None,
+) -> JSONDict:
+    """POST a prompt and its formatted TLM result to the explanation endpoint.
+
+    Args:
+        api_key: key passed to ``_construct_headers`` to build the request headers.
+        prompt: prompt text, sent under ``_TLM_PROMPT_KEY``.
+        formatted_tlm_result: result dict, sent under ``_TLM_RESPONSE_KEY``.
+        options: request options, sent under ``_TLM_OPTIONS_KEY``; a falsy value
+            is sent as an empty dict.
+        rate_handler: async context manager wrapped around the request and the
+            response checks.
+        client_session: session to issue the request with; when not provided, a
+            session is created for this call and closed before returning.
+        batch_index: forwarded to the TLM client/API error handlers.
+
+    Returns:
+        The JSON body of the response.
+    """
+    session = client_session or aiohttp.ClientSession()
+    # only a session created here is closed on exit; a caller-supplied one is left open
+    owns_session = session is not client_session
+
+    try:
+        async with rate_handler:
+            url = f"{tlm_explanation_base_url}/get_explanation"
+            payload = {
+                _TLM_PROMPT_KEY: prompt,
+                _TLM_RESPONSE_KEY: formatted_tlm_result,
+                _TLM_OPTIONS_KEY: options or {},
+            }
+            res = await session.post(url, json=payload, headers=_construct_headers(api_key))
+
+            # the body is parsed before the handlers run
+            res_json = await res.json()
+
+            # NOTE(review): the handlers presumably raise on error responses - confirm
+            await handle_api_key_error_from_resp(res)
+            await handle_http_bad_request_error_from_resp(res)
+            handle_rate_limit_error_from_resp(res)
+            await handle_tlm_client_error_from_resp(res, batch_index)
+            await handle_tlm_api_error_from_resp(res, batch_index)
+
+    finally:
+        if owns_session:
+            await session.close()
+
+    return cast(JSONDict, res_json)
@@ -3,7 +3,7 @@
 import os
 import warnings
 from collections.abc import Sequence
-from typing import TYPE_CHECKING, Any, Callable, Optional, Union
+from typing import TYPE_CHECKING, Any, Callable, Optional, Union, cast
 
 from cleanlab_tlm.errors import ValidationError
 from cleanlab_tlm.internal.constants import (
@@ -26,8 +26,8 @@
 from cleanlab_tlm.internal.types import Task
 
 if TYPE_CHECKING:
-    from cleanlab_tlm.tlm import TLMOptions
-    from cleanlab_tlm.utils.rag import Eval
+    from cleanlab_tlm.tlm import TLMOptions, TLMResponse, TLMScore
+    from cleanlab_tlm.utils.rag import Eval, TrustworthyRAGResponse, TrustworthyRAGScore
 
 SKIP_VALIDATE_TLM_OPTIONS: bool = os.environ.get("CLEANLAB_TLM_SKIP_VALIDATE_TLM_OPTIONS", "false").lower() == "true"
 
@@ -366,6 +366,164 @@ def tlm_score_process_response_and_kwargs(
     return [dict(zip(combined_response_keys, values)) for values in combined_response_values_transposed]
 
 
+def tlm_explanation_format_tlm_result(
+    tlm_result: Union[TLMResponse, Sequence[TLMResponse], TLMScore, Sequence[TLMScore]],
+    response: Optional[Union[str, Sequence[str]]] = None,
+) -> Union[dict[str, Any], list[dict[str, Any]]]:
+    """Validate TLM result(s) and make sure each one carries its response text.
+
+    Args:
+        tlm_result: a single result dict, or a sequence of result dicts. Every
+            dict must contain a 'trustworthiness_score' key.
+        response: response text to pair with results that lack a 'response' key
+            (a string for a single dict, a non-string sequence of strings with
+            matching length for a sequence). Must be left as None when the
+            results already contain 'response'.
+
+    Returns:
+        For results lacking 'response': new dict(s) combining the supplied
+        response with the result's keys. For results that already contain
+        'response': the input itself, unchanged.
+
+    Raises:
+        ValidationError: if ``tlm_result`` has the wrong shape, lacks
+            'trustworthiness_score', or if ``response`` is missing, of the wrong
+            type or length, or supplied in addition to a 'response' key.
+    """
+    if isinstance(tlm_result, Sequence):
+        if not all(isinstance(r, dict) for r in tlm_result):
+            raise ValidationError("all items in the tlm_result sequence must be dicts")
+
+        if not all("trustworthiness_score" in r for r in tlm_result):
+            raise ValidationError("all items in the tlm_result sequence must contain a 'trustworthiness_score' key")
+
+        # for .get_trustworthiness_score() cases, the response is passed in as a separate argument
+        if not all("response" in r for r in tlm_result):
+            if response is None:
+                raise ValidationError(
+                    "'response' is required if not provided in tlm_result, pass it in using the 'response' argument"
+                )
+            if not isinstance(response, Sequence) or isinstance(response, str):
+                raise ValidationError("response must be a sequence when tlm_result is a sequence")
+            if len(response) != len(tlm_result):
+                raise ValidationError("response and score sequences must have the same length")
+            if not all(isinstance(r, str) for r in response):
+                raise ValidationError("all items in the response sequence must be strings")
+
+            # distinct loop names: the comprehension must not rebind 'tlm_result'
+            return [{"response": resp, **result} for resp, result in zip(response, tlm_result)]
+
+        # for .prompt() cases, the response is provided in the tlm_result dict
+        if response is not None:
+            raise ValidationError(
+                "response should only be provided once, either using the 'response' argument or in 'tlm_result'"
+            )
+
+        return cast(list[dict[str, Any]], tlm_result)
+
+    if not isinstance(tlm_result, dict):
+        raise ValidationError("tlm_result must be a dict or a sequence of dicts")
+
+    if "trustworthiness_score" not in tlm_result:
+        # the message names the key that is actually checked
+        raise ValidationError("tlm_result must contain a 'trustworthiness_score' key")
+
+    # the .get_trustworthiness_score() case
+    if "response" not in tlm_result:
+        if response is None:
+            raise ValidationError(
+                "'response' is required if not provided in tlm_result, pass it in using the 'response' argument"
+            )
+        if not isinstance(response, str):
+            raise ValidationError("response must be a string when tlm_result is a dict")
+        return {"response": response, **tlm_result}
+
+    # the .prompt() case
+    if response is not None:
+        raise ValidationError(
+            "response should only be provided once, either using the 'response' argument or in 'tlm_result'"
+        )
+    return cast(dict[str, Any], tlm_result)
+
+
+def tlm_explanation_format_trustworthy_rag_result(
+    tlm_result: Union[
+        TrustworthyRAGResponse,
+        Sequence[TrustworthyRAGResponse],
+        TrustworthyRAGScore,
+        Sequence[TrustworthyRAGScore],
+    ],
+    response: Optional[Union[str, Sequence[str]]] = None,
+) -> Union[dict[str, Any], list[dict[str, Any]]]:
+    """Validate TrustworthyRAG result(s) and flatten them into explanation-ready dicts.
+
+    Each returned dict holds 'response', 'trustworthiness_score' (read from
+    ``result["trustworthiness"]["score"]``) and every other key of the
+    'trustworthiness' dict.
+
+    Args:
+        tlm_result: a single result dict, or a sequence of result dicts. Every
+            dict must contain a 'trustworthiness' dict with a non-None 'score'.
+        response: response text to pair with results that lack a 'response' key
+            (a string for a single dict, a non-string sequence of strings with
+            matching length for a sequence). Must be left as None when the
+            results already contain 'response'.
+
+    Returns:
+        One flattened dict for a single result, or a list of them for a sequence.
+
+    Raises:
+        ValidationError: if ``tlm_result`` has the wrong shape or lacks a usable
+            trustworthiness score, or if ``response`` is missing, of the wrong
+            type or length, or supplied in addition to a 'response' key.
+    """
+
+    def _has_score(item: Any) -> bool:
+        # usable only with a 'trustworthiness' dict whose 'score' is present and not None
+        if "trustworthiness" not in item:
+            return False
+        trust = item["trustworthiness"]
+        return isinstance(trust, dict) and "score" in trust and trust["score"] is not None
+
+    def _flatten(answer: Any, item: Any) -> dict[str, Any]:
+        # lift the nested score to 'trustworthiness_score' and carry the other fields along
+        trust = item["trustworthiness"]
+        return {
+            "response": answer,
+            "trustworthiness_score": trust["score"],
+            **{key: value for key, value in trust.items() if key != "score"},
+        }
+
+    if isinstance(tlm_result, Sequence):
+        if any(not isinstance(item, dict) for item in tlm_result):
+            raise ValidationError("all items in the tlm_result sequence must be dicts")
+
+        if any(not _has_score(item) for item in tlm_result):
+            raise ValidationError(
+                "all items in the tlm_result sequence must contain a 'trustworthiness' dict with a non-None 'score' key"
+            )
+
+        if all("response" in item for item in tlm_result):
+            # .generate() results: the response already lives inside each dict
+            if response is not None:
+                raise ValidationError(
+                    "response should only be provided once, either using the 'response' argument or in 'tlm_result'"
+                )
+            return [_flatten(item["response"], item) for item in tlm_result]
+
+        # .score() results: the responses must arrive through the separate argument
+        if response is None:
+            raise ValidationError(
+                "'response' is required if not provided in tlm_result, pass it in using the 'response' argument"
+            )
+        if isinstance(response, str) or not isinstance(response, Sequence):
+            raise ValidationError("response must be a sequence when tlm_result is a sequence")
+        if len(response) != len(tlm_result):
+            raise ValidationError("response and score sequences must have the same length")
+        if any(not isinstance(text, str) for text in response):
+            raise ValidationError("all items in the response sequence must be strings")
+
+        return [_flatten(text, item) for text, item in zip(response, tlm_result)]
+
+    if not isinstance(tlm_result, dict):
+        raise ValidationError("tlm_result must be a dict or a sequence of dicts")
+
+    if not _has_score(tlm_result):
+        raise ValidationError("tlm_result must contain a 'trustworthiness' dict with a non-None 'score' key")
+
+    if "response" in tlm_result:
+        # the .generate() case
+        if response is not None:
+            raise ValidationError(
+                "response should only be provided once, either using the 'response' argument or in 'tlm_result'"
+            )
+        return _flatten(tlm_result["response"], tlm_result)
+
+    # the .score() case
+    if response is None:
+        raise ValidationError(
+            "'response' is required if not provided in tlm_result, pass it in using the 'response' argument"
+        )
+    if not isinstance(response, str):
+        raise ValidationError("response must be a string when tlm_result is a dict")
+
+    return _flatten(response, tlm_result)
+
+
 def validate_tlm_lite_score_options(score_options: Any) -> None:
     invalid_score_keys = set(score_options.keys()).intersection(INVALID_SCORE_OPTIONS)
     if invalid_score_keys:
Original file line number	Diff line number	Diff line change
`@@ -1,2 +1,2 @@`
`1`	`1`	`# SPDX-License-Identifier: MIT`
`2`		`-__version__ = "1.1.29"`
	`2`	`+__version__ = "1.1.30"`