diff --git a/examples/causal_lm_with_uncertainty.ipynb b/examples/causal_lm_with_uncertainty.ipynb
new file mode 100644
index 00000000..22d8e5a8
--- /dev/null
+++ b/examples/causal_lm_with_uncertainty.ipynb
@@ -0,0 +1,267 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%load_ext autoreload\n",
+    "%autoreload 2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\""
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Original inference with LLM"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "[2025-10-25 22:56:14,099] [INFO] [real_accelerator.py:219:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/artemshelmanov/conda/compiler_compat/ld: cannot find -laio: No such file or directory\n",
+      "collect2: error: ld returned 1 exit status\n",
+      "/home/artemshelmanov/conda/compiler_compat/ld: cannot find -lcufile: No such file or directory\n",
+      "collect2: error: ld returned 1 exit status\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dfcc1618364c4b5388a1a18024558a77",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
[...]
 ) -> Dict:
     """Tokenizes input texts using the model's tokenizer.
[...]
diff --git a/src/lm_polygraph/utils/causal_lm_with_uncertainty.py b/src/lm_polygraph/utils/causal_lm_with_uncertainty.py
new file mode 100644
index 00000000..72c59960
--- /dev/null
+++ b/src/lm_polygraph/utils/causal_lm_with_uncertainty.py
@@ -0,0 +1,66 @@
+from lm_polygraph.model_adapters import WhiteboxModelBasic
+from transformers.generation.utils import GenerateDecoderOnlyOutput
+from dataclasses import dataclass, asdict
+from typing import Optional, List, Union
+import torch
+
+
+@dataclass
+class GenerateDecoderOnlyOutputWithUncertainty(GenerateDecoderOnlyOutput):
+    """Extends GenerateDecoderOnlyOutput to include uncertainty scores."""
+
+    uncertainty_score: Optional[Union[float, List[float], torch.Tensor]] = None
+
+
+class CausalLMWithUncertainty:
+    def __init__(self, llm, tokenizer, stat_calculators, estimator):
+        self.llm = llm
+        self.tokenizer = tokenizer
+        self.stat_calculators = stat_calculators
+        self.estimator = estimator
+
+    def generate(self, input_ids, attention_mask=None, **kwargs):
+        max_new_tokens = kwargs.pop("max_new_tokens", None)
+        self.model_adapter = WhiteboxModelBasic(
+            model=self.llm,
+            tokenizer=self.tokenizer,
+            tokenizer_args={
+                "add_special_tokens": False,
+                "return_tensors": "pt",
+                "padding": True,
+                "truncation": True,
+            },
+            model_type="CausalLM",
+            generation_parameters=kwargs,
+        )
+
+        deps = dict()
+        deps["model_inputs"] = {
+            "input_ids": input_ids,
+            **kwargs,
+        }
+        # Forward the attention mask so padded batches generate correctly.
+        if attention_mask is not None:
+            deps["model_inputs"]["attention_mask"] = attention_mask
+        texts = self.tokenizer.batch_decode(input_ids)
+        for calc in self.stat_calculators:
+            deps.update(
+                calc(
+                    deps,
+                    texts=texts,
+                    model=self.model_adapter,
+                    max_new_tokens=max_new_tokens,
+                )
+            )
+
+        uncertainty_score = self.estimator(deps)
+
+        raw_out = deps["out"]
+        out_with_uncertainty = GenerateDecoderOnlyOutputWithUncertainty(
+            **asdict(raw_out),
+            uncertainty_score=uncertainty_score,
+        )
+        return out_with_uncertainty
+
+    def device(self):
+        return self.llm.device
diff --git a/test/test_high_level_api.py b/test/test_high_level_api.py
new file mode 100644
index 00000000..bff61e99
--- /dev/null
+++ b/test/test_high_level_api.py
@@ -0,0 +1,46 @@
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+from lm_polygraph.estimators import MeanTokenEntropy
+from lm_polygraph.stat_calculators import InferCausalLMCalculator, EntropyCalculator
+from lm_polygraph.utils.causal_lm_with_uncertainty import CausalLMWithUncertainty
+
+import torch
+
+
+def test_CausalLMWithUncertainty():
+    if torch.cuda.is_available():
+        device = "cuda"
+    else:
+        device = "cpu"
+
+    model_name = "Qwen/Qwen2.5-0.5B-Instruct"
+    llm = AutoModelForCausalLM.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer.pad_token = tokenizer.eos_token
+    llm = llm.to(device)
+
+    stat_calculators = [InferCausalLMCalculator(tokenize=False), EntropyCalculator()]
+    estimator = MeanTokenEntropy()
+    llm_with_uncertainty = CausalLMWithUncertainty(
+        llm, tokenizer, stat_calculators, estimator
+    )
+
+    prompts = ["Write a short story about a robot learning to paint.\n"]
+
+    chats = [[{"role": "user", "content": prompt}] for prompt in prompts]
+    chat_prompts = tokenizer.apply_chat_template(
+        chats, add_generation_prompt=True, tokenize=False
+    )
+    inputs = tokenizer(chat_prompts, return_tensors="pt").to(device)
+
+    output = llm_with_uncertainty.generate(
+        **inputs, max_new_tokens=30, temperature=0.7, do_sample=True
+    )
+
+    # The wrapper must return both generations and an uncertainty estimate.
+    assert output.sequences is not None
+    assert output.uncertainty_score is not None
+
+    print("LLM output:")
+    print(tokenizer.decode(output.sequences[0], skip_special_tokens=True))
+    print("Uncertainty score: ", output.uncertainty_score)
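For reference, here is a usage sketch of the new high-level API outside the test harness. It wires the same `InferCausalLMCalculator`/`EntropyCalculator`/`MeanTokenEntropy` stack added by this PR, but over a padded batch of two prompts; the left-padding setting and the assumption that `MeanTokenEntropy` yields one score per batch element are illustrative choices, not documented guarantees of the library.

```python
# Usage sketch, assuming the modules added in this PR are importable.
from transformers import AutoModelForCausalLM, AutoTokenizer

from lm_polygraph.estimators import MeanTokenEntropy
from lm_polygraph.stat_calculators import InferCausalLMCalculator, EntropyCalculator
from lm_polygraph.utils.causal_lm_with_uncertainty import CausalLMWithUncertainty

model_name = "Qwen/Qwen2.5-0.5B-Instruct"  # same checkpoint as the test
llm = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"  # assumption: left-pad so decoder-only generation continues from the prompt

llm_uq = CausalLMWithUncertainty(
    llm,
    tokenizer,
    stat_calculators=[InferCausalLMCalculator(tokenize=False), EntropyCalculator()],
    estimator=MeanTokenEntropy(),
)

prompts = ["What is the capital of France?", "Write a haiku about autumn."]
chats = [[{"role": "user", "content": p}] for p in prompts]
chat_prompts = tokenizer.apply_chat_template(
    chats, add_generation_prompt=True, tokenize=False
)
inputs = tokenizer(chat_prompts, return_tensors="pt", padding=True)

out = llm_uq.generate(**inputs, max_new_tokens=30)
# Assumption: the estimator returns one score per input sequence.
for seq, score in zip(out.sequences, out.uncertainty_score):
    print(tokenizer.decode(seq, skip_special_tokens=True))
    print("uncertainty:", score)
```

Since `MeanTokenEntropy` averages token-level entropy over the generated sequence, the scores are length-normalized and can be compared across prompts that elicit responses of different lengths.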