Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions tools/who_what_benchmark/tests/constants.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be a conftest.py instead: should_cleanup and wwb_cache_path could be session-scoped fixtures.

Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from pathlib import Path
import os
import tempfile


# Root directory for cached models: $OV_CACHE when it is set, otherwise a fresh
# temporary directory. tempfile.mkdtemp() returns a plain string path that
# stays on disk until it is removed explicitly; a TemporaryDirectory object is
# not path-like (os.path.join rejects it with TypeError) and deletes its
# directory as soon as the object is garbage collected.
WWB_CACHE_PATH = Path(os.environ.get('OV_CACHE') or tempfile.mkdtemp()) / 'wwb_cache'
# Cleanup is opt-in through CLEANUP_CACHE. Any non-empty value enables it,
# except the explicit negatives below, so CLEANUP_CACHE=0 keeps the cache.
SHOULD_CLEANUP = os.environ.get('CLEANUP_CACHE', '').strip().lower() not in {'', '0', 'false', 'no', 'off'}
27 changes: 17 additions & 10 deletions tools/who_what_benchmark/tests/test_cli_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
import subprocess # nosec B404
import os
import shutil
from pathlib import Path
import pytest
import logging
import tempfile
import re
from constants import WWB_CACHE_PATH, SHOULD_CLEANUP


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

MODEL_CACHE = tempfile.mkdtemp()
MODEL_CACHE = WWB_CACHE_PATH
OV_IMAGE_MODELS = ["echarlaix/tiny-random-stable-diffusion-xl",
"yujiepan/stable-diffusion-3-tiny-random",
"katuni4ka/tiny-random-flux",
Expand All @@ -37,13 +38,14 @@ def run_wwb(args):

def setup_module():
    """Export every model in OV_IMAGE_MODELS to OpenVINO format under MODEL_CACHE.

    A model whose export directory already exists is skipped, so a cache kept
    between runs is reused instead of being re-exported every time. The export
    command's exit status is not checked; the tests fall back to the plain
    model id when the exported directory is missing.
    """
    for model_id in OV_IMAGE_MODELS:
        model_path = MODEL_CACHE.joinpath(model_id.replace("/", "--"))
        if model_path.exists():
            # Already exported by a previous run that shared this cache.
            continue
        subprocess.run(
            ["optimum-cli", "export", "openvino", "--model", model_id, model_path],
            capture_output=True,
            text=True,
        )


def teardown_module():
    """Remove the model cache when cleanup was requested via SHOULD_CLEANUP."""
    if SHOULD_CLEANUP:
        logger.info("Removing models")
        # MODEL_CACHE is the WWB_CACHE_PATH directory shared with other test
        # modules, so it may already be gone (or never have been created).
        # Do not fail the teardown in that case.
        shutil.rmtree(MODEL_CACHE, ignore_errors=True)


def get_similarity(output: str) -> float:
Expand All @@ -67,11 +69,13 @@ def get_similarity(output: str) -> float:
],
)
def test_image_model_types(model_id, model_type, backend, tmp_path):
MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--"))
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id
wwb_args = [
"--base-model",
model_id,
MODEL_PATH,
"--target-model",
model_id,
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
Expand Down Expand Up @@ -110,11 +114,12 @@ def test_image_model_genai(model_id, model_type, tmp_path):
pytest.xfail("Segfault. Ticket 170877")

GT_FILE = tmp_path / "gt.csv"
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))
MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--"))
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id
Copy link

Copilot AI Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fallback logic reassigns MODEL_PATH from a Path object to a string, creating inconsistent types. Consider using str(MODEL_PATH) in the conditional or ensuring consistent Path usage throughout.

Suggested change
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id
MODEL_PATH = str(MODEL_PATH) if MODEL_PATH.exists() else model_id

Copilot uses AI. Check for mistakes.

Copy link
Contributor Author

@akashchi akashchi Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The underlying from_pretrained method accepts both str and Path arguments, so this should not be an issue.


run_wwb([
"--base-model",
model_id,
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
Expand Down Expand Up @@ -195,9 +200,11 @@ def test_image_model_genai(model_id, model_type, tmp_path):
)
def test_image_custom_dataset(model_id, model_type, backend, tmp_path):
GT_FILE = tmp_path / "test_sd.csv"
MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--"))
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id
wwb_args = [
"--base-model",
model_id,
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
Expand Down
52 changes: 30 additions & 22 deletions tools/who_what_benchmark/tests/test_cli_text.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import os
import shutil
import tempfile
import pandas as pd
import pytest
import logging
import json
import sys

from constants import WWB_CACHE_PATH, SHOULD_CLEANUP

from transformers import AutoTokenizer
from optimum.intel.openvino import OVModelForCausalLM, OVWeightQuantizationConfig

Expand All @@ -18,35 +19,38 @@


model_id = "facebook/opt-125m"
tmp_dir = tempfile.mkdtemp()
base_model_path = os.path.join(tmp_dir, "opt125m")
target_model_path = os.path.join(tmp_dir, "opt125m_int8")
cache_dir = WWB_CACHE_PATH
base_model_path = os.path.join(cache_dir, "opt125m")
target_model_path = os.path.join(cache_dir, "opt125m_int8")

gptq_model_id = "ybelkada/opt-125m-gptq-4bit"
awq_model_id = "TitanML/tiny-mixtral-AWQ-4bit"


def setup_module():
    """Create the base and int8-quantized OPT-125m models in the cache directory.

    Each model is exported only when its directory does not exist yet, so a
    cache kept between runs is reused. The tokenizer is loaded once, before
    either export, because both branches need it; loading it only in the
    base-model branch would leave it undefined when just the int8 model is
    missing.
    """
    from optimum.exporters.openvino.convert import export_tokenizer

    need_base = not os.path.exists(base_model_path)
    need_target = not os.path.exists(target_model_path)
    if not (need_base or need_target):
        return

    logger.info("Create models")
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH)

    if need_base:
        base_model = OVModelForCausalLM.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH)
        base_model.save_pretrained(base_model_path)
        tokenizer.save_pretrained(base_model_path)
        export_tokenizer(tokenizer, base_model_path)

    if need_target:
        target_model = OVModelForCausalLM.from_pretrained(
            model_id, quantization_config=OVWeightQuantizationConfig(bits=8), cache_dir=WWB_CACHE_PATH
        )
        target_model.save_pretrained(target_model_path)
        tokenizer.save_pretrained(target_model_path)
        export_tokenizer(tokenizer, target_model_path)
export_tokenizer(tokenizer, target_model_path)


def teardown_module():
    """Remove the model cache when cleanup was requested via SHOULD_CLEANUP."""
    if SHOULD_CLEANUP:
        logger.info("Removing models")
        # cache_dir is the WWB_CACHE_PATH directory shared with other test
        # modules, so it may already have been removed by another teardown.
        # Do not fail the teardown in that case.
        shutil.rmtree(cache_dir, ignore_errors=True)


def test_text_target_model():
Expand Down Expand Up @@ -138,9 +142,11 @@ def test_text_verbose():

def test_text_language(tmp_path):
temp_file_name = tmp_path / "gt.csv"
MODEL_PATH = WWB_CACHE_PATH.joinpath('Qwen/Qwen2-0.5B'.replace("/", "--"))
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else 'Qwen/Qwen2-0.5B'
run_wwb([
"--base-model",
"Qwen/Qwen2-0.5B",
MODEL_PATH,
"--gt-data",
temp_file_name,
"--num-samples",
Expand Down Expand Up @@ -170,9 +176,11 @@ def test_text_language(tmp_path):
)
def test_text_hf_model(model_id, tmp_path):
temp_file_name = tmp_path / "gt.csv"
MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--"))
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id
run_wwb([
"--base-model",
model_id,
MODEL_PATH,
"--gt-data",
temp_file_name,
"--num-samples",
Expand Down
14 changes: 8 additions & 6 deletions tools/who_what_benchmark/tests/test_cli_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import pytest
import logging
from test_cli_image import run_wwb
from constants import WWB_CACHE_PATH


logging.basicConfig(level=logging.INFO)
Expand All @@ -14,9 +15,10 @@
("katuni4ka/tiny-random-llava", "visual-text"),
],
)
def test_vlm_basic(model_id, model_type, tmp_path):
GT_FILE = tmp_path / "gt.csv"
MODEL_PATH = tmp_path / model_id.replace("/", "--")
def test_vlm_basic(model_id, model_type):
GT_FILE = WWB_CACHE_PATH / "gt.csv"
MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--"))
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id

result = subprocess.run(["optimum-cli", "export",
"openvino", "-m", model_id,
Expand All @@ -31,7 +33,7 @@ def test_vlm_basic(model_id, model_type, tmp_path):
# Collect reference with HF model
run_wwb([
"--base-model",
model_id,
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
Expand Down Expand Up @@ -71,13 +73,13 @@ def test_vlm_basic(model_id, model_type, tmp_path):
model_type,
"--genai",
"--output",
tmp_path,
WWB_CACHE_PATH,
])

# test w/o models
run_wwb([
"--target-data",
tmp_path / "target.csv",
WWB_CACHE_PATH / "target.csv",
"--num-samples",
"1",
"--gt-data",
Expand Down
Loading