Skip to content
Open
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ env:
SCCACHE_AZURE_KEY_PREFIX: genai/ubuntu/22_04/x64
HF_HOME: /mount/caches/huggingface/lin
OV_CACHE: /mount/caches/huggingface/.ov_cache/lin
HF_HUB_VERBOSITY: debug
TRANSFORMERS_VERBOSITY: debug
OPENVINO_LOG_LEVEL: 5
GENAI_ARCHIVE_NAME: genai.tar.gz
GENAI_SAMPLES_NAME: genai_samples.tar.gz
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ env:
CCACHE_MAXSIZE: 500Mi
HF_HOME: C:/mount/caches/huggingface/win
OV_CACHE: C:/mount/caches/huggingface/.ov_cache/win/
HF_HUB_VERBOSITY: debug
TRANSFORMERS_VERBOSITY: debug
OPENVINO_LOG_LEVEL: 5
ARTIFACTS_SHARE: '/mount/build-artifacts'
BASE_PRODUCT_TYPE: public_windows_vs2022
Expand Down
26 changes: 26 additions & 0 deletions tools/who_what_benchmark/tests/constants.py
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be a conftest.py: should_cleanup and wwb_cache_path could be session-scoped fixtures.

Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from pathlib import Path
import os
import tempfile
from datetime import datetime
from importlib import metadata


SHOULD_CLEANUP = os.environ.get("CLEANUP_CACHE", "").lower() in ("1", "true", "yes")


def get_wwb_cache_dir(temp_dir=tempfile.TemporaryDirectory()) -> Path:
if "OV_CACHE" in os.environ:
date_subfolder = datetime.now().strftime("%Y%m%d")
ov_cache = os.path.join(os.environ["OV_CACHE"], date_subfolder)
try:
optimum_intel_version = metadata.version("optimum-intel")
transformers_version = metadata.version("transformers")
ov_cache = os.path.join(ov_cache, f"optimum-intel-{optimum_intel_version}_transformers-{transformers_version}")
Comment on lines +14 to +18
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
ov_cache = os.path.join(os.environ["OV_CACHE"], date_subfolder)
try:
optimum_intel_version = metadata.version("optimum-intel")
transformers_version = metadata.version("transformers")
ov_cache = os.path.join(ov_cache, f"optimum-intel-{optimum_intel_version}_transformers-{transformers_version}")
ov_cache = Path(os.environ["OV_CACHE"]) / date_subfolder
try:
optimum_intel_version = metadata.version("optimum-intel")
transformers_version = metadata.version("transformers")
ov_cache = ov_cache / f"optimum-intel-{optimum_intel_version}_transformers-{transformers_version}"

except metadata.PackageNotFoundError:
pass
else:
ov_cache = temp_dir.name
return Path(ov_cache).joinpath("wwb_cache")


WWB_CACHE_PATH = get_wwb_cache_dir()
16 changes: 9 additions & 7 deletions tools/who_what_benchmark/tests/test_cli_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@
import shutil
import pytest
import logging
import tempfile
import re
from constants import WWB_CACHE_PATH, SHOULD_CLEANUP


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

MODEL_CACHE = tempfile.mkdtemp()
MODEL_CACHE = WWB_CACHE_PATH
OV_IMAGE_MODELS = ["echarlaix/tiny-random-stable-diffusion-xl",
"yujiepan/stable-diffusion-3-tiny-random",
"katuni4ka/tiny-random-flux",
Expand All @@ -38,13 +38,14 @@ def run_wwb(args):

def setup_module():
for model_id in OV_IMAGE_MODELS:
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))
MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--"))
subprocess.run(["optimum-cli", "export", "openvino", "--model", model_id, MODEL_PATH], capture_output=True, text=True)


def teardown_module():
logger.info("Remove models")
shutil.rmtree(MODEL_CACHE)
if SHOULD_CLEANUP:
logger.info("Removing models")
shutil.rmtree(MODEL_CACHE)


def get_similarity(output: str) -> float:
Expand Down Expand Up @@ -121,11 +122,12 @@ def test_image_model_genai(model_id, model_type, tmp_path):
pytest.xfail("Ticket 173169")

GT_FILE = tmp_path / "gt.csv"
MODEL_PATH = os.path.join(MODEL_CACHE, model_id.replace("/", "--"))
MODEL_PATH = MODEL_CACHE.joinpath(model_id.replace("/", "--"))
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id
Copy link

Copilot AI Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fallback logic reassigns MODEL_PATH from a Path object to a string, creating inconsistent types. Consider using str(MODEL_PATH) in the conditional or ensuring consistent Path usage throughout.

Suggested change
MODEL_PATH = MODEL_PATH if MODEL_PATH.exists() else model_id
MODEL_PATH = str(MODEL_PATH) if MODEL_PATH.exists() else model_id

Copilot uses AI. Check for mistakes.

Copy link
Contributor Author

@akashchi akashchi Oct 6, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The underlying from_pretrained method accepts both str and Path arguments, so this should not be an issue.


run_wwb([
"--base-model",
model_id,
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
Expand Down
40 changes: 22 additions & 18 deletions tools/who_what_benchmark/tests/test_cli_text.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import os
import shutil
import tempfile
import pandas as pd
import pytest
import logging
import json
import sys

from constants import WWB_CACHE_PATH, SHOULD_CLEANUP

from transformers import AutoTokenizer
from optimum.intel.openvino import OVModelForCausalLM, OVWeightQuantizationConfig

Expand All @@ -18,9 +19,9 @@


model_id = "facebook/opt-125m"
tmp_dir = tempfile.mkdtemp()
base_model_path = os.path.join(tmp_dir, "opt125m")
target_model_path = os.path.join(tmp_dir, "opt125m_int8")
cache_dir = WWB_CACHE_PATH
base_model_path = os.path.join(cache_dir, "opt125m")
target_model_path = os.path.join(cache_dir, "opt125m_int8")

gptq_model_id = "ybelkada/opt-125m-gptq-4bit"
awq_model_id = "TitanML/tiny-mixtral-AWQ-4bit"
Expand All @@ -29,24 +30,27 @@
def setup_module():
from optimum.exporters.openvino.convert import export_tokenizer

logger.info("Create models")
tokenizer = AutoTokenizer.from_pretrained(model_id)
base_model = OVModelForCausalLM.from_pretrained(model_id)
base_model.save_pretrained(base_model_path)
tokenizer.save_pretrained(base_model_path)
export_tokenizer(tokenizer, base_model_path)
if not os.path.exists(base_model_path):
logger.info("Create models")
tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH)
base_model = OVModelForCausalLM.from_pretrained(model_id, cache_dir=WWB_CACHE_PATH)
base_model.save_pretrained(base_model_path)
tokenizer.save_pretrained(base_model_path)
export_tokenizer(tokenizer, base_model_path)

target_model = OVModelForCausalLM.from_pretrained(
model_id, quantization_config=OVWeightQuantizationConfig(bits=8)
)
target_model.save_pretrained(target_model_path)
tokenizer.save_pretrained(target_model_path)
export_tokenizer(tokenizer, target_model_path)
if not os.path.exists(target_model_path):
target_model = OVModelForCausalLM.from_pretrained(
model_id, quantization_config=OVWeightQuantizationConfig(bits=8), cache_dir=WWB_CACHE_PATH
)
target_model.save_pretrained(target_model_path)
tokenizer.save_pretrained(target_model_path)
export_tokenizer(tokenizer, target_model_path)


def teardown_module():
logger.info("Remove models")
shutil.rmtree(tmp_dir)
if SHOULD_CLEANUP:
logger.info("Removing models")
shutil.rmtree(cache_dir)


@pytest.mark.skipif((sys.platform == "darwin"), reason='173169')
Expand Down
28 changes: 15 additions & 13 deletions tools/who_what_benchmark/tests/test_cli_vlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import logging
import sys
from test_cli_image import run_wwb
from constants import WWB_CACHE_PATH


logging.basicConfig(level=logging.INFO)
Expand All @@ -19,22 +20,23 @@ def test_vlm_basic(model_id, model_type, tmp_path):
if sys.platform == 'darwin':
pytest.xfail("Ticket 173169")
GT_FILE = tmp_path / "gt.csv"
MODEL_PATH = tmp_path / model_id.replace("/", "--")
MODEL_PATH = WWB_CACHE_PATH.joinpath(model_id.replace("/", "--"))

result = subprocess.run(["optimum-cli", "export",
"openvino", "-m", model_id,
MODEL_PATH, "--task",
"image-text-to-text",
"--trust-remote-code"],
capture_output=True,
text=True,
)
assert result.returncode == 0
if not MODEL_PATH.exists():
result = subprocess.run(["optimum-cli", "export",
"openvino", "-m", model_id,
MODEL_PATH, "--task",
"image-text-to-text",
"--trust-remote-code"],
capture_output=True,
text=True,
)
assert result.returncode == 0

# Collect reference with HF model
run_wwb([
"--base-model",
model_id,
MODEL_PATH,
"--num-samples",
"1",
"--gt-data",
Expand Down Expand Up @@ -74,13 +76,13 @@ def test_vlm_basic(model_id, model_type, tmp_path):
model_type,
"--genai",
"--output",
tmp_path,
WWB_CACHE_PATH,
])

# test w/o models
run_wwb([
"--target-data",
tmp_path / "target.csv",
WWB_CACHE_PATH / "target.csv",
"--num-samples",
"1",
"--gt-data",
Expand Down
Loading