
Commit 3bd986b

Fix e2e tests writing output to test dir
Signed-off-by: Samuel Monson <[email protected]>
1 parent 800bec6

4 files changed (+51, -20 lines)

tests/e2e/test_max_error_benchmark.py

Lines changed: 8 additions & 3 deletions

@@ -35,16 +35,21 @@ def server():
 
 
 @pytest.mark.timeout(30)
-def test_max_error_benchmark(server: VllmSimServer):
+def test_max_error_benchmark(server: VllmSimServer, tmp_path: Path):
     """
     Test that the max error rate constraint is properly triggered when server goes down.
     """
-    report_path = Path("tests/e2e/max_error_benchmarks.json")
+    report_name = "max_error_benchmarks.json"
+    report_path = tmp_path / report_name
     rate = 10
     max_error_rate = 0.1
 
     # Create and configure the guidellm client
-    client = GuidellmClient(target=server.get_url(), output_path=report_path)
+    client = GuidellmClient(
+        target=server.get_url(),
+        output_dir=tmp_path,
+        outputs=report_name,
+    )
 
     try:
         # Start the benchmark
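The core of the fix is swapping a hard-coded repo path for pytest's built-in tmp_path fixture. A minimal standalone sketch of that fixture's behavior, not taken from the commit (the file name mirrors the diff above, the test name is illustrative):

# pytest injects a fresh pathlib.Path under the system temp area for every
# test, so reports no longer land inside tests/e2e/ in the checkout.
from pathlib import Path


def test_report_is_written_outside_the_repo(tmp_path: Path):
    report_path = tmp_path / "max_error_benchmarks.json"
    report_path.write_text("{}")  # stand-in for a real benchmark report
    assert report_path.exists()
    assert "tests/e2e" not in str(report_path)

pytest also prunes old temp directories automatically, so these files never need manual cleanup.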

tests/e2e/test_over_saturated_benchmark.py

Lines changed: 18 additions & 6 deletions

@@ -34,15 +34,20 @@ def server():
 
 
 @pytest.mark.timeout(60)
-def test_over_saturated_benchmark(server: VllmSimServer):
+def test_over_saturated_benchmark(server: VllmSimServer, tmp_path: Path):
     """
     Test over-saturation detection using the --default-over-saturation flag.
     """
-    report_path = Path("tests/e2e/over_saturated_benchmarks.json")
+    report_name = "over_saturated_benchmarks.json"
+    report_path = tmp_path / report_name
     rate = 10
 
     # Create and configure the guidellm client
-    client = GuidellmClient(target=server.get_url(), output_path=report_path)
+    client = GuidellmClient(
+        target=server.get_url(),
+        output_dir=tmp_path,
+        outputs=report_name,
+    )
 
     cleanup_report_file(report_path)
     # Start the benchmark with --default-over-saturation flag

@@ -74,15 +79,22 @@ def test_over_saturated_benchmark(server: VllmSimServer):
 
 
 @pytest.mark.timeout(60)
-def test_over_saturated_benchmark_with_dict_config(server: VllmSimServer):
+def test_over_saturated_benchmark_with_dict_config(
+    server: VllmSimServer, tmp_path: Path
+):
     """
     Test over-saturation detection with dictionary configuration instead of boolean.
     """
-    report_path = Path("tests/e2e/over_saturated_benchmarks_dict.json")
+    report_name = "over_saturated_benchmarks_dict.json"
+    report_path = tmp_path / report_name
     rate = 10
 
     # Create and configure the guidellm client
-    client = GuidellmClient(target=server.get_url(), output_path=report_path)
+    client = GuidellmClient(
+        target=server.get_url(),
+        output_dir=tmp_path,
+        outputs=report_name,
+    )
 
     cleanup_report_file(report_path)
     # Start the benchmark with dictionary configuration for over-saturation
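Every test in this commit repeats the same pattern: keep the directory and the file name separate, so output_dir can be handed to the client while report_path remains available for reading the report back. A hedged sketch of that pattern; the import path and the helper name are assumptions based on the repo layout shown here, not from the commit:

from pathlib import Path

from tests.e2e.utils import GuidellmClient  # assumed to be importable as a package


def make_client_and_report_path(server_url: str, tmp_path: Path, report_name: str):
    # report_path points at the same file the client will write, so the test
    # can assert on it after the run.
    report_path = tmp_path / report_name
    client = GuidellmClient(
        target=server_url,
        output_dir=tmp_path,
        outputs=report_name,
    )
    return client, report_path

After the benchmark finishes, a test can load the report directly, e.g. json.loads(report_path.read_text()).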

tests/e2e/test_successful_benchmark.py

Lines changed: 16 additions & 6 deletions

@@ -37,16 +37,21 @@ def server():
 
 @pytest.mark.timeout(30)
 @pytest.mark.sanity
-def test_max_seconds_benchmark(server: VllmSimServer):
+def test_max_seconds_benchmark(server: VllmSimServer, tmp_path: Path):
     """
     Test that the max seconds constraint is properly triggered.
     """
-    report_path = Path("tests/e2e/max_duration_benchmarks.json")
+    report_name = "max_duration_benchmarks.json"
+    report_path = tmp_path / report_name
     rate = 4
     duration = 5
     max_seconds = duration
     # Create and configure the guidellm client
-    client = GuidellmClient(target=server.get_url(), output_path=report_path)
+    client = GuidellmClient(
+        target=server.get_url(),
+        output_dir=tmp_path,
+        outputs=report_name,
+    )
 
     try:
         # Start the benchmark

@@ -80,17 +85,22 @@ def test_max_seconds_benchmark(server: VllmSimServer):
 
 @pytest.mark.timeout(30)
 @pytest.mark.sanity
-def test_max_requests_benchmark(server: VllmSimServer):
+def test_max_requests_benchmark(server: VllmSimServer, tmp_path: Path):
     """
     Test that the max requests constraint is properly triggered.
     """
-    report_path = Path("tests/e2e/max_number_benchmarks.json")
+    report_name = "max_number_benchmarks.json"
+    report_path = tmp_path / report_name
    rate = 4
     duration = 5
     max_requests = rate * duration
 
     # Create and configure the guidellm client
-    client = GuidellmClient(target=server.get_url(), output_path=report_path)
+    client = GuidellmClient(
+        target=server.get_url(),
+        output_dir=tmp_path,
+        outputs=report_name,
+    )
 
     try:
         # Start the benchmark
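A side benefit of tmp_path worth noting: the two tests above write same-named artifacts without clobbering each other, because pytest creates a distinct directory per test invocation. A small illustration, not from the commit:

from pathlib import Path


def test_writes_report(tmp_path: Path):
    (tmp_path / "benchmarks.json").write_text("{}")


def test_gets_a_clean_directory(tmp_path: Path):
    # A different, freshly created directory from the previous test's,
    # so nothing written there is visible here.
    assert list(tmp_path.iterdir()) == []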

tests/e2e/utils.py

Lines changed: 9 additions & 5 deletions

@@ -26,15 +26,18 @@ def get_guidellm_executable() -> str:
 class GuidellmClient:
     """Wrapper class for running guidellm benchmark commands."""
 
-    def __init__(self, target: str, output_path: Path):
+    def __init__(
+        self, target: str, output_dir: Path, outputs: str = "benchmarks.json"
+    ) -> None:
         """
         Initialize the guidellm client.
 
         :param target: The target URL for the benchmark
         :param output_path: Path where the benchmark report will be saved
         """
         self.target = target
-        self.output_path = output_path
+        self.output_dir = output_dir
+        self.outputs = outputs
         self.process: subprocess.Popen | None = None
         self.stdout: str | None = None
         self.stderr: str | None = None

@@ -72,7 +75,7 @@ def start_benchmark(
         # Build command components
         cmd_parts = [
             *([f"{k}={v}" for k, v in extra_env.items()] if extra_env else []),
-            "HF_HOME=/tmp/huggingface_cache",
+            "HF_HOME=" + str(self.output_dir / "huggingface_cache"),
             f"{guidellm_exe} benchmark run",
             f'--target "{self.target}"',
             f"--profile {profile}",

@@ -108,7 +111,8 @@ def start_benchmark(
             [
                 f'--data "{data}"',
                 f'--processor "{processor}"',
-                f"--output-path {self.output_path}",
+                f"--output-dir {self.output_dir}",
+                f"--outputs {self.outputs}",
             ]
         )
 

@@ -120,7 +124,7 @@ def start_benchmark(
         logger.info(f"Client command: {command}")
 
         self.process = subprocess.Popen(  # noqa: S603
-            ["/bin/bash", "-c", command],
+            ["/bin/sh", "-c", command],
             stdout=subprocess.PIPE,
             stderr=subprocess.PIPE,
             text=True,
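The helper builds a single shell string where environment assignments prefix the executable, the POSIX "KEY=value command" form, so HF_HOME is scoped to that one invocation; the switch from /bin/bash to /bin/sh presumably also keeps this working on minimal images without bash. A standalone sketch of that command shape with illustrative paths, not from the commit:

import subprocess
from pathlib import Path

output_dir = Path("/tmp/example-run")  # stands in for the test's tmp_path
command = " ".join(
    [
        # Scoped env assignment, as in cmd_parts above
        "HF_HOME=" + str(output_dir / "huggingface_cache"),
        "printenv HF_HOME",  # placeholder for the guidellm invocation
    ]
)
result = subprocess.run(["/bin/sh", "-c", command], capture_output=True, text=True)
print(result.stdout.strip())  # /tmp/example-run/huggingface_cache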
