Drop benchmark report deletion in e2e tests

sjmonson · sjmonson · commit 4b3bb9b500c9 · 2025-12-05T11:23:58.000-05:00
Signed-off-by: Samuel Monson <smonson@redhat.com>
diff --git a/tests/e2e/test_max_error_benchmark.py b/tests/e2e/test_max_error_benchmark.py
@@ -8,7 +8,6 @@
     GuidellmClient,
     assert_constraint_triggered,
     assert_no_python_exceptions,
-    cleanup_report_file,
     load_benchmark_report,
 )
 from tests.e2e.vllm_sim_server import VllmSimServer
@@ -51,33 +50,29 @@ def test_max_error_benchmark(server: VllmSimServer, tmp_path: Path):
         outputs=report_name,
     )
 
-    try:
-        # Start the benchmark
-        client.start_benchmark(
-            rate=rate,
-            max_seconds=25,
-            max_error_rate=max_error_rate,
-        )
-
-        # Wait for the benchmark to complete (server will be stopped after 15 seconds)
-        client.wait_for_completion(timeout=30, stop_server_after=15, server=server)
+    # Start the benchmark
+    client.start_benchmark(
+        rate=rate,
+        max_seconds=25,
+        max_error_rate=max_error_rate,
+    )
 
-        # Assert no Python exceptions occurred
-        assert_no_python_exceptions(client.stderr)
+    # Wait for the benchmark to complete (server will be stopped after 15 seconds)
+    client.wait_for_completion(timeout=30, stop_server_after=15, server=server)
 
-        # Load and validate the report
-        report = load_benchmark_report(report_path)
-        benchmark = report["benchmarks"][0]
+    # Assert no Python exceptions occurred
+    assert_no_python_exceptions(client.stderr)
 
-        # Check that the max error rate constraint was triggered
-        assert_constraint_triggered(
-            benchmark,
-            "max_error_rate",
-            {
-                "exceeded_error_rate": True,
-                "current_error_rate": lambda rate: rate >= max_error_rate,
-            },
-        )
+    # Load and validate the report
+    report = load_benchmark_report(report_path)
+    benchmark = report["benchmarks"][0]
 
-    finally:
-        cleanup_report_file(report_path)
+    # Check that the max error rate constraint was triggered
+    assert_constraint_triggered(
+        benchmark,
+        "max_error_rate",
+        {
+            "exceeded_error_rate": True,
+            "current_error_rate": lambda rate: rate >= max_error_rate,
+        },
+    )
diff --git a/tests/e2e/test_over_saturated_benchmark.py b/tests/e2e/test_over_saturated_benchmark.py
@@ -6,7 +6,6 @@
     GuidellmClient,
     assert_constraint_triggered,
     assert_no_python_exceptions,
-    cleanup_report_file,
     load_benchmark_report,
 )
 from tests.e2e.vllm_sim_server import VllmSimServer
@@ -49,7 +48,6 @@ def test_over_saturated_benchmark(server: VllmSimServer, tmp_path: Path):
         outputs=report_name,
     )
 
-    cleanup_report_file(report_path)
     # Start the benchmark with --default-over-saturation flag
     client.start_benchmark(
         rate=rate,
@@ -75,8 +73,6 @@ def test_over_saturated_benchmark(server: VllmSimServer, tmp_path: Path):
         benchmark, "over_saturation", {"is_over_saturated": True}
     )
 
-    cleanup_report_file(report_path)
-
 
 @pytest.mark.timeout(60)
 def test_over_saturated_benchmark_with_dict_config(
@@ -96,7 +92,6 @@ def test_over_saturated_benchmark_with_dict_config(
         outputs=report_name,
     )
 
-    cleanup_report_file(report_path)
     # Start the benchmark with dictionary configuration for over-saturation
     client.start_benchmark(
         rate=rate,
@@ -127,5 +122,3 @@ def test_over_saturated_benchmark_with_dict_config(
     assert_constraint_triggered(
         benchmark, "over_saturation", {"is_over_saturated": True}
     )
-
-    cleanup_report_file(report_path)
diff --git a/tests/e2e/test_successful_benchmark.py b/tests/e2e/test_successful_benchmark.py
@@ -9,7 +9,6 @@
     assert_constraint_triggered,
     assert_no_python_exceptions,
     assert_successful_requests_fields,
-    cleanup_report_file,
     load_benchmark_report,
 )
 from tests.e2e.vllm_sim_server import VllmSimServer
@@ -53,34 +52,28 @@ def test_max_seconds_benchmark(server: VllmSimServer, tmp_path: Path):
         outputs=report_name,
     )
 
-    try:
-        # Start the benchmark
-        client.start_benchmark(
-            rate=rate,
-            max_seconds=max_seconds,
-        )
-
-        # Wait for the benchmark to complete
-        client.wait_for_completion(timeout=30)
+    # Start the benchmark
+    client.start_benchmark(
+        rate=rate,
+        max_seconds=max_seconds,
+    )
 
-        # Assert no Python exceptions occurred
-        assert_no_python_exceptions(client.stderr)
+    # Wait for the benchmark to complete
+    client.wait_for_completion(timeout=30)
 
-        # Load and validate the report
-        report = load_benchmark_report(report_path)
-        benchmark = report["benchmarks"][0]
+    # Assert no Python exceptions occurred
+    assert_no_python_exceptions(client.stderr)
 
-        # Check that the max duration constraint was triggered
-        assert_constraint_triggered(
-            benchmark, "max_seconds", {"duration_exceeded": True}
-        )
+    # Load and validate the report
+    report = load_benchmark_report(report_path)
+    benchmark = report["benchmarks"][0]
 
-        # Validate successful requests have all expected fields
-        successful_requests = benchmark["requests"]["successful"]
-        assert_successful_requests_fields(successful_requests)
+    # Check that the max duration constraint was triggered
+    assert_constraint_triggered(benchmark, "max_seconds", {"duration_exceeded": True})
 
-    finally:
-        cleanup_report_file(report_path)
+    # Validate successful requests have all expected fields
+    successful_requests = benchmark["requests"]["successful"]
+    assert_successful_requests_fields(successful_requests)
 
 
 @pytest.mark.timeout(30)
@@ -102,35 +95,28 @@ def test_max_requests_benchmark(server: VllmSimServer, tmp_path: Path):
         outputs=report_name,
     )
 
-    try:
-        # Start the benchmark
-        client.start_benchmark(
-            rate=rate,
-            max_requests=max_requests,
-        )
-
-        # Wait for the benchmark to complete
-        client.wait_for_completion(timeout=30)
-
-        # Assert no Python exceptions occurred
-        assert_no_python_exceptions(client.stderr)
-
-        # Load and validate the report
-        report = load_benchmark_report(report_path)
-        benchmark = report["benchmarks"][0]
-
-        # Check that the max requests constraint was triggered
-        assert_constraint_triggered(
-            benchmark, "max_requests", {"processed_exceeded": True}
-        )
-
-        # Validate successful requests have all expected fields
-        successful_requests = benchmark["requests"]["successful"]
-        assert len(successful_requests) == max_requests, (
-            f"Expected {max_requests} successful requests, "
-            f"got {len(successful_requests)}"
-        )
-        assert_successful_requests_fields(successful_requests)
+    # Start the benchmark
+    client.start_benchmark(
+        rate=rate,
+        max_requests=max_requests,
+    )
 
-    finally:
-        cleanup_report_file(report_path)
+    # Wait for the benchmark to complete
+    client.wait_for_completion(timeout=30)
+
+    # Assert no Python exceptions occurred
+    assert_no_python_exceptions(client.stderr)
+
+    # Load and validate the report
+    report = load_benchmark_report(report_path)
+    benchmark = report["benchmarks"][0]
+
+    # Check that the max requests constraint was triggered
+    assert_constraint_triggered(benchmark, "max_requests", {"processed_exceeded": True})
+
+    # Validate successful requests have all expected fields
+    successful_requests = benchmark["requests"]["successful"]
+    assert len(successful_requests) == max_requests, (
+        f"Expected {max_requests} successful requests, got {len(successful_requests)}"
+    )
+    assert_successful_requests_fields(successful_requests)
diff --git a/tests/e2e/utils.py b/tests/e2e/utils.py
@@ -341,13 +341,3 @@ def assert_constraint_triggered(
             assert actual_value == expected_value, (
                 f"Expected {key}={expected_value}, got {actual_value}"
             )
-
-
-def cleanup_report_file(report_path: Path) -> None:
-    """
-    Clean up the report file if it exists.
-
-    :param report_path: Path to the report file to remove
-    """
-    if report_path.exists():
-        report_path.unlink()