4 changes: 2 additions & 2 deletions .github/configs/feature.yaml
@@ -11,11 +11,11 @@ develop:

benchmark:
evm-type: benchmark
fill-params: --fork=Prague --gas-benchmark-values 1,10,30,45,60,100,150 -m "benchmark and not state_test" ./tests/benchmark
fill-params: --fork=Prague --gas-benchmark-values 1,10,30,45,60,100,150 -m "not state_test" ./tests/benchmark/compute

benchmark_develop:
evm-type: benchmark
fill-params: --fork=Osaka --gas-benchmark-values 1,10,30,60,100,150 -m "benchmark and not state_test" ./tests/benchmark
fill-params: --fork=Osaka --gas-benchmark-values 1,10,30,60,100,150 -m "not state_test" ./tests/benchmark/compute
feature_only: true

bal:
@@ -1,5 +1,7 @@
"""The module contains the pytest hooks for the gas benchmark values."""

from pathlib import Path

import pytest

from execution_testing.test_types import Environment, EnvironmentDefaults
@@ -30,31 +32,68 @@ def pytest_addoption(parser: pytest.Parser) -> None:
)


def pytest_ignore_collect(
collection_path: Path, config: pytest.Config
) -> bool | None:
"""
Ignore tests/benchmark/ directory by default unless explicitly specified.

Returns True to ignore, False to collect, None for default behavior.
"""
# Only handle paths within tests/benchmark/
try:
collection_path.relative_to(config.rootpath / "tests" / "benchmark")
except ValueError:
return None

Member: To address the failing documentation generation workflow: if fill is running in documentation mode (i.e., config.getoption("--gen-docs", default=False) is True), we can simply return False here so that benchmark tests are not ignored and are included in the documentation.
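A minimal sketch of that suggestion, assuming --gen-docs is the option already queried elsewhere in this PR; the trailing ellipsis stands in for the explicit-path handling shown in this diff:

```python
from pathlib import Path

import pytest


def pytest_ignore_collect(
    collection_path: Path, config: pytest.Config
) -> bool | None:
    """Never ignore benchmark tests while generating documentation."""
    # Documentation mode: collect tests/benchmark/ so the generated docs stay complete.
    if config.getoption("--gen-docs", default=False):
        return False
    ...  # the explicit-path handling from this diff would continue here
```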

# Check if benchmark tests explicitly specified in command line arguments
benchmark_path = config.rootpath / "tests" / "benchmark"
Member: We could define this above the try block and use it in the try block, too.
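A sketch of that refactor, hoisting benchmark_path above the try block so both the containment check and the argument scan share it (imports included for completeness):

```python
from pathlib import Path

import pytest


def pytest_ignore_collect(
    collection_path: Path, config: pytest.Config
) -> bool | None:
    """Sketch: compute benchmark_path once and reuse it in both checks."""
    benchmark_path = config.rootpath / "tests" / "benchmark"
    try:
        collection_path.relative_to(benchmark_path)
    except ValueError:
        return None  # Not under tests/benchmark: fall back to default collection.
    ...  # the command-line argument scan below can reuse benchmark_path
```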

for arg in config.args:
arg_path = Path(arg)
# Check absolute paths
if arg_path.is_absolute():
try:
arg_path.relative_to(benchmark_path)
# Explicitly specified, set op_mode and don't ignore
config.op_mode = OpMode.BENCHMARKING # type: ignore[attr-defined]
return False
except ValueError:
continue
# Check relative paths containing 'benchmark'
elif "benchmark" in arg:
Member: This is a rather loose check; perhaps we could check for "benchmark/compute" or "benchmark/stateful" instead. Of course, it'll break if we add another valid benchmark sub-folder here.
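One possible tightening of that check, expressed as a hypothetical helper; the sub-folder names are taken from this PR's layout (compute and stateful) and would need updating if another benchmark sub-folder is added:

```python
from pathlib import PurePath

# Known benchmark sub-folders in this PR's layout; extend if new ones are added.
BENCHMARK_SUBDIRS = ("benchmark/compute", "benchmark/stateful")


def is_explicit_benchmark_arg(arg: str) -> bool:
    """Return True if a relative command-line argument targets a known benchmark sub-folder."""
    normalized = PurePath(arg).as_posix()
    return any(sub in normalized for sub in BENCHMARK_SUBDIRS)
```

The loose `"benchmark" in arg` branch could then become `elif is_explicit_benchmark_arg(arg):`, with the maintenance caveat noted above.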

# Explicitly specified, set op_mode and don't ignore
config.op_mode = OpMode.BENCHMARKING # type: ignore[attr-defined]
return False
Comment on lines +44 to +66
Collaborator (author): This step checks whether the benchmark path is explicitly specified, but I think this is not the best approach.


# Not explicitly specified, ignore by default
return True


@pytest.hookimpl(tryfirst=True)
def pytest_configure(config: pytest.Config) -> None:
"""Configure the fill and execute mode to benchmarking."""
config.addinivalue_line(
"markers",
"repricing: Mark test as reference test for gas repricing analysis",
)
if config.getoption("gas_benchmark_value"):
config.op_mode = OpMode.BENCHMARKING # type: ignore[attr-defined]


def pytest_collection_modifyitems(
config: pytest.Config, items: list[pytest.Item]
) -> None:
"""Remove non-repricing tests when --fixed-opcode-count is specified."""
fixed_opcode_count = config.getoption("fixed_opcode_count")
if not fixed_opcode_count:
# If --fixed-opcode-count is not specified, don't filter anything
"""
Filter tests based on repricing marker kwargs when -m repricing is specified.

When a test has @pytest.mark.repricing(param=value), only the parameterized
variant matching those kwargs should be selected.
"""
# Check if -m repricing marker filter was specified
markexpr = config.getoption("markexpr", "")
if "repricing" not in markexpr:
return

filtered = []
for item in items:
if not item.get_closest_marker("benchmark"):
continue

repricing_marker = item.get_closest_marker("repricing")
if not repricing_marker:
continue
@@ -160,7 +199,10 @@ def genesis_environment(request: pytest.FixtureRequest) -> Environment:  # noqa:
Return an Environment instance with appropriate gas limit based on test
type.
"""
if request.node.get_closest_marker("benchmark") is not None:
is_benchmark = (
getattr(request.config, "op_mode", False) == OpMode.BENCHMARKING
)
if is_benchmark:
return Environment(gas_limit=BENCHMARKING_MAX_GAS)
return Environment()

@@ -171,6 +213,9 @@ def env(request: pytest.FixtureRequest) -> Environment:  # noqa: D103
Return an Environment instance with appropriate gas limit based on test
type.
"""
if request.node.get_closest_marker("benchmark") is not None:
is_benchmark = (
getattr(request.config, "op_mode", False) == OpMode.BENCHMARKING
)
if is_benchmark:
return Environment(gas_limit=BENCHMARKING_MAX_GAS)
return Environment()
5 changes: 5 additions & 0 deletions packages/testing/src/execution_testing/specs/benchmark.py
@@ -250,6 +250,11 @@ def model_post_init(self, __context: Any, /) -> None:

blocks: List[Block] = self.setup_blocks

if self.fixed_opcode_count is not None and self.code_generator is None:
pytest.skip(
"Cannot run fixed opcode count tests without a code generator"
)

if self.code_generator is not None:
# Inject fixed_opcode_count into the code generator if provided
self.code_generator.fixed_opcode_count = self.fixed_opcode_count
56 changes: 0 additions & 56 deletions tests/benchmark/conftest.py
@@ -32,62 +32,6 @@ def pytest_generate_tests(metafunc: Any) -> None:
metafunc.definition.add_marker(benchmark_marker)


def pytest_collection_modifyitems(config: Any, items: Any) -> None:
"""Add the `benchmark` marker to all tests under `./tests/benchmark`."""
benchmark_dir = Path(__file__).parent
benchmark_marker = pytest.mark.benchmark
gen_docs = config.getoption("--gen-docs", default=False)

if gen_docs:
for item in items:
if (
benchmark_dir in Path(item.fspath).parents
and not item.get_closest_marker("benchmark")
and not item.get_closest_marker("stateful")
):
item.add_marker(benchmark_marker)
return
Comment on lines -39 to -49
Member: This code is pretty ugly, but this PR will have to add some "gen_docs" logic so that ./tests/benchmark is still collected when we're generating documentation, so that the docs contain all the test documentation.


marker_expr = config.getoption("-m", default="")
run_benchmarks = (
marker_expr
and "benchmark" in marker_expr
and "not benchmark" not in marker_expr
)
run_stateful_tests = (
marker_expr
and "stateful" in marker_expr
and "not stateful" not in marker_expr
)

items_for_removal = []
for i, item in enumerate(items):
is_in_benchmark_dir = benchmark_dir in Path(item.fspath).parents
has_stateful_marker = item.get_closest_marker("stateful")
is_benchmark_test = (
is_in_benchmark_dir and not has_stateful_marker
) or item.get_closest_marker("benchmark")

if is_benchmark_test:
if is_in_benchmark_dir and not item.get_closest_marker(
"benchmark"
):
item.add_marker(benchmark_marker)
if not run_benchmarks:
items_for_removal.append(i)
elif run_benchmarks:
items_for_removal.append(i)
elif (
is_in_benchmark_dir
and has_stateful_marker
and not run_stateful_tests
):
items_for_removal.append(i)

for i in reversed(items_for_removal):
items.pop(i)


@pytest.fixture
def tx_gas_limit(fork: Fork, gas_benchmark_value: int) -> int:
"""Return the transaction gas limit cap."""
47 changes: 0 additions & 47 deletions tests/benchmark/stateful/conftest.py
@@ -25,50 +25,3 @@ def pytest_generate_tests(metafunc: Any) -> None:
metafunc.definition.add_marker(
pytest.mark.valid_from(DEFAULT_BENCHMARK_FORK)
)


def pytest_collection_modifyitems(config: Any, items: Any) -> None:
"""Manage stateful test markers and filtering."""
state_dir = Path(__file__).parent
gen_docs = config.getoption("--gen-docs", default=False)

if gen_docs:
_add_stateful_markers_for_docs(items, state_dir)
return

marker_expr = config.getoption("-m", default="")

items_to_remove = []

for i, item in enumerate(items):
item_path = Path(item.fspath)
is_in_state_dir = state_dir in item_path.parents

# Add stateful marker to tests in state directory that don't have it
if is_in_state_dir and not item.get_closest_marker("stateful"):
item.add_marker(pytest.mark.stateful)

has_stateful_marker = item.get_closest_marker("stateful")

run_stateful = (
marker_expr
and ("stateful" in marker_expr)
and ("not stateful" not in marker_expr)
)

# When not running stateful tests, remove all stateful tests
if not run_stateful and has_stateful_marker:
items_to_remove.append(i)

for i in reversed(items_to_remove):
items.pop(i)


def _add_stateful_markers_for_docs(items: Any, state_dir: Any) -> None:
"""Add stateful markers for documentation generation."""
for item in items:
item_path = Path(item.fspath)
if state_dir in item_path.parents and not item.get_closest_marker(
"stateful"
):
item.add_marker(pytest.mark.stateful)
8 changes: 4 additions & 4 deletions tox.ini
@@ -79,7 +79,7 @@ commands =
description = Fill the tests using EELS (with Python)
commands =
fill \
-m "not slow and not zkevm and not benchmark" \
-m "not slow" \
-n auto --maxprocesses 10 --dist=loadgroup \
--basetemp="{temp_dir}/pytest" \
--log-to "{toxworkdir}/logs" \
@@ -102,7 +102,7 @@ commands =
--tb=no \
--show-capture=no \
--disable-warnings \
-m "not slow and not zkevm and not benchmark" \
-m "not slow" \
-n auto --maxprocesses 7 --dist=loadgroup \
--basetemp="{temp_dir}/pytest" \
--log-to "{toxworkdir}/logs" \
@@ -117,13 +117,13 @@ commands =
--generate-all-formats \
--gas-benchmark-values 1 \
--evm-bin=evmone-t8n \
-m "benchmark and not state_test" \
-m "not state_test" \
-n auto --maxprocesses 10 --dist=loadgroup \
--basetemp="{temp_dir}/pytest" \
--log-to "{toxworkdir}/logs" \
--clean \
--fork Prague \
tests/benchmark
tests/benchmark/compute

[testenv:optimized]
description = Run unit tests for optimized state and ethash