diff --git a/SPECS/ARCHIVE/INDEX.md b/SPECS/ARCHIVE/INDEX.md index e7740e5..c65b74e 100644 --- a/SPECS/ARCHIVE/INDEX.md +++ b/SPECS/ARCHIVE/INDEX.md @@ -1,11 +1,12 @@ # mcpbridge-wrapper Tasks Archive -**Last Updated:** 2026-03-01 (BUG-T8 + REVIEW archived) +**Last Updated:** 2026-03-01 (P2-T2 archived) ## Archived Tasks | Task ID | Folder | Archived | Verdict | |---------|--------|----------|---------| +| P2-T2 | [P2-T2_Self-healing_stale_socket_and_PID_file_recovery/](P2-T2_Self-healing_stale_socket_and_PID_file_recovery/) | 2026-03-01 | PASS | | BUG-T8 | [BUG-T8_Fix_broker_proxy_bridge_exits_after_first_write_due_to_BaseProtocol_missing_drain_helper/](BUG-T8_Fix_broker_proxy_bridge_exits_after_first_write_due_to_BaseProtocol_missing_drain_helper/) | 2026-03-01 | PASS | | P1-T3 | [P1-T3_Improve_MCP_settings_examples_in_README_to_present_broker_setup_first/](P1-T3_Improve_MCP_settings_examples_in_README_to_present_broker_setup_first/) | 2026-03-01 | PASS | | P1-T2 | [P1-T2_Add_Xcode_26_4_known_issue_release_notes_link_to_README/](P1-T2_Add_Xcode_26_4_known_issue_release_notes_link_to_README/) | 2026-02-28 | PASS | @@ -171,6 +172,7 @@ | File | Description | |------|-------------| +| [REVIEW_P2-T2_stale_socket_recovery.md](_Historical/REVIEW_P2-T2_stale_socket_recovery.md) | Review report for P2-T2 | | [Workplan_0.4.0.md](_Historical/Workplan_0.4.0.md) | Archived workplan snapshot for release 0.4.0 | | [REVIEW_p1_t1_version_badge_script_tests.md](_Historical/REVIEW_p1_t1_version_badge_script_tests.md) | Review report for P1-T1 (version badge script/tests) | | [REVIEW_p1_t2_xcode_26_4_known_issue_link.md](_Historical/REVIEW_p1_t2_xcode_26_4_known_issue_link.md) | Review report for P1-T2 | @@ -290,6 +292,7 @@ | Date | Task ID | Action | |------|---------|--------| +| 2026-03-01 | P2-T2 | Archived Self-healing stale socket and PID file recovery (PASS) | | 2026-03-01 | WORKPLAN | Archived Workplan_0.4.0.md to _Historical and reset SPECS/Workplan.md | | 2026-02-28 | P15-T1 | Archived REVIEW_p15_t1_next_release_readiness report | | 2026-02-28 | P15-T1 | Archived Validate_project_readiness_for_the_next_release (PASS) | diff --git a/SPECS/ARCHIVE/P2-T2_Self-healing_stale_socket_and_PID_file_recovery/P2-T2_Self-healing_stale_socket_and_PID_file_recovery.md b/SPECS/ARCHIVE/P2-T2_Self-healing_stale_socket_and_PID_file_recovery/P2-T2_Self-healing_stale_socket_and_PID_file_recovery.md new file mode 100644 index 0000000..a2e19ed --- /dev/null +++ b/SPECS/ARCHIVE/P2-T2_Self-healing_stale_socket_and_PID_file_recovery/P2-T2_Self-healing_stale_socket_and_PID_file_recovery.md @@ -0,0 +1,115 @@ +# P2-T2: Self-healing stale socket and PID file recovery + +**Status:** In Progress +**Priority:** P0 +**Branch:** feature/P2-T2-stale-socket-recovery +**Created:** 2026-03-01 + +--- + +## Problem Statement + +When the broker daemon crashes or is killed, it leaves `broker.sock` and `broker.pid` on disk. The proxy's `_spawn_broker_if_needed` checks `socket_path.exists()` and skips spawning if the socket file is present — even if no process is listening. This silently blocks all future broker mode sessions until the user manually deletes the files. + +--- + +## Root Cause + +In `proxy.py` → `_spawn_broker_if_needed`, line 131-133: + +```python +# Check if socket already exists (race condition: broker started without PID file yet) +if socket_path.exists(): + logger.debug("Broker socket already present; skipping spawn.") + return +``` + +Existence check (`Path.exists()`) does not verify whether anything is actually listening on the socket. A stale socket file left after a crash passes the existence check and prevents spawn. + +--- + +## Solution + +### 1. `proxy.py` — Liveness check in `_spawn_broker_if_needed` + +Replace the plain existence check with a socket connect attempt: + +```python +if socket_path.exists(): + import socket as _socket + try: + with _socket.socket(_socket.AF_UNIX, _socket.SOCK_STREAM) as s: + s.settimeout(1.0) + s.connect(str(socket_path)) + # Connection succeeded → broker is alive + logger.debug("Broker socket present and accepting connections; skipping spawn.") + return + except ConnectionRefusedError: + # Stale socket — broker is not listening + logger.warning( + "Stale socket found (broker not accepting connections); removing stale files." + ) + socket_path.unlink(missing_ok=True) + pid_file.unlink(missing_ok=True) + # Fall through to spawn +``` + +This ensures: +- If broker is alive (socket accepts connections): skip spawn (no change in behaviour) +- If broker is dead (socket refuses connections): remove stale files and proceed with spawn +- `FileNotFoundError` and other OS errors during connect are suppressed — treated as "not alive" (also falls through to spawn path) + +### 2. `daemon.py` — `atexit` cleanup on daemon exit + +The daemon already removes files via `_cleanup_files()` in `stop()`, and `stop()` is called by the SIGTERM/SIGINT handlers in `run_forever()`. However, if the Python interpreter exits abnormally (e.g., unhandled exception, explicit `sys.exit()`), cleanup may be skipped. + +Add an `atexit` registration in `start()`: + +```python +import atexit +atexit.register(self._cleanup_files) +``` + +This ensures `_cleanup_files()` runs even for abnormal exits (excluding SIGKILL, which cannot be intercepted). + +--- + +## Files to Change + +| File | Change | +|------|--------| +| `src/mcpbridge_wrapper/broker/proxy.py` | Replace existence-only socket check with connect-based liveness check | +| `src/mcpbridge_wrapper/broker/daemon.py` | Register `atexit` cleanup in `start()` | + +--- + +## Tests to Add + +In `tests/unit/test_broker_proxy.py` — new class `TestBrokerProxyStaleSocket`: + +1. **`test_stale_socket_triggers_spawn`** — socket file exists but connect raises `ConnectionRefusedError`; verify `Popen` is called and stale files are removed. +2. **`test_live_socket_skips_spawn`** — socket file exists and connect succeeds; verify `Popen` is NOT called. +3. **`test_stale_socket_with_stale_pid_file_triggers_spawn`** — both files exist, connect raises `ConnectionRefusedError`; verify both files are removed and spawn proceeds. + +In `tests/unit/test_broker_daemon.py` — new class `TestBrokerDaemonAtExit`: + +4. **`test_atexit_registered_after_start`** — after `daemon.start()`, `atexit` registry includes `_cleanup_files`. + +--- + +## Acceptance Criteria + +- [ ] After broker crash, next `--broker-spawn` session auto-recovers without manual file removal +- [ ] Liveness check uses `connect()` not `exists()` +- [ ] Daemon registers `atexit` cleanup on `start()` +- [ ] All existing broker tests pass +- [ ] New tests cover the stale-socket scenario and atexit registration +- [ ] `ruff check src/` passes +- [ ] `mypy src/` passes (if configured) +- [ ] Coverage ≥ 90% + +--- + +## Dependencies + +None — can be implemented independently. diff --git a/SPECS/ARCHIVE/P2-T2_Self-healing_stale_socket_and_PID_file_recovery/P2-T2_Validation_Report.md b/SPECS/ARCHIVE/P2-T2_Self-healing_stale_socket_and_PID_file_recovery/P2-T2_Validation_Report.md new file mode 100644 index 0000000..3fe475b --- /dev/null +++ b/SPECS/ARCHIVE/P2-T2_Self-healing_stale_socket_and_PID_file_recovery/P2-T2_Validation_Report.md @@ -0,0 +1,63 @@ +# P2-T2 Validation Report + +**Task:** Self-healing stale socket and PID file recovery +**Date:** 2026-03-01 +**Branch:** feature/P2-T2-stale-socket-recovery +**Verdict:** PASS + +--- + +## Quality Gate Results + +| Gate | Result | Details | +|------|--------|---------| +| `ruff check src/` | ✅ PASS | All checks passed | +| `pytest` | ✅ PASS | 719 passed, 5 skipped | +| `pytest --cov` | ✅ PASS | 91.78% coverage (≥ 90% required) | +| `mypy src/` | N/A | Not configured | + +--- + +## Changes Made + +### `src/mcpbridge_wrapper/broker/proxy.py` + +- Added `import socket` at module level (replaces inline import-inside-function approach) +- Replaced plain `socket_path.exists()` guard in `_spawn_broker_if_needed` with a connect-based liveness check: + - If socket file exists and `connect()` succeeds → broker is alive, skip spawn + - If socket file exists and `connect()` raises `OSError` (covers `ConnectionRefusedError`, `OSError: socket operation on non-socket`, timeouts) → treat as stale, remove both socket and PID files, fall through to spawn + +### `src/mcpbridge_wrapper/broker/daemon.py` + +- Added `import atexit` at module level +- Registered `self._cleanup_files` with `atexit` in `start()` after the transport is successfully started, ensuring socket/PID files are removed even on abnormal interpreter exits (unhandled exceptions, `sys.exit()`). SIGKILL cannot be intercepted and is not covered. + +--- + +## Tests Added / Updated + +### `tests/unit/test_broker_proxy.py` + +- **Updated** `TestBrokerProxyAutoSpawn::test_spawn_noop_when_socket_exists` — now mocks `socket.socket.connect()` to succeed (simulating a live broker) instead of relying on the old existence-only check. + +- **Added** `TestBrokerProxyStaleSocket` class with 3 tests: + - `test_stale_socket_triggers_spawn` — socket file exists, `connect()` raises `ConnectionRefusedError` → `Popen` is called + - `test_stale_socket_removes_files` — socket and PID files are removed before spawn attempt + - `test_live_socket_skips_spawn` — socket file exists, `connect()` succeeds → `Popen` NOT called + +### `tests/unit/test_broker_daemon.py` + +- **Added** `TestBrokerDaemonAtExit` class with 1 test: + - `test_atexit_registered_after_start` — verifies `atexit.register` is called with `_cleanup_files` during `start()` + +--- + +## Acceptance Criteria Status + +- [x] After broker crash, next `--broker-spawn` session auto-recovers without manual file removal +- [x] Liveness check uses `connect()` not `exists()` +- [x] Daemon registers `atexit` cleanup on `start()` +- [x] All existing broker tests pass (719 passed, 5 skipped) +- [x] New tests cover the stale-socket scenario and atexit registration +- [x] `ruff check src/` passes +- [x] Coverage ≥ 90% diff --git a/SPECS/ARCHIVE/_Historical/REVIEW_P2-T2_stale_socket_recovery.md b/SPECS/ARCHIVE/_Historical/REVIEW_P2-T2_stale_socket_recovery.md new file mode 100644 index 0000000..dd6769a --- /dev/null +++ b/SPECS/ARCHIVE/_Historical/REVIEW_P2-T2_stale_socket_recovery.md @@ -0,0 +1,69 @@ +## REVIEW REPORT — P2-T2 Stale Socket Recovery + +**Scope:** origin/main..HEAD +**Files:** 4 (proxy.py, daemon.py, test_broker_proxy.py, test_broker_daemon.py) +**Date:** 2026-03-01 + +--- + +### Summary Verdict + +- [ ] Approve +- [x] Approve with comments +- [ ] Request changes +- [ ] Block + +--- + +### Critical Issues + +None. + +--- + +### Secondary Issues + +**[Low] Blocking socket connect inside async context** + +In `proxy.py` → `_spawn_broker_if_needed`, the liveness check uses `socket.socket(AF_UNIX, ...)` with `s.settimeout(1.0)` — a synchronous blocking call inside an `async def` function. If the daemon is hung (not refusing but also not accepting), the event loop is blocked for up to 1 second. + +In practice, Unix domain socket connects on the local machine are instantaneous; the timeout protects against edge cases. For broker mode, the 1-second block is acceptable given that: +- `_spawn_broker_if_needed` is called once per proxy session start +- The hang scenario (listening socket not accepting) is extremely rare with broker + +**Fix (optional, lower priority):** Could be converted to an async probe via `asyncio.open_unix_connection` with `asyncio.wait_for`. Defer unless profiling shows impact. + +--- + +**[Nit] atexit handler not deregistered after clean stop** + +In `daemon.py`, `atexit.register(self._cleanup_files)` is called on successful start. After `stop()` completes and removes files, the atexit handler remains registered. On interpreter exit after a clean shutdown, `_cleanup_files` runs again on already-absent files. + +This is safe because `_cleanup_files` uses `unlink(missing_ok=True)`. However, repeated registration across multiple `start()`/`stop()` cycles (not currently possible due to PID file locking, but possible in tests) would accumulate atexit registrations. + +**Fix (optional):** Use `atexit.unregister(self._cleanup_files)` in `stop()`. Low priority — current behavior is safe. + +--- + +### Architectural Notes + +- The catch-all `except OSError` in `_spawn_broker_if_needed` handles `ConnectionRefusedError`, `FileNotFoundError` (TOCTOU: socket disappears between `exists()` and `connect()`), and `OSError: [Errno 38] Socket operation on non-socket` (connecting to a regular file). This is the correct breadth of coverage. +- The atexit approach complements the existing SIGTERM/SIGINT handlers in `run_forever()`. The combination means clean files are guaranteed on: normal exit, `sys.exit()`, SIGTERM, SIGINT. Only SIGKILL (non-interceptable) leaves stale files — which the new proxy liveness check now handles automatically. +- The existing `test_spawn_noop_when_socket_exists` test was correctly updated to mock `socket.connect()` success rather than relying on a plain `touch()`'d file (which would have produced an `OSError` with the new code). + +--- + +### Tests + +- 4 new tests added (3 for stale socket proxy scenarios, 1 for atexit daemon registration). +- 1 existing test updated to reflect new liveness-check semantics. +- All 719 tests pass; coverage 91.78% (≥ 90%). +- **Gap (Low priority):** No explicit test for TOCTOU race (socket file disappears between `exists()` and `connect()`). Covered implicitly by `except OSError` but not explicitly tested. + +--- + +### Next Steps + +- **P2-T3** (P1, now unblocked by P2-T2 ✅): Fix double-spawn race condition with `fcntl.flock` spawn lock +- **Optional** (Nit): Add `atexit.unregister` in `daemon.stop()` if test-cycle isolation becomes an issue +- **Optional** (Low): Convert blocking socket probe to async `open_unix_connection` + `wait_for` diff --git a/SPECS/INPROGRESS/next.md b/SPECS/INPROGRESS/next.md index 44fec02..1bb60f4 100644 --- a/SPECS/INPROGRESS/next.md +++ b/SPECS/INPROGRESS/next.md @@ -1,10 +1,17 @@ # No Active Task -**Status:** Idle — BUG-T8 archived. Select the next task from `SPECS/Workplan.md`. +**Status:** Idle — P2-T2 archived. Select the next task from `SPECS/Workplan.md`. ## Recently Archived +- **P2-T2** — Self-healing stale socket and PID file recovery (2026-03-01, PASS) - **BUG-T8** — Fix broker proxy bridge exits after first write due to BaseProtocol missing _drain_helper (2026-03-01, PASS) - **P1-T3** — Improve MCP settings examples in README to present broker setup first (2026-03-01, PASS) - **P1-T2** — Add Xcode 26.4 known issue release-notes link to README (2026-02-28, PASS) - **P1-T1** — Add the version badge in the README.md (2026-02-28, PASS) + +## Suggested Next Tasks + +- **P2-T1** (P1) — Replace --broker-spawn/--broker-connect with single --broker flag +- **P2-T3** (P1) — Fix double-spawn race condition when MCP client toggles rapidly (depends on P2-T2 ✅) +- **P2-T4** (P1) — Surface broker unavailability as JSON-RPC error instead of silent timeout diff --git a/SPECS/Workplan.md b/SPECS/Workplan.md index ea237bc..52eb7d0 100644 --- a/SPECS/Workplan.md +++ b/SPECS/Workplan.md @@ -71,7 +71,8 @@ Add new tasks using the canonical template in [TASK_TEMPLATE.md](TASK_TEMPLATE.m - [ ] All MCP settings examples in README and DocC use `--broker` - [ ] All existing tests pass -#### ⬜️ P2-T2: Self-healing stale socket and PID file recovery +#### ✅ P2-T2: Self-healing stale socket and PID file recovery +- **Status:** ✅ Completed (2026-03-01) - **Description:** When the broker daemon crashes or is killed, it leaves `broker.sock` and `broker.pid` on disk. The proxy's `_spawn_broker_if_needed` checks `socket_path.exists()` and skips spawning if the socket file is present — even if no process is listening. This silently blocks all future broker mode sessions until the user manually deletes the files. Fix by validating socket liveness via `connect()` before concluding a broker is running: if `connect()` fails with `ConnectionRefusedError`, treat both files as stale, remove them, and proceed with spawn. Also clean up socket file on daemon exit via `atexit`/signal handler. - **Priority:** P0 - **Dependencies:** none @@ -80,10 +81,10 @@ Add new tasks using the canonical template in [TASK_TEMPLATE.md](TASK_TEMPLATE.m - `src/mcpbridge_wrapper/broker/proxy.py` — liveness check in `_spawn_broker_if_needed` - `src/mcpbridge_wrapper/broker/daemon.py` — socket cleanup on exit - **Acceptance Criteria:** - - [ ] After broker crash, next `--broker-spawn` (or `--broker`) session auto-recovers without manual file removal - - [ ] Liveness check uses `connect()` not `exists()` - - [ ] Daemon removes `broker.sock` on clean exit and on SIGTERM - - [ ] All existing broker tests pass + - [x] After broker crash, next `--broker-spawn` (or `--broker`) session auto-recovers without manual file removal + - [x] Liveness check uses `connect()` not `exists()` + - [x] Daemon removes `broker.sock` on clean exit and on SIGTERM + - [x] All existing broker tests pass #### ⬜️ P2-T3: Fix double-spawn race condition when MCP client toggles rapidly - **Description:** When an MCP client (e.g. Zed) toggles the connection off/on quickly, two proxy processes start simultaneously. Both check for a running broker, find none, and both spawn a daemon. Two competing daemons fight over the socket path: one wins, the other crashes. The losing proxy's client gets no broker and shows 0 tools. Fix with a filesystem lock (e.g. `fcntl.flock` on the PID file) so only one spawn attempt proceeds at a time; the second waiter detects the winner's daemon and connects. diff --git a/src/mcpbridge_wrapper/broker/daemon.py b/src/mcpbridge_wrapper/broker/daemon.py index 3bd553c..fa36712 100644 --- a/src/mcpbridge_wrapper/broker/daemon.py +++ b/src/mcpbridge_wrapper/broker/daemon.py @@ -19,6 +19,7 @@ from __future__ import annotations import asyncio +import atexit import contextlib import json import logging @@ -134,6 +135,10 @@ async def start(self) -> None: await self._rollback_startup() raise + # Ensure socket/PID files are removed even on abnormal interpreter exit + # (e.g. unhandled exception, sys.exit). SIGKILL cannot be intercepted. + atexit.register(self._cleanup_files) + self._state = BrokerState.READY logger.info( "Broker READY (upstream PID %s)", diff --git a/src/mcpbridge_wrapper/broker/proxy.py b/src/mcpbridge_wrapper/broker/proxy.py index 980a0b6..e49a3d7 100644 --- a/src/mcpbridge_wrapper/broker/proxy.py +++ b/src/mcpbridge_wrapper/broker/proxy.py @@ -18,6 +18,7 @@ import contextlib import logging import os +import socket import sys from mcpbridge_wrapper.broker.types import BrokerConfig @@ -127,10 +128,23 @@ async def _spawn_broker_if_needed(self) -> None: except (ValueError, ProcessLookupError, PermissionError): logger.debug("Stale PID file; will spawn broker.") - # Check if socket already exists (race condition: broker started without PID file yet) + # Check if socket already exists and is actually alive. + # A stale socket file left after a crash passes exists() but refuses connections. if socket_path.exists(): - logger.debug("Broker socket already present; skipping spawn.") - return + try: + with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s: + s.settimeout(1.0) + s.connect(str(socket_path)) + # Connection succeeded — broker is alive + logger.debug("Broker socket present and accepting connections; skipping spawn.") + return + except OSError: + logger.warning( + "Stale socket found (broker not accepting connections); removing stale files." + ) + socket_path.unlink(missing_ok=True) + pid_file.unlink(missing_ok=True) + # Fall through to spawn logger.info("Spawning broker daemon…") import subprocess diff --git a/tests/unit/test_broker_daemon.py b/tests/unit/test_broker_daemon.py index a3642b4..4952723 100644 --- a/tests/unit/test_broker_daemon.py +++ b/tests/unit/test_broker_daemon.py @@ -958,3 +958,36 @@ async def test_transport_start_failure_sets_stop_events(self, tmp_path: Path) -> assert daemon.state == BrokerState.STOPPED assert daemon._stop_event.is_set() assert daemon._stopped_event.is_set() + + +# --------------------------------------------------------------------------- +# P2-T2: atexit cleanup registration +# --------------------------------------------------------------------------- + + +class TestBrokerDaemonAtExit: + @pytest.mark.asyncio + async def test_atexit_registered_after_start(self, tmp_path: Path) -> None: + """After start(), _cleanup_files is registered with atexit.""" + cfg = _make_config(tmp_path) + daemon = BrokerDaemon(cfg) + proc = _make_mock_process() + + registered: list[object] = [] + + def _fake_register(fn: object, *args: object, **kwargs: object) -> None: + registered.append(fn) + + with patch( + "mcpbridge_wrapper.broker.daemon.asyncio.create_subprocess_exec", + new=AsyncMock(return_value=proc), + ), patch("mcpbridge_wrapper.broker.daemon.atexit.register", side_effect=_fake_register): + await daemon.start() + + # _cleanup_files must have been registered + assert daemon._cleanup_files in registered + + # Cleanup + daemon._stop_event.set() + if daemon._read_task and not daemon._read_task.done(): + daemon._read_task.cancel() diff --git a/tests/unit/test_broker_proxy.py b/tests/unit/test_broker_proxy.py index f046525..a88809c 100644 --- a/tests/unit/test_broker_proxy.py +++ b/tests/unit/test_broker_proxy.py @@ -254,13 +254,20 @@ async def test_spawn_noop_when_pid_file_live(self, tmp_path: Path) -> None: @pytest.mark.asyncio async def test_spawn_noop_when_socket_exists(self, tmp_path: Path) -> None: - """_spawn_broker_if_needed does nothing when socket file already exists.""" + """_spawn_broker_if_needed does nothing when socket file exists and broker is alive.""" cfg = _make_config(tmp_path) - cfg.socket_path.touch() # simulate running broker + cfg.socket_path.touch() # simulate socket file present proxy = BrokerProxy(cfg, auto_spawn=True, connect_timeout=0.1) - with patch("subprocess.Popen") as mock_popen: + # Mock socket.connect to succeed (broker is alive) + mock_sock = MagicMock() + mock_sock.__enter__ = MagicMock(return_value=mock_sock) + mock_sock.__exit__ = MagicMock(return_value=False) + + with patch("mcpbridge_wrapper.broker.proxy.socket.socket", return_value=mock_sock), patch( + "subprocess.Popen" + ) as mock_popen: await proxy._spawn_broker_if_needed() mock_popen.assert_not_called() @@ -304,6 +311,76 @@ def _fake_exists(path_obj: Path) -> bool: assert cmd[3:] == ["--broker-daemon", "--web-ui", "--web-ui-port", "9090"] +# --------------------------------------------------------------------------- +# Stale socket recovery (P2-T2) +# --------------------------------------------------------------------------- + + +class TestBrokerProxyStaleSocket: + @pytest.mark.asyncio + async def test_stale_socket_triggers_spawn(self, tmp_path: Path) -> None: + """When socket exists but connect raises OSError, spawn proceeds.""" + cfg = _make_config(tmp_path) + cfg.socket_path.touch() # stale socket file + + proxy = BrokerProxy(cfg, auto_spawn=True, connect_timeout=0.3) + + mock_sock = MagicMock() + mock_sock.__enter__ = MagicMock(return_value=mock_sock) + mock_sock.__exit__ = MagicMock(return_value=False) + mock_sock.connect.side_effect = ConnectionRefusedError + + with patch("mcpbridge_wrapper.broker.proxy.socket.socket", return_value=mock_sock), patch( + "subprocess.Popen" + ) as mock_popen, pytest.raises(TimeoutError): + # Socket never appears after spawn, so TimeoutError is expected + await proxy._spawn_broker_if_needed() + + mock_popen.assert_called_once() + + @pytest.mark.asyncio + async def test_stale_socket_removes_files(self, tmp_path: Path) -> None: + """Stale socket and PID files are removed before attempting spawn.""" + cfg = _make_config(tmp_path) + cfg.socket_path.touch() + cfg.pid_file.write_text("99999") # stale PID file + + proxy = BrokerProxy(cfg, auto_spawn=True, connect_timeout=0.3) + + mock_sock = MagicMock() + mock_sock.__enter__ = MagicMock(return_value=mock_sock) + mock_sock.__exit__ = MagicMock(return_value=False) + mock_sock.connect.side_effect = ConnectionRefusedError + + with patch("mcpbridge_wrapper.broker.proxy.socket.socket", return_value=mock_sock), patch( + "subprocess.Popen" + ), pytest.raises(TimeoutError): + await proxy._spawn_broker_if_needed() + + assert not cfg.socket_path.exists() + assert not cfg.pid_file.exists() + + @pytest.mark.asyncio + async def test_live_socket_skips_spawn(self, tmp_path: Path) -> None: + """When socket exists and connect succeeds, spawn is skipped.""" + cfg = _make_config(tmp_path) + cfg.socket_path.touch() + + proxy = BrokerProxy(cfg, auto_spawn=True, connect_timeout=0.1) + + mock_sock = MagicMock() + mock_sock.__enter__ = MagicMock(return_value=mock_sock) + mock_sock.__exit__ = MagicMock(return_value=False) + # connect() returns None (success) + + with patch("mcpbridge_wrapper.broker.proxy.socket.socket", return_value=mock_sock), patch( + "subprocess.Popen" + ) as mock_popen: + await proxy._spawn_broker_if_needed() + + mock_popen.assert_not_called() + + # --------------------------------------------------------------------------- # _parse_broker_args # ---------------------------------------------------------------------------