Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ We distinguish between **internal modules** (under packages with `_` prefix, e.g
* Standard library and internal imports don't need underscore prefix
* Only prefix symbols that are truly private to the module itself (e.g. `_context_var` for a module-private ContextVar)

### Imports

Prefer top-level imports. Only use local (in-function) imports when truly necessary — e.g. to break circular dependencies or to defer a heavy import that isn't always needed.

### Type Annotations

Avoid `Any` whenever feasible. Use specific types — including concrete types from third-party libraries. Only use `Any` when the type is truly generic and no downstream code needs to downcast it.
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,26 @@ embedding:
| xml | | `.xml` |
| yaml | | `.yaml`, `.yml` |

### Custom Database Location

By default, index databases (`cocoindex.db` and `target_sqlite.db`) live alongside settings in `<project>/.cocoindex_code/`. When running in Docker, you may want the databases on the container's native filesystem for performance (LMDB doesn't work well on mounted volumes) while keeping the source code and settings on a mounted volume.

Set `COCOINDEX_CODE_DB_PATH_MAPPING` to remap database locations by path prefix:

```bash
COCOINDEX_CODE_DB_PATH_MAPPING=/workspace=/db-files
```

With this mapping, a project at `/workspace/myrepo` stores its databases in `/db-files/myrepo/` instead of `/workspace/myrepo/.cocoindex_code/`. Settings files remain in the original location.

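Multiple mappings are comma-separated and resolved in order (first match wins). Source prefixes are matched on whole path components, so `/workspace` does not match a project under `/workspace2`: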

```bash
COCOINDEX_CODE_DB_PATH_MAPPING=/workspace=/db-files,/workspace2=/db-files2
```

Both source and target must be absolute paths. If no mapping matches, the default location is used.

## Troubleshooting

Run `ccc doctor` to diagnose common issues. It checks your settings, daemon health, embedding model, file matching, and index status — all in one command.
Expand Down
16 changes: 14 additions & 2 deletions src/cocoindex_code/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
default_user_settings,
find_parent_with_marker,
find_project_root,
resolve_db_dir,
save_project_settings,
save_user_settings,
user_settings_path,
Expand Down Expand Up @@ -389,10 +390,11 @@ def reset(
"""Reset project databases and optionally remove settings."""
project_root = require_project_root()
cocoindex_dir = project_root / ".cocoindex_code"
db_dir = resolve_db_dir(project_root)

db_files = [
cocoindex_dir / "cocoindex.db",
cocoindex_dir / "target_sqlite.db",
db_dir / "cocoindex.db",
db_dir / "target_sqlite.db",
]
settings_file = cocoindex_dir / "settings.yml"

Expand Down Expand Up @@ -436,6 +438,12 @@ def reset(
f.unlink(missing_ok=True)

if all_:
# Remove db_dir if empty and different from cocoindex_dir
if db_dir != cocoindex_dir:
try:
db_dir.rmdir()
except OSError:
pass # Not empty or doesn't exist
# Remove .cocoindex_code/ if empty
try:
cocoindex_dir.rmdir()
Expand Down Expand Up @@ -539,6 +547,10 @@ def doctor() -> None:
other_keys = [k for k in env_resp.env_names if k not in settings_keys]
if other_keys:
_typer.echo(f" Other env vars in daemon: {', '.join(sorted(other_keys))}")
if env_resp.db_path_mappings:
_typer.echo(" DB path mappings:")
for m in env_resp.db_path_mappings:
_typer.echo(f" {m.source} \u2192 {m.target}")
except Exception as e:
_print_error(f"Failed to get daemon env: {e}")

Expand Down
6 changes: 4 additions & 2 deletions src/cocoindex_code/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from dataclasses import dataclass
from pathlib import Path

from .settings import resolve_db_dir

_DEFAULT_MODEL = "sbert/sentence-transformers/all-MiniLM-L6-v2"


Expand Down Expand Up @@ -96,8 +98,8 @@ def from_env(cls) -> Config:
_DEFAULT_MODEL,
)

# Index directory is always under the root
index_dir = root / ".cocoindex_code"
# Index directory: apply DB path mapping if configured
index_dir = resolve_db_dir(root)

# Device: auto-detect CUDA or use env override
device = os.environ.get("COCOINDEX_CODE_DEVICE")
Expand Down
10 changes: 9 additions & 1 deletion src/cocoindex_code/daemon.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
from .settings import (
global_settings_mtime_us,
load_user_settings,
resolve_db_dir,
user_settings_dir,
)
from .shared import Embedder, create_embedder
Expand Down Expand Up @@ -345,7 +346,7 @@ async def _check_index_status(project_root_str: str) -> DoctorCheckResult:
from cocoindex.connectors import sqlite as coco_sqlite

project_root = Path(project_root_str)
db_path = project_root / ".cocoindex_code" / "target_sqlite.db"
db_path = resolve_db_dir(project_root) / "target_sqlite.db"
details = [f"Index: {db_path}"]

if not db_path.exists():
Expand Down Expand Up @@ -441,9 +442,16 @@ async def _dispatch(
return StopResponse(ok=True)

if isinstance(req, DaemonEnvRequest):
from .protocol import DbPathMappingEntry
from .settings import get_db_path_mappings

return DaemonEnvResponse(
env_names=sorted(os.environ.keys()),
settings_env_names=settings_env_names,
db_path_mappings=[
DbPathMappingEntry(source=str(m.source), target=str(m.target))
for m in get_db_path_mappings()
],
)

if isinstance(req, DoctorRequest):
Expand Down
14 changes: 9 additions & 5 deletions src/cocoindex_code/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
SearchResult,
)
from .query import query_codebase
from .settings import resolve_db_dir
from .shared import (
CODEBASE_DIR,
EMBEDDER,
Expand Down Expand Up @@ -170,7 +171,7 @@ async def search(
offset: int = 0,
) -> list[SearchResult]:
"""Search within this project."""
target_db = self._project_root / ".cocoindex_code" / "target_sqlite.db"
target_db = resolve_db_dir(self._project_root) / "target_sqlite.db"
results = await query_codebase(
query=query,
target_sqlite_db_path=target_db,
Expand Down Expand Up @@ -254,11 +255,14 @@ async def create(
indexer loads them fresh from disk on every run so that user edits
take effect without restarting the daemon.
"""
index_dir = project_root / ".cocoindex_code"
index_dir.mkdir(parents=True, exist_ok=True)
settings_dir = project_root / ".cocoindex_code"
settings_dir.mkdir(parents=True, exist_ok=True)

cocoindex_db_path = index_dir / "cocoindex.db"
target_sqlite_db_path = index_dir / "target_sqlite.db"
db_dir = resolve_db_dir(project_root)
db_dir.mkdir(parents=True, exist_ok=True)

cocoindex_db_path = db_dir / "cocoindex.db"
target_sqlite_db_path = db_dir / "target_sqlite.db"

settings = coco.Settings.from_env(cocoindex_db_path)

Expand Down
6 changes: 6 additions & 0 deletions src/cocoindex_code/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,15 @@ class DoctorResponse(_msgspec.Struct, tag="doctor"):
final: bool = False


class DbPathMappingEntry(_msgspec.Struct):
    """One DB path mapping, with both sides carried as plain strings."""

    # Source path prefix of the mapping.
    source: str
    # Target path prefix of the mapping.
    target: str


class DaemonEnvResponse(_msgspec.Struct, tag="daemon_env"):
    """Message tagged ``daemon_env`` describing the daemon's environment."""

    # Names of environment variables (names only, no values).
    env_names: list[str]
    # Presumably the subset of names that come from settings -- verify against the daemon.
    settings_env_names: list[str]
    # DB path mappings; optional, empty when omitted.
    # NOTE(review): relies on msgspec copying an empty mutable default per
    # instance rather than sharing it -- confirm for the msgspec version in use.
    db_path_mappings: list[DbPathMappingEntry] = []


class ErrorResponse(_msgspec.Struct, tag="error"):
Expand Down
80 changes: 78 additions & 2 deletions src/cocoindex_code/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -115,14 +116,89 @@ def default_project_settings() -> ProjectSettings:
_SETTINGS_FILE_NAME = "settings.yml" # project-level
_USER_SETTINGS_FILE_NAME = "global_settings.yml" # user-level

# Name of the environment variable that holds the database path mappings.
_ENV_DB_PATH_MAPPING = "COCOINDEX_CODE_DB_PATH_MAPPING"


@dataclass
class DbPathMapping:
    """A single ``source=target`` database path mapping."""

    # Path prefix that a project root is matched against.
    source: Path
    # Directory that replaces ``source`` when the mapping applies.
    target: Path


# Module-level cache of the parsed mappings; ``None`` means "not parsed yet".
_db_path_mapping: list[DbPathMapping] | None = None


def _parse_db_path_mapping() -> list[DbPathMapping]:
    """Parse the ``COCOINDEX_CODE_DB_PATH_MAPPING`` env var.

    Format: ``/src1=/dst1,/src2=/dst2``. Whitespace around each entry and
    around the ``=`` separator is ignored, and empty entries (such as the one
    produced by a trailing comma) are skipped. Each entry is split on its
    first ``=`` only.

    Returns:
        The mappings in the order they were written, with both paths
        resolved. Empty when the variable is unset or blank.

    Raises:
        ValueError: If an entry is not of the form ``source=target``, or if
            either side is not an absolute path.
    """
    raw = os.environ.get(_ENV_DB_PATH_MAPPING, "")

    mappings: list[DbPathMapping] = []
    # An unset or blank value splits into blank chunks only, which are all skipped.
    for chunk in raw.split(","):
        entry = chunk.strip()
        if not entry:
            continue
        source_text, separator, target_text = entry.partition("=")
        # Strip both sides: otherwise "/a =/b" would silently produce a source
        # path ending in a space that can never match a real project root.
        source_text = source_text.strip()
        target_text = target_text.strip()
        if not separator or not source_text or not target_text:
            raise ValueError(
                f"{_ENV_DB_PATH_MAPPING}: invalid entry {entry!r}, expected format 'source=target'"
            )
        source = Path(source_text)
        target = Path(target_text)
        if not source.is_absolute():
            raise ValueError(
                f"{_ENV_DB_PATH_MAPPING}: source path must be absolute, got {source!r}"
            )
        if not target.is_absolute():
            raise ValueError(
                f"{_ENV_DB_PATH_MAPPING}: target path must be absolute, got {target!r}"
            )
        mappings.append(DbPathMapping(source=source.resolve(), target=target.resolve()))
    return mappings


def resolve_db_dir(project_root: Path) -> Path:
    """Return the directory that holds the database files for a project.

    The resolved project root is tested against each configured
    ``COCOINDEX_CODE_DB_PATH_MAPPING`` entry in order. The first entry whose
    source is the root itself or one of its ancestors wins, and the result is
    that entry's target joined with the root's path relative to the source.
    When nothing matches, the result is ``project_root / ".cocoindex_code"``.
    """
    global _db_path_mapping  # noqa: PLW0603
    if _db_path_mapping is None:
        # First use: parse the env var once and keep the result.
        _db_path_mapping = _parse_db_path_mapping()

    root = project_root.resolve()
    for candidate in _db_path_mapping:
        try:
            # Raises ValueError unless root equals, or lies under, the source.
            suffix = root.relative_to(candidate.source)
        except ValueError:
            continue
        return candidate.target / suffix
    return project_root / _SETTINGS_DIR_NAME


def get_db_path_mappings() -> list[DbPathMapping]:
    """Return a copy of the mappings parsed from ``COCOINDEX_CODE_DB_PATH_MAPPING``.

    The env var is parsed on first use and cached. A new list is returned on
    every call, so callers cannot alter the cached list itself.
    """
    global _db_path_mapping  # noqa: PLW0603
    cached = _db_path_mapping
    if cached is None:
        cached = _db_path_mapping = _parse_db_path_mapping()
    return [*cached]


def _reset_db_path_mapping_cache() -> None:
    """Drop the cached mappings so the env var is parsed again on next use.

    Intended for tests that change ``COCOINDEX_CODE_DB_PATH_MAPPING``.
    """
    global _db_path_mapping  # noqa: PLW0603
    _db_path_mapping = None


def user_settings_dir() -> Path:
"""Return ``~/.cocoindex_code/``.

Respects ``COCOINDEX_CODE_DIR`` env var for overriding the base directory.
"""
import os

override = os.environ.get("COCOINDEX_CODE_DIR")
if override:
return Path(override)
Expand Down
82 changes: 82 additions & 0 deletions tests/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from cocoindex_code.cli import app
from cocoindex_code.client import stop_daemon
from cocoindex_code.settings import (
_reset_db_path_mapping_cache,
default_project_settings,
find_parent_with_marker,
save_project_settings,
Expand Down Expand Up @@ -580,6 +581,87 @@ def test_session_daemon_restart_missing_global_settings() -> None:
assert "User settings not found" in result.output


# ---------------------------------------------------------------------------
# DB path mapping tests
# ---------------------------------------------------------------------------


@pytest.fixture()
def e2e_project_with_db_mapping() -> Iterator[tuple[Path, Path]]:
    """Set up a project with COCOINDEX_CODE_DB_PATH_MAPPING pointing to a separate db dir.

    Creates ``workspace/myproject`` (the project, with ``main.py`` and a
    ``.git`` directory) and ``db-files`` under a fresh temp dir, and maps
    ``workspace`` to ``db-files``. The working directory is the project while
    the test runs. On teardown the project is reset, the daemon is stopped,
    and the working directory and both env vars are restored.

    Yields (project_dir, db_base_dir).
    """
    # NOTE(review): base_dir is never removed in this fixture, so each run
    # leaves a ``ccc_e2e_*`` temp directory behind -- confirm this is intended.
    base_dir = Path(tempfile.mkdtemp(prefix="ccc_e2e_"))
    project_dir = base_dir / "workspace" / "myproject"
    project_dir.mkdir(parents=True)
    db_base_dir = base_dir / "db-files"
    db_base_dir.mkdir()

    (project_dir / "main.py").write_text(SAMPLE_MAIN_PY)
    (project_dir / ".git").mkdir()

    # Remember the previous values (None = unset) so teardown can restore them.
    old_env = {
        k: os.environ.get(k) for k in ("COCOINDEX_CODE_DIR", "COCOINDEX_CODE_DB_PATH_MAPPING")
    }
    os.environ["COCOINDEX_CODE_DIR"] = str(base_dir)
    workspace = str(base_dir / "workspace")
    os.environ["COCOINDEX_CODE_DB_PATH_MAPPING"] = f"{workspace}={db_base_dir}"
    # Drop any cached mapping so the value set above is the one that gets parsed.
    _reset_db_path_mapping_cache()
    old_cwd = os.getcwd()
    os.chdir(project_dir)

    try:
        yield project_dir, db_base_dir
    finally:
        # Run the reset from inside the project, in case the test changed cwd.
        os.chdir(project_dir)
        runner.invoke(app, ["reset", "--all", "-f"])
        stop_daemon()
        os.chdir(old_cwd)
        # Clear the cache again so this test's mapping is not reused afterwards.
        _reset_db_path_mapping_cache()
        for k, v in old_env.items():
            if v is None:
                os.environ.pop(k, None)
            else:
                os.environ[k] = v


def test_session_db_path_mapping(
    e2e_project_with_db_mapping: tuple[Path, Path],
) -> None:
    """Run init, index, search and reset with a DB path mapping in effect.

    The settings file must stay in the project's ``.cocoindex_code`` directory
    while ``target_sqlite.db`` is created in, and later removed from, the
    mapped directory.
    """
    project_dir, db_base_dir = e2e_project_with_db_mapping
    settings_file = project_dir / ".cocoindex_code" / "settings.yml"
    unmapped_db = project_dir / ".cocoindex_code" / "target_sqlite.db"
    mapped_db = db_base_dir / "myproject" / "target_sqlite.db"

    # init: settings are written next to the project, not in the mapped dir.
    init_result = runner.invoke(app, ["init"], catch_exceptions=False)
    assert init_result.exit_code == 0, init_result.output
    assert settings_file.exists()

    # index: the database appears only in the mapped location.
    index_result = runner.invoke(app, ["index"], catch_exceptions=False)
    assert index_result.exit_code == 0, index_result.output
    assert mapped_db.exists()
    assert not unmapped_db.exists()

    # search: queries are served from the mapped database.
    search_result = runner.invoke(app, ["search", "fibonacci"], catch_exceptions=False)
    assert search_result.exit_code == 0, search_result.output
    assert "main.py" in search_result.output

    # reset (without --all): the mapped database goes, the settings stay.
    reset_result = runner.invoke(app, ["reset", "-f"], catch_exceptions=False)
    assert reset_result.exit_code == 0
    assert not mapped_db.exists()
    assert settings_file.exists()


# ---------------------------------------------------------------------------
# Unit tests (not session-based)
# ---------------------------------------------------------------------------
Expand Down
Loading
Loading