# Patch summary: .github/workflows/prek.yml and CONTRIBUTING.md
#
# Workflow changes (prek.yml):
#   * The single quality-checks job becomes three jobs:
#       cache-status   - computes the uv cache fingerprint with
#                        scripts/ci/compute_uv_fingerprint.py, then queries the release
#                        tagged CI_UV_CACHE_RELEASE_TAG. cache-hit is 'true' only when
#                        assets named <prefix>-<fingerprint>.tar.zst.part-NNN exist and
#                        their three-digit indices form a gap-free run starting at 000.
#       build-cache    - runs on art-cache-builder only when cache-hit != 'true';
#                        installs the gh CLI, then runs
#                        scripts/ci/build_and_push_uv_cache.sh.
#       quality-checks - needs both jobs and takes the fingerprint from
#                        needs.cache-status.outputs.fingerprint instead of computing it.
#                        Its `if: !failure() && !cancelled()` is what keeps it from being
#                        skipped together with build-cache on a cache hit.
#   * The env block (CI_BASE_IMAGE, CI_PYTHON_MM, UV_*, ...) moves from the job to the
#     workflow level so all three jobs share it.
#   * The workflow-level permission changes from contents: read to contents: write.
#   * The "Build and upload cache with: ..." error hints are removed from the restore step.
#
# CONTRIBUTING.md changes: the "CI uv Cache Refresh" section is renamed "CI uv Cache"
# and now describes the automatic rebuild; the manual script is kept as a fallback.
#
# NOTE(review): contents: write is granted to every job. Presumably only build-cache
#   needs it (to upload release assets) - confirm, and consider a job-level
#   `permissions:` block on build-cache with contents: read elsewhere.
# NOTE(review): in cache-status, `curl -fsSL ... || true` leaves release_json empty on
#   any curl failure (auth, rate limit, network), and the step then reports
#   "release not found" with cache-hit=false, which triggers a rebuild.
# NOTE(review): build-cache resolves the gh CLI from cli/cli releases/latest, so the
#   installed version is unpinned.
# NOTE(review): CONTRIBUTING.md says the rebuild uses Docker Buildx; nothing in this
#   workflow shows that - verify against build_and_push_uv_cache.sh.
# NOTE(review): line breaks in this patch appear to have been collapsed to spaces, so
#   hunk line counts cannot be checked against the text below.
diff --git a/.github/workflows/prek.yml b/.github/workflows/prek.yml index 3565d162..467bb87f 100644 --- a/.github/workflows/prek.yml +++ b/.github/workflows/prek.yml @@ -6,22 +6,80 @@ on: branches: [main] permissions: - contents: read + contents: write + +env: + CI_BASE_IMAGE: "pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel" + CI_PYTHON_MM: "3.11" + CI_UV_CACHE_RELEASE_TAG: "prek-uv-cache" + CI_UV_CACHE_ASSET_PREFIX: "prek-uv-cache" + UV_CACHE_DIR: "/root/.cache/uv" + UV_LINK_MODE: "copy" + TORCH_CUDA_ARCH_LIST: "8.0" jobs: - quality-checks: + cache-status: runs-on: art-large-runner + outputs: + cache-hit: ${{ steps.check.outputs.cache-hit }} + fingerprint: ${{ steps.fingerprint.outputs.fingerprint }} + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Compute expected uv cache fingerprint + id: fingerprint + run: | + fp="$(python3 scripts/ci/compute_uv_fingerprint.py \ + --pyproject pyproject.toml \ + --uv-lock uv.lock \ + --base-image "${CI_BASE_IMAGE}" \ + --python-mm "${CI_PYTHON_MM}")" + echo "fingerprint=${fp}" >> "${GITHUB_OUTPUT}" + echo "Expected uv cache fingerprint: ${fp}" + + - name: Check if uv cache exists + id: check + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + fingerprint="${{ steps.fingerprint.outputs.fingerprint }}" + part_prefix="${CI_UV_CACHE_ASSET_PREFIX}-${fingerprint}.tar.zst.part-" + release_api="https://api.github.com/repos/${GITHUB_REPOSITORY}/releases/tags/${CI_UV_CACHE_RELEASE_TAG}" + + release_json="$(curl -fsSL \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "Accept: application/vnd.github+json" \ + "${release_api}" || true)" + + if [ -z "${release_json}" ]; then + echo "Cache release '${CI_UV_CACHE_RELEASE_TAG}' not found." 
+ echo "cache-hit=false" >> "${GITHUB_OUTPUT}" + exit 0 + fi + + hit="$(RELEASE_JSON="${release_json}" PART_PREFIX="${part_prefix}" python3 -c " + import json, os, re + payload = json.loads(os.environ['RELEASE_JSON']) + prefix = os.environ['PART_PREFIX'] + pattern = re.compile(r'^' + re.escape(prefix) + r'(\d{3})$') + parts = sorted( + int(m.group(1)) + for a in payload.get('assets', []) + for m in [pattern.match(a.get('name', ''))] + if m and a.get('id') is not None + ) + print('true' if parts and parts == list(range(len(parts))) else 'false') + ")" + echo "cache-hit=${hit}" >> "${GITHUB_OUTPUT}" + echo "Cache hit: ${hit}" + + build-cache: + needs: cache-status + if: needs.cache-status.outputs.cache-hit != 'true' + runs-on: art-cache-builder container: image: pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel - env: - CI_BASE_IMAGE: "pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel" - CI_PYTHON_MM: "3.11" - CI_UV_CACHE_RELEASE_TAG: "prek-uv-cache" - CI_UV_CACHE_ASSET_PREFIX: "prek-uv-cache" - UV_CACHE_DIR: "/root/.cache/uv" - UV_LINK_MODE: "copy" - TORCH_CUDA_ARCH_LIST: "8.0" - steps: - name: Install CI dependencies run: | @@ -31,6 +89,17 @@ jobs: curl -LsSf https://astral.sh/uv/install.sh | sh echo "/root/.local/bin" >> "${GITHUB_PATH}" + - name: Install gh CLI + env: + GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + GH_DL_URL="$(curl -fsSL \ + -H "Authorization: Bearer ${GH_TOKEN}" \ + https://api.github.com/repos/cli/cli/releases/latest \ + | python3 -c "import json,sys;r=json.load(sys.stdin);print([a['browser_download_url'] for a in r['assets'] if a['name'].endswith('_linux_amd64.tar.gz')][0])")" + curl -fsSL "${GH_DL_URL}" | tar xz --strip-components=1 -C /usr/local + gh version + - name: Checkout code uses: actions/checkout@v4 @@ -38,23 +107,42 @@ jobs: run: | git config --global --add safe.directory "${GITHUB_WORKSPACE}" - - name: Compute expected uv cache fingerprint - id: expected-uv-fingerprint + - name: Build and upload uv cache + env: + GH_TOKEN: ${{ 
secrets.GITHUB_TOKEN }} run: | - fp="$(python3 scripts/ci/compute_uv_fingerprint.py \ - --pyproject pyproject.toml \ - --uv-lock uv.lock \ + bash scripts/ci/build_and_push_uv_cache.sh \ --base-image "${CI_BASE_IMAGE}" \ - --python-mm "${CI_PYTHON_MM}")" - echo "fingerprint=${fp}" >> "${GITHUB_OUTPUT}" - echo "Expected uv cache fingerprint: ${fp}" + --python-mm "${CI_PYTHON_MM}" + + quality-checks: + needs: [cache-status, build-cache] + if: ${{ !failure() && !cancelled() }} + runs-on: art-large-runner + container: + image: pytorch/pytorch:2.9.0-cuda12.8-cudnn9-devel + steps: + - name: Install CI dependencies + run: | + apt-get update + apt-get install -y --no-install-recommends ca-certificates curl git zstd + rm -rf /var/lib/apt/lists/* + curl -LsSf https://astral.sh/uv/install.sh | sh + echo "/root/.local/bin" >> "${GITHUB_PATH}" + + - name: Checkout code + uses: actions/checkout@v4 + + - name: Mark workspace as a safe git directory + run: | + git config --global --add safe.directory "${GITHUB_WORKSPACE}" - name: Restore prebuilt uv cache env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | release_api="https://api.github.com/repos/${GITHUB_REPOSITORY}/releases/tags/${CI_UV_CACHE_RELEASE_TAG}" - fingerprint="${{ steps.expected-uv-fingerprint.outputs.fingerprint }}" + fingerprint="${{ needs.cache-status.outputs.fingerprint }}" part_prefix="${CI_UV_CACHE_ASSET_PREFIX}-${fingerprint}.tar.zst.part-" release_json="$(curl -fsSL \ @@ -64,14 +152,12 @@ jobs: if [ -z "${release_json}" ]; then echo "::error::Missing cache release '${CI_UV_CACHE_RELEASE_TAG}'." - echo "::error::Build and upload cache with: bash scripts/ci/build_and_push_uv_cache.sh" exit 1 fi part_selection_file="/tmp/uv-cache-part-selection.txt" if ! 
RELEASE_JSON="${release_json}" PART_PREFIX="${part_prefix}" python3 -c "import json, os, re, sys; payload=json.loads(os.environ['RELEASE_JSON']); part_prefix=os.environ['PART_PREFIX']; pattern=re.compile(r'^' + re.escape(part_prefix) + r'(\\d{3})$'); parts=[]; [parts.append((int(m.group(1)), int(a.get('id')), a.get('name'))) for a in payload.get('assets', []) for m in [pattern.match(a.get('name', ''))] if m and a.get('id') is not None]; parts.sort(key=lambda x: x[0]); indices=[p[0] for p in parts]; expected=list(range(len(parts))); print('\\n'.join(f'{asset_id} {name}' for _, asset_id, name in parts)) if parts and indices == expected else (_ for _ in ()).throw(SystemExit(2 if not parts else 3))" > "${part_selection_file}"; then echo "::error::No complete uv cache part set found for prefix '${part_prefix}'." - echo "::error::Build and upload cache with: bash scripts/ci/build_and_push_uv_cache.sh" exit 1 fi diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index dca91399..fc589c13 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -37,35 +37,19 @@ uv run prek run pytest These checks are automatically run in CI for all pull requests. If your PR fails these checks, re-run the corresponding `prek` hook locally and commit any fixes. -### CI uv Cache Refresh +### CI uv Cache The PR `prek` workflow uses a prebuilt full `uv` cache (stored as a GitHub release asset) to avoid rebuilding heavy dependencies on every run. -To refresh the cache after dependency changes, ensure your branch is rebased or merged with main, or checkout the PR merge branch, then run: +The cache is keyed by a fingerprint computed from `pyproject.toml`, `uv.lock`, the base Docker image, and the Python version. When dependencies change, the fingerprint changes and CI automatically rebuilds the cache using Docker Buildx and uploads it for future runs. The first CI run after a dependency change will be slower while the cache is built. 
-```bash -bash scripts/ci/build_and_push_uv_cache.sh -``` - -This command builds a full cache archive locally (using `uv sync --frozen --all-extras --group dev --no-install-project`) and uploads a fingerprinted part set: - -- `prek-uv-cache-<fingerprint>.tar.zst.part-000` -- `prek-uv-cache-<fingerprint>.tar.zst.part-001` -- ... - -The script also prunes old immutable cache assets (keeps newest 4 by default). -It requires GitHub CLI authentication (`gh auth login`) and should be run in an environment compatible with CI (same base CUDA image/toolchain). - -You can override native-build parallelism while preparing cache: +To manually rebuild the cache (e.g., if the automatic build fails), run: ```bash -bash scripts/ci/build_and_push_uv_cache.sh --build-jobs 2 +bash scripts/ci/build_and_push_uv_cache.sh ``` -By default, `--build-jobs auto` is used and resolves from available CPU and memory. -By default, cache parts are split at `1900 MiB`; override with `--part-size-mb <N>` if needed. - -CI computes the expected cache fingerprint from `pyproject.toml`, `uv.lock`, base image, Python version, and cache asset layout contract. If no matching cache part set exists, CI fails fast and tells you to refresh cache with the script above. +This requires GitHub CLI authentication (`gh auth login`) and should be run in an environment compatible with CI (same base CUDA image/toolchain). ### Release Process