spencerschrock · spencerschrock · Jul 8, 2025 · Jul 18, 2025 · Jul 21, 2025 · Jul 18, 2025
diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
@@ -55,7 +55,7 @@ jobs:
         with:
           containerfiles: |
             ./benchmarks/Containerfile
-          image: ghcr.io/sigstore/model-transparency-benchmarks
+          image: ghcr.io/spencerschrock/model-transparency-benchmarks
           tags: "latest ${{ steps.config.outputs.head }}"
           archs: amd64
           oci: false
@@ -75,28 +75,26 @@ jobs:
           image: ${{ steps.build_image.outputs.image }}
           tags: ${{ steps.build_image.outputs.tags }}
           registry: ghcr.io
-  submit-cloud-batch:
-    needs: publish-benchmark-container
-    runs-on: ubuntu-latest
-    permissions:
-      id-token: 'write'
-    env:
-      MODEL: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
-      TAG: ${{needs.publish-benchmark-container.outputs.head}}
-    steps:
-      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
-        with:
-          ref: ${{needs.publish-benchmark-container.outputs.head}}
-      - uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 # v2.1.10
-        with:
-          workload_identity_provider: projects/306323169285/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider
-          service_account: 'model-transparency-gha@sigstore-infra-playground.iam.gserviceaccount.com'
-      - run: |
-          export OUTPUT_FILE=$(date --utc +%Y%m%d%H%M%S)_$TAG.json
-          gcloud batch jobs submit \
-            --job-prefix=bench \
-            --project sigstore-infra-playground \
-            --location us-central1 \
-            --config - <<EOF
-            $(envsubst '$TAG','$MODEL','$OUTPUT_FILE' < benchmarks/cloud_batch.json)
-            EOF
+  # submit-cloud-batch:
+  #   needs: publish-benchmark-container
+  #   runs-on: ubuntu-latest
+  #   permissions:
+  #     id-token: 'write'
+  #   env:
+  #     TAG: ${{needs.publish-benchmark-container.outputs.head}}
+  #   steps:
+  #     - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+  #       with:
+  #         ref: ${{needs.publish-benchmark-container.outputs.head}}
+  #     - uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 # v2.1.10
+  #       with:
+  #         workload_identity_provider: projects/306323169285/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider
+  #         service_account: 'model-transparency-gha@sigstore-infra-playground.iam.gserviceaccount.com'
+  #     - run: |
+  #         gcloud batch jobs submit \
+  #           --job-prefix=bench \
+  #           --project sigstore-infra-playground \
+  #           --location us-central1 \
+  #           --config - <<EOF
+  #           $(envsubst '$TAG' < benchmarks/cloud_batch.json)
+  #           EOF
diff --git a/benchmarks/Containerfile b/benchmarks/Containerfile
@@ -14,7 +14,7 @@
 
 FROM python:3.13-slim
 
-RUN python -m pip install --no-cache-dir hatch huggingface_hub[cli]
+RUN python -m pip install --no-cache-dir hatch
 
 COPY pyproject.toml LICENSE README.md ./
 COPY src ./src

diff --git a/benchmarks/cloud_batch.json b/benchmarks/cloud_batch.json
@@ -0,0 +1,62 @@
+{
+    "taskGroups": [
+        {
+            "taskSpec": {
+                "runnables": [
+                    {
+                        "container": {
+                            "imageUri": "ghcr.io/spencerschrock/model-transparency-benchmarks:${TAG}",
+                            "entrypoint": "/bin/sh",
+                            "commands": [
+                                "-c",
+                                "benchmarks/run.sh /mnt/disks/models /mnt/disks/gcs ${TAG}"
+                            ]
+                        }
+                    }
+                ],
+                "computeResource": {
+                    "cpuMilli": 16000,
+                    "memoryMib": 65536
+                },
+                "volumes": [
+                    {
+                        "gcs": {
+                            "remotePath": "model-transparency-benchmarks"
+                        },
+                        "mountPath": "/mnt/disks/gcs"
+                    },
+                    {
+                        "deviceName": "models",
+                        "mountPath": "/mnt/disks/models",
+                        "mountOptions": "rw,async"
+                    }
+                ],
+                "maxRetryCount": 0,
+                "maxRunDuration": "3600s"
+            },
+            "taskCount": 1,
+            "parallelism": 1
+        }
+    ],
+    "allocationPolicy": {
+        "instances": [
+            {
+                "policy": {
+                    "machineType": "c2d-standard-16",
+                    "disks": [
+                        {
+                            "newDisk": {
+                                "sizeGb": 375,
+                                "type": "local-ssd"
+                            },
+                            "deviceName": "models"
+                        }
+                    ]
+                }
+            }
+        ]
+    },
+    "logsPolicy": {
+        "destination": "CLOUD_LOGGING"
+    }
+}
diff --git a/benchmarks/exp_hash.py b/benchmarks/exp_hash.py
@@ -58,16 +58,6 @@ def build_parser() -> argparse.ArgumentParser:
     return parser
 
 
-def _human_size(size: int) -> str:
-    if size >= GB:
-        return str(size / GB) + " GB"
-    elif size >= MB:
-        return str(size / MB) + " MB"
-    elif size >= KB:
-        return str(size / KB) + " KB"
-    return str(size) + " B"
-
-
 def _get_hasher(hash_algorithm: str) -> hashing.StreamingHashEngine:
     # TODO: Once Python 3.9 support is deprecated revert to using `match`
     if hash_algorithm == "sha256":

diff --git a/benchmarks/run.sh b/benchmarks/run.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Usage: run.sh MODEL_DIR OUTPUT_DIR REVISION
+# For each size/file-count pair: generate a model, time its serialization, then delete it.
+set -euxo pipefail
+
+MODEL_DIR=$1  # directory under which each generated model is created
+OUTPUT_DIR=$2  # directory that receives one JSON result file per model
+REVISION=$3  # label embedded in every result filename
+FILENAME_BASE=$OUTPUT_DIR/$(date --utc +%Y%m%d%H%M%S)_$REVISION  # UTC timestamp, taken once per run
+
+for SIZE in 32 256; do  # multiplied by 1024^3 below to get the byte count
+    for FILES in 64 512; do  # passed to the generator as -n
+        MODEL=${SIZE}gb_${FILES}files
+        MODEL_PATH=$MODEL_DIR/$MODEL
+        mkdir -p "$MODEL_PATH"
+        SIZE_BYTES=$((SIZE * 1024 * 1024 * 1024))
+        hatch run bench.py3.11:generate dir --root "$MODEL_PATH" -n "$FILES" "$SIZE_BYTES"
+        hatch run bench.py3.11:python benchmarks/time_serialize.py "$MODEL_PATH" \
+            --output="${FILENAME_BASE}_${MODEL}.json"
+        rm -r "${MODEL_PATH}"  # remove this model before the next one is generated
+    done
+done