Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 24 additions & 26 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
with:
containerfiles: |
./benchmarks/Containerfile
image: ghcr.io/sigstore/model-transparency-benchmarks
image: ghcr.io/spencerschrock/model-transparency-benchmarks
tags: "latest ${{ steps.config.outputs.head }}"
archs: amd64
oci: false
Expand All @@ -75,28 +75,26 @@ jobs:
image: ${{ steps.build_image.outputs.image }}
tags: ${{ steps.build_image.outputs.tags }}
registry: ghcr.io
# Submits a Google Cloud Batch job built from benchmarks/cloud_batch.json.
# Runs only after the publish-benchmark-container job has finished.
submit-cloud-batch:
  needs: publish-benchmark-container
  runs-on: ubuntu-latest
  permissions:
    # Declaring any permission sets every unlisted scope to `none`, so the
    # read access that actions/checkout relies on is granted explicitly.
    contents: 'read'
    # Lets the auth step request the OIDC token it exchanges through the
    # workload identity provider.
    id-token: 'write'
  env:
    MODEL: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
    TAG: ${{needs.publish-benchmark-container.outputs.head}}
  steps:
    - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
      with:
        ref: ${{needs.publish-benchmark-container.outputs.head}}
    - uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 # v2.1.10
      with:
        workload_identity_provider: projects/306323169285/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider
        service_account: 'model-transparency-gha@sigstore-infra-playground.iam.gserviceaccount.com'
    - run: |
        # Assign first, export second: `export VAR=$(cmd)` would report the
        # exit status of `export` and hide a failing `date`.
        OUTPUT_FILE="$(date --utc +%Y%m%d%H%M%S)_${TAG}.json"
        export OUTPUT_FILE
        # envsubst is restricted to the three listed variables; any other
        # `$NAME` text in the template is passed through untouched.
        gcloud batch jobs submit \
          --job-prefix=bench \
          --project sigstore-infra-playground \
          --location us-central1 \
          --config - <<EOF
        $(envsubst '$TAG','$MODEL','$OUTPUT_FILE' < benchmarks/cloud_batch.json)
        EOF
# submit-cloud-batch:
# needs: publish-benchmark-container
# runs-on: ubuntu-latest
# permissions:
# id-token: 'write'
# env:
# TAG: ${{needs.publish-benchmark-container.outputs.head}}
# steps:
# - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
# with:
# ref: ${{needs.publish-benchmark-container.outputs.head}}
# - uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 # v2.1.10
# with:
# workload_identity_provider: projects/306323169285/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider
# service_account: 'model-transparency-gha@sigstore-infra-playground.iam.gserviceaccount.com'
# - run: |
# gcloud batch jobs submit \
# --job-prefix=bench \
# --project sigstore-infra-playground \
# --location us-central1 \
# --config - <<EOF
# $(envsubst '$TAG' < benchmarks/cloud_batch.json)
# EOF
2 changes: 1 addition & 1 deletion benchmarks/Containerfile
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

FROM python:3.13-slim

RUN python -m pip install --no-cache-dir hatch huggingface_hub[cli]
RUN python -m pip install --no-cache-dir hatch

COPY pyproject.toml LICENSE README.md ./
COPY src ./src
Expand Down
62 changes: 62 additions & 0 deletions benchmarks/cloud_batch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
{
"taskGroups": [
{
"taskSpec": {
"runnables": [
{
"container": {
"imageUri": "ghcr.io/spencerschrock/model-transparency-benchmarks:${TAG}",
"entrypoint": "/bin/sh",
"commands": [
"-c",
"benchmarks/run.sh /mnt/disks/models /mnt/disks/gcs ${TAG}"
]
}
}
],
"computeResource": {
"cpuMilli": 16000,
"memoryMib": 65536
},
"volumes": [
{
"gcs": {
"remotePath": "model-transparency-benchmarks"
},
"mountPath": "/mnt/disks/gcs"
},
{
"deviceName": "models",
"mountPath": "/mnt/disks/models",
"mountOptions": "rw,async"
}
],
"maxRetryCount": 0,
"maxRunDuration": "3600s"
},
"taskCount": 1,
"parallelism": 1
}
],
"allocationPolicy": {
"instances": [
{
"policy": {
"machineType": "c2d-standard-16",
"disks": [
{
"newDisk": {
"sizeGb": 375,
"type": "local-ssd"
},
"deviceName": "models"
}
]
}
}
]
},
"logsPolicy": {
"destination": "CLOUD_LOGGING"
}
}
10 changes: 0 additions & 10 deletions benchmarks/exp_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,6 @@ def build_parser() -> argparse.ArgumentParser:
return parser


def _human_size(size: int) -> str:
    """Format a byte count as a string with a unit suffix.

    The thresholds are tried from largest to smallest; the first one that
    `size` reaches is used as the divisor. True division is used, so the
    numeric part is rendered as a float. Values below `KB` are returned
    undivided with a " B" suffix.

    NOTE(review): `GB`, `MB` and `KB` are not defined in this block; they are
    presumably module-level constants -- confirm their values there.
    """
    units = ((GB, " GB"), (MB, " MB"), (KB, " KB"))
    for threshold, suffix in units:
        if size >= threshold:
            return str(size / threshold) + suffix
    return str(size) + " B"


def _get_hasher(hash_algorithm: str) -> hashing.StreamingHashEngine:
# TODO: Once Python 3.9 support is deprecated revert to using `match`
if hash_algorithm == "sha256":
Expand Down
22 changes: 22 additions & 0 deletions benchmarks/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Benchmark driver: for every size / file-count combination below, generate a
# model directory, time its serialization, then delete the directory again.
#
# Usage: run.sh MODEL_DIR OUTPUT_DIR REVISION
#   MODEL_DIR   directory under which each model is generated and removed
#   OUTPUT_DIR  directory that receives one JSON result file per combination
#   REVISION    label embedded in every result file name
set -euxo pipefail

# `:?` rejects empty as well as missing arguments. An empty MODEL_DIR would
# otherwise make the loop create and then `rm -r` directories directly under /.
USAGE="usage: $0 MODEL_DIR OUTPUT_DIR REVISION"
MODEL_DIR=${1:?$USAGE}
OUTPUT_DIR=${2:?$USAGE}
REVISION=${3:?$USAGE}

# A single UTC timestamp is taken up front so that all result files written by
# this invocation share the same prefix.
FILENAME_BASE="${OUTPUT_DIR}/$(date --utc +%Y%m%d%H%M%S)_${REVISION}"

for SIZE in 32 256; do
  # SIZE is multiplied by 1024^3 to obtain the byte count handed to the
  # generator; it does not depend on FILES, so compute it once per size.
  SIZE_BYTES=$((SIZE * 1024 * 1024 * 1024))
  for FILES in 64 512; do
    MODEL="${SIZE}gb_${FILES}files"
    MODEL_PATH="${MODEL_DIR}/${MODEL}"
    mkdir -p "$MODEL_PATH"
    # Presumably writes FILES files totalling SIZE_BYTES under MODEL_PATH --
    # confirm against the `generate` script definition.
    hatch run bench.py3.11:generate dir --root "$MODEL_PATH" -n "$FILES" "$SIZE_BYTES"
    hatch run bench.py3.11:python benchmarks/time_serialize.py "$MODEL_PATH" \
      --output="${FILENAME_BASE}_${MODEL}.json"
    # Remove the model before generating the next one.
    rm -r "${MODEL_PATH}"
  done
done