Skip to content

Commit c566568

Browse files
use generated models instead of real models
Signed-off-by: Spencer Schrock <[email protected]>
1 parent 85869f3 commit c566568

File tree

4 files changed

+25
-13
lines changed

4 files changed

+25
-13
lines changed

.github/workflows/bench.yml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,6 @@ jobs:
8181
permissions:
8282
id-token: 'write'
8383
env:
84-
MODEL: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B
8584
TAG: ${{needs.publish-benchmark-container.outputs.head}}
8685
steps:
8786
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
@@ -92,11 +91,10 @@ jobs:
9291
workload_identity_provider: projects/306323169285/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider
9392
service_account: 'model-transparency-gha@sigstore-infra-playground.iam.gserviceaccount.com'
9493
- run: |
95-
export OUTPUT_FILE=$(date --utc +%Y%m%d%H%M%S)_$TAG.json
9694
gcloud batch jobs submit \
9795
--job-prefix=bench \
9896
--project sigstore-infra-playground \
9997
--location us-central1 \
10098
--config - <<EOF
101-
$(envsubst '$TAG','$MODEL','$OUTPUT_FILE' < benchmarks/cloud_batch.json)
99+
$(envsubst '$TAG' < benchmarks/cloud_batch.json)
102100
EOF

benchmarks/Containerfile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
FROM python:3.13-slim
1616

17-
RUN python -m pip install --no-cache-dir hatch huggingface_hub[cli]
17+
RUN python -m pip install --no-cache-dir hatch
1818

1919
COPY pyproject.toml LICENSE README.md ./
2020
COPY src ./src

benchmarks/cloud_batch.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"entrypoint": "/bin/sh",
1010
"commands": [
1111
"-c",
12-
"benchmarks/run.sh ${MODEL} /mnt/disks/models /mnt/disks/gcs/${OUTPUT_FILE}"
12+
"benchmarks/run.sh /mnt/disks/models /mnt/disks/gcs ${TAG}"
1313
]
1414
}
1515
}

benchmarks/run.sh

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,25 @@
11
#!/bin/bash
22
set -euxo pipefail
33

4-
MODEL=$1
5-
MODEL_DIR=$2
6-
MODEL_PATH=$MODEL_DIR/$(echo $MODEL | cut --delimiter='/' --fields=2-)
7-
OUTPUT_FILE=$3
8-
9-
huggingface-cli download $MODEL --local-dir "$MODEL_PATH"
10-
hatch run bench.py3.11:python benchmarks/time_serialize.py "$MODEL_PATH" \
11-
--output=$OUTPUT_FILE
4+
MODEL_DIR=$1
5+
OUTPUT_DIR=$2
6+
REVISION=$3
7+
FILENAME_BASE=$OUTPUT_DIR/$(date --utc +%Y%m%d%H%M%S)_$REVISION
8+
9+
for SIZE in 32 48 128; do
10+
MODEL=${SIZE}gb
11+
MODEL_PATH=$MODEL_DIR/$MODEL
12+
mkdir -p "$MODEL_PATH"  # create the per-size model directory, not a literal "MODEL_PATH" dir
13+
14+
# simulate a handful of small metadata files in the repository
15+
hatch run bench.py3.11:generate dir --root "$MODEL_PATH" -n 8 16384
16+
# followed by model shards which are 8GiB each
17+
N=$((${SIZE}/8))
18+
SIZE_BYTES=$(($SIZE * 1024 * 1024 * 1024))
19+
hatch run bench.py3.11:generate dir --root "$MODEL_PATH" -n "$N" "$SIZE_BYTES"
20+
21+
hatch run bench.py3.11:python benchmarks/time_serialize.py "$MODEL_PATH" \
22+
--output="${FILENAME_BASE}_${MODEL}.json"
23+
done
24+
25+

0 commit comments

Comments
 (0)