
Commit 20e5feb

bench: add workflow to build benchmark container (#484)
* add containerfile for benchmark image
* add benchmark trigger workflow

  This is modeled after the OpenSSF Scorecard `scdiff` workflow, which looks for comments from repository members. This requires the developer triggering the benchmark to have their membership in the Sigstore organization public. This approach gains flexibility compared to a label trigger as additional arguments can be provided after the /bench command.

* add a timing wrapper around model serialization
* add runner script and job configuration
* use generated models instead of real models
* matrix small/large model with few/many files
* add workflow_dispatch trigger
* add terminating newline

Signed-off-by: Spencer Schrock <[email protected]>
1 parent d725654 commit 20e5feb

File tree (6 files changed, +289 −0 lines)

.github/workflows/bench.yml
benchmarks/Containerfile
benchmarks/cloud_batch.json
benchmarks/run.sh
benchmarks/time_serialize.py
pyproject.toml

.github/workflows/bench.yml

Lines changed: 106 additions & 0 deletions
name: model_signing benchmarks
on:
  issue_comment:
    types: [created]
  workflow_dispatch:

permissions: {}

jobs:
  publish-benchmark-container:
    if: ${{ github.event_name == 'workflow_dispatch' }} || ${{ (github.event.issue.pull_request) && (startsWith(github.event.comment.body, '/bench')) }}
    runs-on: [ubuntu-latest]
    permissions:
      packages: write
    outputs:
      head: ${{ steps.config.outputs.head }}
    steps:
      - name: Validate and configure benchmark
        id: config
        env:
          COMMENT_BODY: ${{ github.event.comment.body }}
        uses: actions/github-script@60a0d83039c74a4aee543508d2ffcb1c3799cdea # v7.0.1
        with:
          script: |
            if (context.eventName === 'workflow_dispatch') {
              core.setOutput('head', context.sha)
              return
            }

            const allowedAssociations = ["COLLABORATOR", "MEMBER", "OWNER"];
            authorAssociation = '${{ github.event.comment.author_association }}'
            if (!allowedAssociations.includes(authorAssociation)) {
              core.setFailed("You don't have access to run the benchmarks");
              return
            }

            const response = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.issue.number,
            })

            // avoid race condition between comment and fetching PR head sha
            const commentTime = new Date('${{ github.event.comment.created_at }}');
            const prTime = new Date(response.data.head.repo.pushed_at)
            if (prTime >= commentTime) {
              core.setFailed("The PR may have been updated since the benchmark request, " +
                "please review any changes and relaunch if safe.");
              return
            }

            core.setOutput('head', response.data.head.sha)

      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{ steps.config.outputs.head }}

      - name: Build Image
        id: build_image
        uses: redhat-actions/buildah-build@7a95fa7ee0f02d552a32753e7414641a04307056 # v2.13
        with:
          containerfiles: |
            ./benchmarks/Containerfile
          image: ghcr.io/sigstore/model-transparency-benchmarks
          tags: "latest ${{ steps.config.outputs.head }}"
          archs: amd64
          oci: false

      - name: Login to GitHub Container Registry
        uses: docker/login-action@74a5d142397b4f367a81961eba4e8cd7edddf772 # v3.4.0
        id: registry_login
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Push To GHCR
        uses: redhat-actions/push-to-registry@5ed88d269cf581ea9ef6dd6806d01562096bee9c # v2.8
        id: push
        with:
          image: ${{ steps.build_image.outputs.image }}
          tags: ${{ steps.build_image.outputs.tags }}
          registry: ghcr.io
  submit-cloud-batch:
    needs: publish-benchmark-container
    runs-on: ubuntu-latest
    permissions:
      id-token: 'write'
    env:
      TAG: ${{needs.publish-benchmark-container.outputs.head}}
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          ref: ${{needs.publish-benchmark-container.outputs.head}}
      - uses: google-github-actions/auth@ba79af03959ebeac9769e648f473a284504d9193 # v2.1.10
        with:
          workload_identity_provider: projects/306323169285/locations/global/workloadIdentityPools/github-actions-pool/providers/github-actions-provider
          service_account: 'model-transparency-gha@sigstore-infra-playground.iam.gserviceaccount.com'
      - run: |
          gcloud batch jobs submit \
            --job-prefix=bench \
            --project sigstore-infra-playground \
            --location us-central1 \
            --config - <<EOF
          $(envsubst '$TAG' < benchmarks/cloud_batch.json)
          EOF
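For reference, this workflow can be kicked off in two ways: an eligible commenter (COLLABORATOR, MEMBER, or OWNER association, which for Sigstore organization members requires public membership) posts a comment starting with /bench on a pull request, or a maintainer starts it manually via workflow_dispatch. A hedged sketch using the GitHub CLI; the PR number is a placeholder and the repository slug is assumed to be sigstore/model-transparency:

# Comment trigger: post /bench on an open pull request (PR number is illustrative).
gh pr comment 123 --repo sigstore/model-transparency --body "/bench"

# Manual trigger: dispatch the workflow on the default branch.
gh workflow run bench.yml --repo sigstore/model-transparency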

benchmarks/Containerfile

Lines changed: 25 additions & 0 deletions
# Copyright 2025 The Sigstore Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

FROM python:3.13-slim

RUN python -m pip install --no-cache-dir hatch

COPY pyproject.toml LICENSE README.md ./
COPY src ./src
COPY benchmarks ./benchmarks

RUN hatch env create bench.py3.11

ENTRYPOINT [ "bash" ]
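A hedged sketch of building and exercising this image locally with podman; the tag, mount paths, and "dev" revision label are placeholders, and CI instead builds with buildah and tags the image with the PR head SHA. Note that the full benchmark matrix generates models up to 256 GB, so a local run needs substantial free disk space.

# Build the benchmark image from the repository root (mirrors the buildah-build step).
podman build -f benchmarks/Containerfile -t model-transparency-benchmarks:dev .

# Run the benchmark script against scratch directories (paths are illustrative).
mkdir -p scratch/models scratch/results
podman run --rm \
  -v "$PWD/scratch/models:/mnt/disks/models" \
  -v "$PWD/scratch/results:/mnt/disks/gcs" \
  model-transparency-benchmarks:dev \
  benchmarks/run.sh /mnt/disks/models /mnt/disks/gcs dev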

benchmarks/cloud_batch.json

Lines changed: 62 additions & 0 deletions
{
  "taskGroups": [
    {
      "taskSpec": {
        "runnables": [
          {
            "container": {
              "imageUri": "ghcr.io/sigstore/model-transparency-benchmarks:${TAG}",
              "entrypoint": "/bin/sh",
              "commands": [
                "-c",
                "benchmarks/run.sh /mnt/disks/models /mnt/disks/gcs ${TAG}"
              ]
            }
          }
        ],
        "computeResource": {
          "cpuMilli": 16000,
          "memoryMib": 65536
        },
        "volumes": [
          {
            "gcs": {
              "remotePath": "model-transparency-benchmarks"
            },
            "mountPath": "/mnt/disks/gcs"
          },
          {
            "deviceName": "models",
            "mountPath": "/mnt/disks/models",
            "mountOptions": "rw,async"
          }
        ],
        "maxRetryCount": 0,
        "maxRunDuration": "7200s"
      },
      "taskCount": 1,
      "parallelism": 1
    }
  ],
  "allocationPolicy": {
    "instances": [
      {
        "policy": {
          "machineType": "c2d-standard-16",
          "disks": [
            {
              "newDisk": {
                "sizeGb": 375,
                "type": "local-ssd"
              },
              "deviceName": "models"
            }
          ]
        }
      }
    ]
  },
  "logsPolicy": {
    "destination": "CLOUD_LOGGING"
  }
}
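The ${TAG} placeholders above are not expanded by Cloud Batch itself; the workflow pipes this file through envsubst before submitting it. A minimal sketch of the equivalent manual submission (the TAG value is a placeholder for the image tag pushed by the publish job):

export TAG="<pr-head-sha>"   # placeholder: the tag used for the benchmark image
envsubst '$TAG' < benchmarks/cloud_batch.json | \
  gcloud batch jobs submit \
    --job-prefix=bench \
    --project sigstore-infra-playground \
    --location us-central1 \
    --config -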

benchmarks/run.sh

Lines changed: 22 additions & 0 deletions
#!/bin/bash
set -euxo pipefail

MODEL_DIR=$1
OUTPUT_DIR=$2
REVISION=$3
FILENAME_BASE=$OUTPUT_DIR/$(date --utc +%Y%m%d%H%M%S)_$REVISION

for SIZE in 32 256; do
  for FILES in 64 512; do
    MODEL=${SIZE}gb_${FILES}files
    MODEL_PATH=$MODEL_DIR/$MODEL
    mkdir -p "$MODEL_PATH"
    SIZE_BYTES=$((SIZE * 1024 * 1024 * 1024))
    hatch run bench.py3.11:generate dir --root "$MODEL_PATH" -n "$FILES" "$SIZE_BYTES"
    hatch run bench.py3.11:python benchmarks/time_serialize.py "$MODEL_PATH" \
      --output="${FILENAME_BASE}_${MODEL}.json"
    rm -r "${MODEL_PATH}"
  done
done
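The script sweeps a 2×2 matrix: 32 GB and 256 GB of model data, split across 64 and 512 files, generating each synthetic model, timing its serialization, and deleting it before the next configuration. A hedged sketch of running it outside Cloud Batch, assuming the hatch bench environment from the Containerfile is available; the directories and revision label are illustrative, and the larger configurations need several hundred GB of free space:

mkdir -p /tmp/bench/models /tmp/bench/results
bash benchmarks/run.sh /tmp/bench/models /tmp/bench/results "$(git rev-parse --short HEAD)"
# Results land in files such as /tmp/bench/results/<timestamp>_<rev>_32gb_64files.json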

benchmarks/time_serialize.py

Lines changed: 71 additions & 0 deletions
# Copyright 2025 The Sigstore Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Script for timing model serialization benchmarks."""

import argparse
import json
import sys
import time

import cpuinfo
import psutil
import serialize


def build_parser() -> argparse.ArgumentParser:
    """Builds the command line parser to benchmark serializing models."""
    parser = argparse.ArgumentParser(description="model benchmark data")

    parser.add_argument("path", help="path to model")

    parser.add_argument(
        "--repeat",
        help="how many times to repeat each model",
        type=int,
        default=6,
    )

    parser.add_argument("--output", "-o", help="path for result file")

    return parser


if __name__ == "__main__":
    args = build_parser().parse_args()

    serialize_args = serialize.build_parser().parse_args(
        [args.path, "--use_shards"]
    )

    results = dict()
    results["model"] = args.path
    results["ram"] = psutil.virtual_memory().total

    times = list()
    for _ in range(args.repeat):
        st = time.time()
        payload = serialize.run(serialize_args)
        en = time.time()
        times.append(en - st)

    results["times"] = times
    results["cpu"] = cpuinfo.get_cpu_info()

    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(results, f, ensure_ascii=False, indent=4)
    else:
        json.dump(results, sys.stdout, ensure_ascii=False, indent=4)
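The wrapper reuses the existing serialize benchmark helper (forcing --use_shards), repeats the serialization six times by default, and records the wall-clock times alongside host RAM and CPU details. A hedged sketch of a standalone invocation and the rough shape of the output; the model path and numbers are illustrative:

hatch run bench.py3.11:python benchmarks/time_serialize.py \
  /mnt/disks/models/32gb_64files --repeat 3 --output result.json
# result.json looks roughly like:
# {
#     "model": "/mnt/disks/models/32gb_64files",
#     "ram": 68719476736,
#     "times": [123.4, 121.9, 122.7],
#     "cpu": { ...py-cpuinfo output... }
# }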

pyproject.toml

Lines changed: 3 additions & 0 deletions
@@ -97,6 +97,8 @@ Use `hatch run +py=3... bench:chunk ${args}` to benchmark the chunk size parameter.
 """
 extra-dependencies = [
   "numpy",
+  "psutil",
+  "py-cpuinfo",
 ]

 [[tool.hatch.envs.bench.matrix]]
@@ -125,6 +127,7 @@ description = """Custom environment for pytype.
 Use `hatch run type:check` to check types.
 """
 extra-dependencies = [
+  "py-cpuinfo",
   "pytest",
   "pytype",
 ]
