diff --git a/.circleci/config.yml b/.circleci/config.yml deleted file mode 100644 index a841c1584..000000000 --- a/.circleci/config.yml +++ /dev/null @@ -1,81 +0,0 @@ -version: 2.1 - -orbs: - python: circleci/python@1.4.0 - -jobs: - linting: - executor: python/default - steps: - - checkout - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - sudo apt update && sudo apt install libcurl4-openssl-dev - name: Install curl-config from Ubuntu APT - - run: - command: | - python3 install.py --aws --azure --gcp --no-local - name: Install pip dependencies - - run: - command: | - . python-venv/bin/activate - black sebs --check --config .black.toml - name: Python code formatting with black - - run: - command: | - . python-venv/bin/activate - flake8 sebs --config=.flake8.cfg --tee --output-file flake-reports - name: Python code lint with flake8 - - run: - command: | - . python-venv/bin/activate - mypy sebs --config-file=.mypy.ini - name: Python static code verification with mypy - - store_artifacts: - path: flake-reports - destination: flake-reports - test-aws: - executor: python/default - steps: - - checkout - - setup_remote_docker - - restore_cache: - key: deps1-{{ .Branch }}-{{ checksum "requirements.txt" }} - - run: - command: | - if [[ -d $HOME/docker ]]; - then - ls $HOME/docker/*.tar.gz | xargs -I {file} sh -c "zcat {file} | docker load"; - else - docker pull mcopik/serverless-benchmarks:build.aws.python.3.7 - docker pull mcopik/serverless-benchmarks:build.aws.nodejs.12.x - fi - name: Load Docker images - - run: - command: | - python3 install.py --aws - name: Install pip dependencies - - run: - command: | - mkdir -p $HOME/docker - docker images mcopik/serverless-benchmarks --filter='dangling=false' --format '{{.Repository}}:{{.Tag}} {{.ID}}' |\ - xargs -n 2 -t sh -c 'test -e $HOME/docker/$1.tar.gz || docker save $0 | gzip -2 > $HOME/docker/$1.tar.gz' - name: Save Docker images - - save_cache: - key: deps1-{{ .Branch 
}}-{{ checksum "requirements.txt" }} - paths: - - "sebs-virtualenv" - - $HOME/docker - - run: - command: | - . sebs-virtualenv/bin/activate - tests/test_runner.py --deployment aws - name: Execute AWS tests - -workflows: - main: - jobs: - - linting - diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 000000000..1043be62e --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,55 @@ +name: Lint + +on: + push: + pull_request: + +jobs: + linting: + runs-on: ubuntu-latest + + steps: + - name: Check out code + uses: actions/checkout@v4 + + - name: Set up Python + id: setup-python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Cache virtualenv + uses: actions/cache@v4 + with: + path: python-venv + key: venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements.txt') }}-${{ github.ref_name }} + restore-keys: | + venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('requirements.txt') }}- + venv-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}- + + - name: Install system packages + run: | + sudo apt-get update + sudo apt-get install -y libcurl4-openssl-dev + + - name: Install Python dependencies (via install.py) + run: | + python3 install.py --no-aws --no-azure --no-gcp --no-openwhisk --no-local + + - name: Black (check) + run: | + . python-venv/bin/activate + black benchmarks --check --config .black.toml + + - name: Flake8 (lint) + run: | + . 
python-venv/bin/activate + # write to file and echo to stdout (requires flake8 with --tee support) + flake8 benchmarks --config=.flake8.cfg --tee --output-file flake-reports + + - name: Upload flake report + if: always() + uses: actions/upload-artifact@v4 + with: + name: flake-reports + path: flake-reports diff --git a/.gitmodules b/.gitmodules index 4feae9bfb..c33a17880 100644 --- a/.gitmodules +++ b/.gitmodules @@ -3,4 +3,4 @@ url = https://github.com/mcopik/pypapi.git [submodule "benchmarks-data"] path = benchmarks-data - url = https://github.com/spcl/serverless-benchmarks-data.git + url = https://github.com/McLavish/serverless-benchmarks-data-dphpc.git diff --git a/.mypy.ini b/.mypy.ini index e202650ed..636105bfa 100644 --- a/.mypy.ini +++ b/.mypy.ini @@ -3,6 +3,9 @@ [mypy-docker] ignore_missing_imports = True +[mypy-docker.*] +ignore_missing_imports = True + [mypy-tzlocal] ignore_missing_imports = True diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 000000000..58f8adb8d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,30 @@ +# .pre-commit-config.yaml +repos: + - repo: local + hooks: + - id: flake8-local + name: flake8 (project env) + language: python + additional_dependencies: ["flake8==7.1.1"] + entry: flake8 + args: ["--config=.flake8.cfg"] + types: [python] + files: ^(sebs/|benchmarks/) + - repo: local + hooks: + - id: black-check-local + name: black --check (project env) + language: python + additional_dependencies: ["black==22.8.0"] + entry: black + args: ["--config=.black.toml", "--check", "--diff"] + types: [python] + files: ^(sebs/|benchmarks/) + # - repo: local + # hooks: + # - id: mypy-local + # name: mypy (project venv) + # language: system + # entry: bash -lc 'python -m mypy --config-file=.mypy.ini sebs' + # types: [python] + diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..127ae8a76 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,15 @@ +{ + 
"[python]": { + "editor.defaultFormatter": "ms-python.black-formatter", + "editor.formatOnSave": true + }, + + "black-formatter.importStrategy": "fromEnvironment", + "black-formatter.path": [], + "black-formatter.args": ["--config=.black.toml"], + + "flake8.importStrategy": "fromEnvironment", + "flake8.path": [], + "flake8.args": ["--config=.flake8.cfg"], + "flake8.enabled": true +} diff --git a/benchmarks-data b/benchmarks-data index 6a17a460f..25c2bb40b 160000 --- a/benchmarks-data +++ b/benchmarks-data @@ -1 +1 @@ -Subproject commit 6a17a460f289e166abb47ea6298fb939e80e8beb +Subproject commit 25c2bb40b8bde342395534b534ba62f8f0ff3549 diff --git a/benchmarks/000.microbenchmarks/010.sleep/input.py b/benchmarks/000.microbenchmarks/010.sleep/input.py index 041d2ba7f..af0427a6c 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/input.py +++ b/benchmarks/000.microbenchmarks/010.sleep/input.py @@ -1,12 +1,11 @@ +size_generators = {"test": 1, "small": 100, "large": 1000} -size_generators = { - 'test' : 1, - 'small' : 100, - 'large': 1000 -} def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'sleep': size_generators[size] } + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"sleep": size_generators[size]} diff --git a/benchmarks/000.microbenchmarks/010.sleep/python/function.py b/benchmarks/000.microbenchmarks/010.sleep/python/function.py index 7dda59a57..64be15557 100644 --- a/benchmarks/000.microbenchmarks/010.sleep/python/function.py +++ b/benchmarks/000.microbenchmarks/010.sleep/python/function.py @@ -1,9 +1,9 @@ - from time import sleep + def handler(event): # start timing - sleep_time = event.get('sleep') + sleep_time = event.get("sleep") sleep(sleep_time) - return { 'result': sleep_time } + return {"result": sleep_time} diff --git 
a/benchmarks/000.microbenchmarks/020.network-benchmark/input.py b/benchmarks/000.microbenchmarks/020.network-benchmark/input.py index 0d969bc74..8f43ffc5a 100644 --- a/benchmarks/000.microbenchmarks/020.network-benchmark/input.py +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/input.py @@ -2,10 +2,12 @@ def buckets_count(): return 0, 1 -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): return { - 'bucket': { - 'bucket': benchmarks_bucket, - 'output': output_paths[0], + "bucket": { + "bucket": benchmarks_bucket, + "output": output_paths[0], }, } diff --git a/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py b/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py index eb8ccdcf2..58c376a2d 100644 --- a/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py +++ b/benchmarks/000.microbenchmarks/020.network-benchmark/python/function.py @@ -1,27 +1,26 @@ import csv -import json import os.path import socket from datetime import datetime -from time import sleep from . 
import storage + def handler(event): - request_id = event['request-id'] - address = event['server-address'] - port = event['server-port'] - repetitions = event['repetitions'] - output_bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') + request_id = event["request-id"] + address = event["server-address"] + port = event["server-port"] + repetitions = event["repetitions"] + output_bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") times = [] i = 0 socket.setdefaulttimeout(3) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(('', 0)) - message = request_id.encode('utf-8') + server_socket.bind(("", 0)) + message = request_id.encode("utf-8") adr = (address, port) consecutive_failures = 0 while i < repetitions + 1: @@ -43,16 +42,16 @@ def handler(event): consecutive_failures = 0 server_socket.settimeout(2) server_socket.close() - + if consecutive_failures != 5: - with open('/tmp/data.csv', 'w', newline='') as csvfile: - writer = csv.writer(csvfile, delimiter=',') - writer.writerow(["id", "client_send", "client_rcv"]) + with open("/tmp/data.csv", "w", newline="") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerow(["id", "client_send", "client_rcv"]) for row in times: writer.writerow(row) - + client = storage.storage.get_instance() - filename = 'results-{}.csv'.format(request_id) - key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + filename = "results-{}.csv".format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), "/tmp/data.csv") - return { 'result': key } + return {"result": key} diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py b/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py index 427215380..8f43ffc5a 100644 --- 
a/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/input.py @@ -1,12 +1,13 @@ - - def buckets_count(): return 0, 1 -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): return { - 'bucket': { - 'bucket': benchmarks_bucket, - 'output': output_paths[0], + "bucket": { + "bucket": benchmarks_bucket, + "output": output_paths[0], }, } diff --git a/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py b/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py index 9ffd978ae..9cf93eccf 100644 --- a/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py +++ b/benchmarks/000.microbenchmarks/030.clock-synchronization/python/function.py @@ -1,28 +1,27 @@ import csv -import json import os import socket from datetime import datetime -from time import sleep from . 
import storage + def handler(event): - request_id = event['request-id'] - address = event['server-address'] - port = event['server-port'] - repetitions = event['repetitions'] - output_bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') + request_id = event["request-id"] + address = event["server-address"] + port = event["server-port"] + repetitions = event["repetitions"] + output_bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") times = [] print("Starting communication with {}:{}".format(address, port)) i = 0 socket.setdefaulttimeout(4) server_socket = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - server_socket.bind(('', 0)) - message = request_id.encode('utf-8') + server_socket.bind(("", 0)) + message = request_id.encode("utf-8") adr = (address, port) consecutive_failures = 0 measurements_not_smaller = 0 @@ -43,11 +42,13 @@ def handler(event): if i > 0: times.append([i, send_begin, recv_end]) cur_time = recv_end - send_begin - print("Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller)) + print( + "Time {} Min Time {} NotSmaller {}".format(cur_time, cur_min, measurements_not_smaller) + ) if cur_time > cur_min and cur_min > 0: measurements_not_smaller += 1 if measurements_not_smaller == repetitions: - message = "stop".encode('utf-8') + message = "stop".encode("utf-8") server_socket.sendto(message, adr) break else: @@ -57,18 +58,18 @@ def handler(event): consecutive_failures = 0 server_socket.settimeout(4) server_socket.close() - + if consecutive_failures != 5: - with open('/tmp/data.csv', 'w', newline='') as csvfile: - writer = csv.writer(csvfile, delimiter=',') - writer.writerow(["id", "client_send", "client_rcv"]) + with open("/tmp/data.csv", "w", newline="") as csvfile: + writer = csv.writer(csvfile, delimiter=",") + writer.writerow(["id", "client_send", "client_rcv"]) 
for row in times: writer.writerow(row) - + client = storage.storage.get_instance() - filename = 'results-{}.csv'.format(request_id) - key = client.upload(output_bucket, os.path.join(output_prefix, filename), '/tmp/data.csv') + filename = "results-{}.csv".format(request_id) + key = client.upload(output_bucket, os.path.join(output_prefix, filename), "/tmp/data.csv") else: key = None - return { 'result': {'bucket-key': key, 'timestamp': event['income-timestamp']} } + return {"result": {"bucket-key": key, "timestamp": event["income-timestamp"]}} diff --git a/benchmarks/000.microbenchmarks/040.server-reply/input.py b/benchmarks/000.microbenchmarks/040.server-reply/input.py index 041d2ba7f..af0427a6c 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/input.py +++ b/benchmarks/000.microbenchmarks/040.server-reply/input.py @@ -1,12 +1,11 @@ +size_generators = {"test": 1, "small": 100, "large": 1000} -size_generators = { - 'test' : 1, - 'small' : 100, - 'large': 1000 -} def buckets_count(): return (0, 0) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'sleep': size_generators[size] } + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"sleep": size_generators[size]} diff --git a/benchmarks/000.microbenchmarks/040.server-reply/python/function.py b/benchmarks/000.microbenchmarks/040.server-reply/python/function.py index fb5b57aa3..4c2a294ba 100644 --- a/benchmarks/000.microbenchmarks/040.server-reply/python/function.py +++ b/benchmarks/000.microbenchmarks/040.server-reply/python/function.py @@ -1,11 +1,10 @@ - import socket -from time import sleep + def handler(event): # start timing - addr = (event.get('ip-address'), event.get('port')) + addr = (event.get("ip-address"), event.get("port")) socket.setdefaulttimeout(20) s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) s.connect(addr) diff --git 
a/benchmarks/000.microbenchmarks/050.gpu-cache-latency/config.json b/benchmarks/000.microbenchmarks/050.gpu-cache-latency/config.json new file mode 100644 index 000000000..0ec4c3353 --- /dev/null +++ b/benchmarks/000.microbenchmarks/050.gpu-cache-latency/config.json @@ -0,0 +1,12 @@ +{ + "timeout": 300, + "memory": 1024, + "languages": ["python"], + "modules": [], + "name": "050.gpu-cache-latency", + "runtime": "python3", + "handler": "python/function.handler", + "dockerfile": "Dockerfile", + "data_dir": "../../benchmarks-data/050.gpu-cache-latency", + "datasets": ["test", "small", "large"] +} diff --git a/benchmarks/000.microbenchmarks/050.gpu-cache-latency/input.py b/benchmarks/000.microbenchmarks/050.gpu-cache-latency/input.py new file mode 100644 index 000000000..dcbe70729 --- /dev/null +++ b/benchmarks/000.microbenchmarks/050.gpu-cache-latency/input.py @@ -0,0 +1,30 @@ +# benchmarks/000.microbenchmarks/050.gpu-cache-latency/input.py + +# You can tune these as you like later +size_generators = { + "test": {"working_set_bytes": 1 << 16, "iterations": 10_000}, + "small": {"working_set_bytes": 1 << 20, "iterations": 100_000}, + "large": {"working_set_bytes": 1 << 24, "iterations": 1_000_000}, +} + +def generate_input( + data_dir, # path to benchmark data dir (unused here) + size, # "test" | "small" | "large" + benchmarks_bucket, # storage bucket (unused locally) + input_paths, # list of input paths (unused here) + output_paths, # list of output paths (unused here) + upload_func, # function to upload data (unused here) + nosql_func # function to access NoSQL (unused here) +): + """ + SeBS calls this to get the JSON-like dict that becomes event['input'] + for the function. + """ + cfg = size_generators[size] + + return { + "working_set_bytes": cfg["working_set_bytes"], + "pattern": "random", # or "sequential", "stride_4", etc. 
+ "iterations": cfg["iterations"], + "seed": 42 + } diff --git a/benchmarks/000.microbenchmarks/050.gpu-cache-latency/python/function.py b/benchmarks/000.microbenchmarks/050.gpu-cache-latency/python/function.py new file mode 100644 index 000000000..369644a99 --- /dev/null +++ b/benchmarks/000.microbenchmarks/050.gpu-cache-latency/python/function.py @@ -0,0 +1,115 @@ +# benchmarks/000.microbenchmarks/050.gpu-cache-latency/python/function.py + +import time +import math +import torch + + +def build_next_indices(n: int, pattern: str, device: torch.device, seed: int = 42): + """ + Build the 'next' array with the given pattern, similar to your C++ version. + """ + if n <= 0: + n = 1 + + idx = torch.empty(n, dtype=torch.long) + + if pattern == "sequential": + idx = (torch.arange(n, dtype=torch.long) + 1) % n + elif pattern.startswith("stride_"): + stride = int(pattern.split("_", 1)[1]) + idx = (torch.arange(n, dtype=torch.long) + stride) % n + elif pattern == "random": + # deterministic permutation + g = torch.Generator() + g.manual_seed(seed) + perm = torch.randperm(n, generator=g) + idx[perm] = perm.roll(-1) + else: + raise ValueError(f"Unknown pattern '{pattern}'") + + return idx.to(device) + + +def pointer_chase(working_set_bytes: int, pattern: str, iterations: int, seed: int = 42): + """ + Pointer-chase microbenchmark, implemented in PyTorch. + Uses GPU if available; otherwise falls back to CPU. 
+ """ + + # Number of ints in the working set + n = max(1, working_set_bytes // 4) + + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + + next_idx = build_next_indices(n, pattern, device, seed=seed) + + cur = torch.tensor(0, dtype=torch.long, device=device) + acc = torch.tensor(0, dtype=torch.long, device=device) + + # Warmup (like your C++ version) + warmup_iters = min(iterations, 1024) + for _ in range(warmup_iters): + cur = next_idx[cur] + acc = acc + cur + + # Measure time + if device.type == "cuda": + torch.cuda.synchronize() + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + start_event.record() + for _ in range(iterations): + cur = next_idx[cur] + acc = acc + cur + end_event.record() + torch.cuda.synchronize() + + elapsed_ms = start_event.elapsed_time(end_event) # ms + total_seconds = elapsed_ms / 1000.0 + else: + start_time = time.perf_counter() + for _ in range(iterations): + cur = next_idx[cur] + acc = acc + cur + total_seconds = time.perf_counter() - start_time + + avg_ns = (total_seconds * 1e9 / iterations) if iterations > 0 else 0.0 + + return { + "working_set_bytes": int(working_set_bytes), + "pattern": pattern, + "iterations": int(iterations), + "device": str(device), + "total_seconds": total_seconds, + "avg_ns_per_step": avg_ns, + "sink": int(acc.item()), + } + + +def handler(event, context=None): + """ + Entry point for SeBS. + + For Python benchmarks, SeBS passes: + event = { + "input": { ...whatever generate_input returned... }, + ... 
+ } + We must return: { "result": } + """ + + params = event.get("input", {}) + + working_set_bytes = int(params.get("working_set_bytes", 1 << 20)) + pattern = params.get("pattern", "random") + iterations = int(params.get("iterations", 100_000)) + seed = int(params.get("seed", 42)) + + result = pointer_chase(working_set_bytes, pattern, iterations, seed=seed) + + # SeBS expects this shape + return { + "result": result + } diff --git a/benchmarks/000.microbenchmarks/050.gpu-cache-latency/python/requirements.txt b/benchmarks/000.microbenchmarks/050.gpu-cache-latency/python/requirements.txt new file mode 100644 index 000000000..f9b2aa91e --- /dev/null +++ b/benchmarks/000.microbenchmarks/050.gpu-cache-latency/python/requirements.txt @@ -0,0 +1,2 @@ +torch +numpy \ No newline at end of file diff --git a/benchmarks/100.webapps/110.dynamic-html/input.py b/benchmarks/100.webapps/110.dynamic-html/input.py index 98dac88b2..c20154ec3 100644 --- a/benchmarks/100.webapps/110.dynamic-html/input.py +++ b/benchmarks/100.webapps/110.dynamic-html/input.py @@ -1,11 +1,9 @@ +size_generators = {"test": 10, "small": 1000, "large": 100000} -size_generators = { - 'test' : 10, - 'small' : 1000, - 'large': 100000 -} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - input_config = {'username': 'testname'} - input_config['random_len'] = size_generators[size] +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + input_config = {"username": "testname"} + input_config["random_len"] = size_generators[size] return input_config diff --git a/benchmarks/100.webapps/110.dynamic-html/python/function.py b/benchmarks/100.webapps/110.dynamic-html/python/function.py index 7c990f4eb..6f7b42bc5 100644 --- a/benchmarks/100.webapps/110.dynamic-html/python/function.py +++ b/benchmarks/100.webapps/110.dynamic-html/python/function.py @@ -1,22 +1,21 @@ -from datetime import datetime -from random 
import sample +from datetime import datetime +from random import sample from os import path -from time import time -import os from jinja2 import Template SCRIPT_DIR = path.abspath(path.join(path.dirname(__file__))) + def handler(event): # start timing - name = event.get('username') - size = event.get('random_len') + name = event.get("username") + size = event.get("random_len") cur_time = datetime.now() random_numbers = sample(range(0, 1000000), size) - template = Template( open(path.join(SCRIPT_DIR, 'templates', 'template.html'), 'r').read()) - html = template.render(username = name, cur_time = cur_time, random_numbers = random_numbers) + template = Template(open(path.join(SCRIPT_DIR, "templates", "template.html"), "r").read()) + html = template.render(username=name, cur_time=cur_time, random_numbers=random_numbers) # end timing - # dump stats - return {'result': html} + # dump stats + return {"result": html} diff --git a/benchmarks/100.webapps/120.uploader/input.py b/benchmarks/100.webapps/120.uploader/input.py index ce6169ccb..7aafb2b22 100644 --- a/benchmarks/100.webapps/120.uploader/input.py +++ b/benchmarks/100.webapps/120.uploader/input.py @@ -1,19 +1,25 @@ - url_generators = { # source: mlperf fake_imagenet.sh. 230 kB - 'test' : 'https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/Jammlich_crop.jpg/800px-Jammlich_crop.jpg', + "test": ( + "https://upload.wikimedia.org/wikipedia/commons/thumb/e/e7/" + "Jammlich_crop.jpg/800px-Jammlich_crop.jpg" + ), # video: HPX source code, 6.7 MB - 'small': 'https://github.com/STEllAR-GROUP/hpx/archive/refs/tags/1.4.0.zip', + "small": "https://github.com/STEllAR-GROUP/hpx/archive/refs/tags/1.4.0.zip", # resnet model from pytorch. 
98M - 'large': 'https://download.pytorch.org/models/resnet50-19c8e357.pth' + "large": "https://download.pytorch.org/models/resnet50-19c8e357.pth", } + def buckets_count(): return (0, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func): - input_config = {'object': {}, 'bucket': {}} - input_config['object']['url'] = url_generators[size] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['output'] = output_buckets[0] + +def generate_input( + data_dir, size, benchmarks_bucket, input_buckets, output_buckets, upload_func, nosql_func +): + input_config = {"object": {}, "bucket": {}} + input_config["object"]["url"] = url_generators[size] + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["output"] = output_buckets[0] return input_config diff --git a/benchmarks/100.webapps/120.uploader/python/function.py b/benchmarks/100.webapps/120.uploader/python/function.py index d032bbdb6..cb17131f1 100755 --- a/benchmarks/100.webapps/120.uploader/python/function.py +++ b/benchmarks/100.webapps/120.uploader/python/function.py @@ -1,26 +1,29 @@ - import datetime import os import urllib.request from . 
import storage + client = storage.storage.get_instance() -SEBS_USER_AGENT = "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" +SEBS_USER_AGENT = ( + "SeBS/1.2 (https://github.com/spcl/serverless-benchmarks) SeBS Benchmark Suite/1.2" +) + def handler(event): - bucket = event.get('bucket').get('bucket') - output_prefix = event.get('bucket').get('output') - url = event.get('object').get('url') + bucket = event.get("bucket").get("bucket") + output_prefix = event.get("bucket").get("output") + url = event.get("object").get("url") name = os.path.basename(url) - download_path = '/tmp/{}'.format(name) + download_path = "/tmp/{}".format(name) process_begin = datetime.datetime.now() req = urllib.request.Request(url) - req.add_header('User-Agent', SEBS_USER_AGENT) - with open(download_path, 'wb') as f: + req.add_header("User-Agent", SEBS_USER_AGENT) + with open(download_path, "wb") as f: with urllib.request.urlopen(req) as response: f.write(response.read()) size = os.path.getsize(download_path) @@ -33,16 +36,12 @@ def handler(event): process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'url': url, - 'key': key_name - }, - 'measurement': { - 'download_time': 0, - 'download_size': 0, - 'upload_time': upload_time, - 'upload_size': size, - 'compute_time': process_time - } + "result": {"bucket": bucket, "url": url, "key": key_name}, + "measurement": { + "download_time": 0, + "download_size": 0, + "upload_time": upload_time, + "upload_size": size, + "compute_time": process_time, + }, } diff --git a/benchmarks/200.multimedia/210.thumbnailer/input.py b/benchmarks/200.multimedia/210.thumbnailer/input.py index 8943effed..6f04bfafb 100644 --- a/benchmarks/200.multimedia/210.thumbnailer/input.py +++ b/benchmarks/200.multimedia/210.thumbnailer/input.py @@ -1,9 +1,12 @@ -import glob, os +import glob +import 
os + def buckets_count(): return (1, 1) -''' + +""" Generate test, small and large workload for thumbnailer. :param data_dir: directory where benchmark data is placed @@ -11,19 +14,23 @@ def buckets_count(): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): - for file in glob.glob(os.path.join(data_dir, '*.jpg')): + for file in glob.glob(os.path.join(data_dir, "*.jpg")): img = os.path.relpath(file, data_dir) upload_func(0, img, file) - #TODO: multiple datasets - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = img - input_config['object']['width'] = 200 - input_config['object']['height'] = 200 - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + # TODO: multiple datasets + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = img + input_config["object"]["width"] = 200 + input_config["object"]["height"] = 200 + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/200.multimedia/210.thumbnailer/python/function.py b/benchmarks/200.multimedia/210.thumbnailer/python/function.py index 20527067b..2df0a7bfb 100755 --- a/benchmarks/200.multimedia/210.thumbnailer/python/function.py +++ b/benchmarks/200.multimedia/210.thumbnailer/python/function.py @@ -1,44 +1,45 @@ import datetime import io import os -import sys -import uuid from urllib.parse import unquote_plus from PIL import Image from . 
import storage + client = storage.storage.get_instance() # Disk-based solution -#def resize_image(image_path, resized_path, w, h): +# def resize_image(image_path, resized_path, w, h): # with Image.open(image_path) as image: # image.thumbnail((w,h)) # image.save(resized_path) + # Memory-based solution def resize_image(image_bytes, w, h): with Image.open(io.BytesIO(image_bytes)) as image: - image.thumbnail((w,h)) + image.thumbnail((w, h)) out = io.BytesIO() - image.save(out, format='jpeg') + image.save(out, format="jpeg") # necessary to rewind to the beginning of the buffer out.seek(0) return out + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = unquote_plus(event.get('object').get('key')) - width = event.get('object').get('width') - height = event.get('object').get('height') + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = unquote_plus(event.get("object").get("key")) + width = event.get("object").get("width") + height = event.get("object").get("height") # UUID to handle multiple calls - #download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) - #upload_path = '/tmp/resized-{}'.format(key) - #client.download(input_bucket, key, download_path) - #resize_image(download_path, upload_path, width, height) - #client.upload(output_bucket, key, upload_path) + # download_path = '/tmp/{}-{}'.format(uuid.uuid4(), key) + # upload_path = '/tmp/resized-{}'.format(key) + # client.download(input_bucket, key, download_path) + # resize_image(download_path, upload_path, width, height) + # client.upload(output_bucket, key, upload_path) download_begin = datetime.datetime.now() img = client.download_stream(bucket, os.path.join(input_prefix, key)) download_end = datetime.datetime.now() @@ -56,15 +57,12 @@ def handler(event): upload_time = (upload_end - 
upload_begin) / datetime.timedelta(microseconds=1) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'download_size': len(img), - 'upload_time': upload_time, - 'upload_size': resized_size, - 'compute_time': process_time - } + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "download_size": len(img), + "upload_time": upload_time, + "upload_size": resized_size, + "compute_time": process_time, + }, } diff --git a/benchmarks/200.multimedia/220.video-processing/input.py b/benchmarks/200.multimedia/220.video-processing/input.py index 6da31647f..86c7191cb 100644 --- a/benchmarks/200.multimedia/220.video-processing/input.py +++ b/benchmarks/200.multimedia/220.video-processing/input.py @@ -1,9 +1,12 @@ -import glob, os +import glob +import os + def buckets_count(): return (1, 1) -''' + +""" Generate test, small and large workload for thumbnailer. 
:param data_dir: directory where benchmark data is placed @@ -11,17 +14,21 @@ def buckets_count(): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - for file in glob.glob(os.path.join(data_dir, '*.mp4')): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + for file in glob.glob(os.path.join(data_dir, "*.mp4")): img = os.path.relpath(file, data_dir) upload_func(0, img, file) - #TODO: multiple datasets - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = img - input_config['object']['op'] = 'watermark' - input_config['object']['duration'] = 1 - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + # TODO: multiple datasets + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = img + input_config["object"]["op"] = "watermark" + input_config["object"]["duration"] = 1 + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/200.multimedia/220.video-processing/python/function.py b/benchmarks/200.multimedia/220.video-processing/python/function.py index 9f8a869aa..ab132ba2e 100755 --- a/benchmarks/200.multimedia/220.video-processing/python/function.py +++ b/benchmarks/200.multimedia/220.video-processing/python/function.py @@ -7,62 +7,84 @@ from . 
import storage + client = storage.storage.get_instance() SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) + def call_ffmpeg(args): - ret = subprocess.run([os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg'), '-y'] + args, - #subprocess might inherit Lambda's input for some reason - stdin=subprocess.DEVNULL, - stdout=subprocess.PIPE, stderr=subprocess.STDOUT + ret = subprocess.run( + [os.path.join(SCRIPT_DIR, "ffmpeg", "ffmpeg"), "-y"] + args, + # subprocess might inherit Lambda's input for some reason + stdin=subprocess.DEVNULL, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, ) if ret.returncode != 0: - print('Invocation of ffmpeg failed!') - print('Out: ', ret.stdout.decode('utf-8')) + print("Invocation of ffmpeg failed!") + print("Out: ", ret.stdout.decode("utf-8")) raise RuntimeError() + # https://superuser.com/questions/556029/how-do-i-convert-a-video-to-gif-using-ffmpeg-with-reasonable-quality def to_gif(video, duration, event): - output = '/tmp/processed-{}.gif'.format(os.path.basename(video)) - call_ffmpeg(["-i", video, - "-t", - "{0}".format(duration), - "-vf", - "fps=10,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse", - "-loop", "0", - output]) + output = "/tmp/processed-{}.gif".format(os.path.basename(video)) + call_ffmpeg( + [ + "-i", + video, + "-t", + "{0}".format(duration), + "-vf", + "fps=10,scale=320:-1:flags=lanczos,split[s0][s1];[s0]palettegen[p];[s1][p]paletteuse", + "-loop", + "0", + output, + ] + ) return output + # https://devopstar.com/2019/01/28/serverless-watermark-using-aws-lambda-layers-ffmpeg/ def watermark(video, duration, event): - output = '/tmp/processed-{}'.format(os.path.basename(video)) + output = "/tmp/processed-{}".format(os.path.basename(video)) watermark_file = os.path.dirname(os.path.realpath(__file__)) - call_ffmpeg([ - "-i", video, - "-i", os.path.join(watermark_file, os.path.join('resources', 'watermark.png')), - "-t", "{0}".format(duration), - "-filter_complex", 
"overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2", - output]) + call_ffmpeg( + [ + "-i", + video, + "-i", + os.path.join(watermark_file, os.path.join("resources", "watermark.png")), + "-t", + "{0}".format(duration), + "-filter_complex", + "overlay=main_w/2-overlay_w/2:main_h/2-overlay_h/2", + output, + ] + ) return output + def transcode_mp3(video, duration, event): pass -operations = { 'transcode' : transcode_mp3, 'extract-gif' : to_gif, 'watermark' : watermark } + +operations = {"transcode": transcode_mp3, "extract-gif": to_gif, "watermark": watermark} + def handler(event): - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - duration = event.get('object').get('duration') - op = event.get('object').get('op') - download_path = '/tmp/{}'.format(key) + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + duration = event.get("object").get("duration") + op = event.get("object").get("op") + download_path = "/tmp/{}".format(key) # Restore executable permission - ffmpeg_binary = os.path.join(SCRIPT_DIR, 'ffmpeg', 'ffmpeg') + ffmpeg_binary = os.path.join(SCRIPT_DIR, "ffmpeg", "ffmpeg") # needed on Azure but read-only filesystem on AWS try: st = os.stat(ffmpeg_binary) @@ -89,16 +111,12 @@ def handler(event): upload_time = (upload_stop - upload_begin) / datetime.timedelta(microseconds=1) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': upload_key - }, - 'measurement': { - 'download_time': download_time, - 'download_size': download_size, - 'upload_time': upload_time, - 'upload_size': upload_size, - 'compute_time': process_time - } - } - + "result": {"bucket": bucket, "key": upload_key}, + "measurement": { + "download_time": 
download_time, + "download_size": download_size, + "upload_time": upload_time, + "upload_size": upload_size, + "compute_time": process_time, + }, + } diff --git a/benchmarks/300.utilities/311.compression/input.py b/benchmarks/300.utilities/311.compression/input.py index 5f88bc91a..e9e706bd5 100644 --- a/benchmarks/300.utilities/311.compression/input.py +++ b/benchmarks/300.utilities/311.compression/input.py @@ -1,4 +1,5 @@ -import glob, os +import os + def buckets_count(): return (1, 1) @@ -9,11 +10,12 @@ def upload_files(data_root, data_dir, upload_func): for root, dirs, files in os.walk(data_dir): prefix = os.path.relpath(root, data_root) for file in files: - file_name = prefix + '/' + file + file_name = prefix + "/" + file filepath = os.path.join(root, file) upload_func(0, file_name, filepath) -''' + +""" Generate test, small and large workload for compression test. :param data_dir: directory where benchmark data is placed @@ -21,8 +23,12 @@ def upload_files(data_root, data_dir, upload_func): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): # upload different datasets datasets = [] @@ -30,9 +36,9 @@ def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, datasets.append(dir) upload_files(data_dir, os.path.join(data_dir, dir), upload_func) - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = datasets[0] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = datasets[0] + 
input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/300.utilities/311.compression/python/function.py b/benchmarks/300.utilities/311.compression/python/function.py index f758e14e4..8ceb52d2f 100755 --- a/benchmarks/300.utilities/311.compression/python/function.py +++ b/benchmarks/300.utilities/311.compression/python/function.py @@ -1,13 +1,13 @@ import datetime -import io import os import shutil import uuid -import zlib from . import storage + client = storage.storage.get_instance() + def parse_directory(directory): size = 0 @@ -16,13 +16,14 @@ def parse_directory(directory): size += os.path.getsize(os.path.join(root, file)) return size + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + download_path = "/tmp/{}-{}".format(key, uuid.uuid4()) os.makedirs(download_path) s3_download_begin = datetime.datetime.now() @@ -31,29 +32,27 @@ def handler(event): size = parse_directory(download_path) compress_begin = datetime.datetime.now() - shutil.make_archive(os.path.join(download_path, key), 'zip', root_dir=download_path) + shutil.make_archive(os.path.join(download_path, key), "zip", root_dir=download_path) compress_end = datetime.datetime.now() s3_upload_begin = datetime.datetime.now() - archive_name = '{}.zip'.format(key) + archive_name = "{}.zip".format(key) archive_size = os.path.getsize(os.path.join(download_path, archive_name)) - key_name = client.upload(bucket, os.path.join(output_prefix, archive_name), 
os.path.join(download_path, archive_name)) + key_name = client.upload( + bucket, os.path.join(output_prefix, archive_name), os.path.join(download_path, archive_name) + ) s3_upload_stop = datetime.datetime.now() download_time = (s3_download_stop - s3_download_begin) / datetime.timedelta(microseconds=1) upload_time = (s3_upload_stop - s3_upload_begin) / datetime.timedelta(microseconds=1) process_time = (compress_end - compress_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'download_size': size, - 'upload_time': upload_time, - 'upload_size': archive_size, - 'compute_time': process_time - } - } - + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "download_size": size, + "upload_time": upload_time, + "upload_size": archive_size, + "compute_time": process_time, + }, + } diff --git a/benchmarks/400.inference/411.image-recognition/input.py b/benchmarks/400.inference/411.image-recognition/input.py index 45d7215a6..c5ce190d0 100644 --- a/benchmarks/400.inference/411.image-recognition/input.py +++ b/benchmarks/400.inference/411.image-recognition/input.py @@ -1,18 +1,21 @@ -import glob, os +import os + def buckets_count(): return (2, 0) + def upload_files(data_root, data_dir, upload_func): for root, dirs, files in os.walk(data_dir): prefix = os.path.relpath(root, data_root) for file in files: - file_name = prefix + '/' + file + file_name = prefix + "/" + file filepath = os.path.join(root, file) upload_func(0, file_name, filepath) -''' + +""" Generate test, small and large workload for compression test. 
:param data_dir: directory where benchmark data is placed @@ -20,25 +23,29 @@ def upload_files(data_root, data_dir, upload_func): :param input_buckets: input storage containers for this benchmark :param output_buckets: :param upload_func: upload function taking three params(bucket_idx, key, filepath) -''' -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): +""" + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): # upload model - model_name = 'resnet50-19c8e357.pth' - upload_func(0, model_name, os.path.join(data_dir, 'model', model_name)) + model_name = "resnet50-19c8e357.pth" + upload_func(0, model_name, os.path.join(data_dir, "model", model_name)) input_images = [] - resnet_path = os.path.join(data_dir, 'fake-resnet') - with open(os.path.join(resnet_path, 'val_map.txt'), 'r') as f: + resnet_path = os.path.join(data_dir, "fake-resnet") + with open(os.path.join(resnet_path, "val_map.txt"), "r") as f: for line in f: img, img_class = line.split() input_images.append((img, img_class)) upload_func(1, img, os.path.join(resnet_path, img)) - - input_config = {'object': {}, 'bucket': {}} - input_config['object']['model'] = model_name - input_config['object']['input'] = input_images[0][0] - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[1] - input_config['bucket']['model'] = input_paths[0] + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_name + input_config["object"]["input"] = input_images[0][0] + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[1] + input_config["bucket"]["model"] = input_paths[0] return input_config diff --git a/benchmarks/400.inference/411.image-recognition/python/function.py b/benchmarks/400.inference/411.image-recognition/python/function.py index 411386419..0cfa1c57f 100644 --- 
a/benchmarks/400.inference/411.image-recognition/python/function.py +++ b/benchmarks/400.inference/411.image-recognition/python/function.py @@ -1,14 +1,20 @@ - -import datetime, json, os, uuid +import datetime +import json +import os +import uuid # Extract zipped torch model - used in Python 3.8 and 3.9 # The reason is that torch versions supported for these Python # versions are too large for Lambda packages. -if os.path.exists('function/torch.zip'): - import zipfile, sys +if os.path.exists("function/torch.zip"): + import sys + import zipfile + # we cannot write to the read-only filesystem - zipfile.ZipFile('function/torch.zip').extractall('/tmp/') - sys.path.append(os.path.join(os.path.dirname(__file__), '/tmp/.python_packages/lib/site-packages')) + zipfile.ZipFile("function/torch.zip").extractall("/tmp/") + sys.path.append( + os.path.join(os.path.dirname(__file__), "/tmp/.python_packages/lib/site-packages") + ) from PIL import Image import torch @@ -16,21 +22,23 @@ from torchvision.models import resnet50 from . 
import storage + client = storage.storage.get_instance() SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__))) -class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), 'r')) +class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r")) idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))] model = None + def handler(event): - - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - model_prefix = event.get('bucket').get('model') - key = event.get('object').get('input') - model_key = event.get('object').get('model') - download_path = '/tmp/{}-{}'.format(key, uuid.uuid4()) + + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + model_prefix = event.get("bucket").get("model") + key = event.get("object").get("input") + model_key = event.get("object").get("model") + download_path = "/tmp/{}-{}".format(key, uuid.uuid4()) image_download_begin = datetime.datetime.now() image_path = download_path @@ -40,7 +48,7 @@ def handler(event): global model if not model: model_download_begin = datetime.datetime.now() - model_path = os.path.join('/tmp', model_key) + model_path = os.path.join("/tmp", model_key) client.download(bucket, os.path.join(model_prefix, model_key), model_path) model_download_end = datetime.datetime.now() model_process_begin = datetime.datetime.now() @@ -53,36 +61,38 @@ def handler(event): model_download_end = model_download_begin model_process_begin = datetime.datetime.now() model_process_end = model_process_begin - + process_begin = datetime.datetime.now() input_image = Image.open(image_path) - preprocess = transforms.Compose([ - transforms.Resize(256), - transforms.CenterCrop(224), - transforms.ToTensor(), - transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), - ]) + preprocess = transforms.Compose( + [ + transforms.Resize(256), + transforms.CenterCrop(224), + 
transforms.ToTensor(), + transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + ] + ) input_tensor = preprocess(input_image) - input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model + input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model output = model(input_batch) _, index = torch.max(output, 1) - # The output has unnormalized scores. To get probabilities, you can run a softmax on it. - prob = torch.nn.functional.softmax(output[0], dim=0) - _, indices = torch.sort(output, descending = True) ret = idx2label[index] process_end = datetime.datetime.now() - download_time = (image_download_end- image_download_begin) / datetime.timedelta(microseconds=1) - model_download_time = (model_download_end - model_download_begin) / datetime.timedelta(microseconds=1) - model_process_time = (model_process_end - model_process_begin) / datetime.timedelta(microseconds=1) + download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1) + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': {'idx': index.item(), 'class': ret}, - 'measurement': { - 'download_time': download_time + model_download_time, - 'compute_time': process_time + model_process_time, - 'model_time': model_process_time, - 'model_download_time': model_download_time - } - } - + "result": {"idx": index.item(), "class": ret}, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": process_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/412.language-bert/config.json 
b/benchmarks/400.inference/412.language-bert/config.json new file mode 100644 index 000000000..94ede7925 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 512, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/400.inference/412.language-bert/input.py b/benchmarks/400.inference/412.language-bert/input.py new file mode 100644 index 000000000..9af7ecb56 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/input.py @@ -0,0 +1,33 @@ +import os + + +def buckets_count(): + # model bucket and text bucket + return (2, 0) + + +def upload_files(data_root, data_dir, upload_func): + for root, _, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + filepath = os.path.join(root, file) + relative_key = os.path.join(prefix, file) + upload_func(0, relative_key, filepath) + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + model_archive = "bert-tiny-onnx.tar.gz" + upload_func(0, model_archive, os.path.join(data_dir, "model", model_archive)) + + text_filename = "sentences.jsonl" + upload_func(1, text_filename, os.path.join(data_dir, "text", text_filename)) + + input_config = {"object": {}, "bucket": {}} + input_config["object"]["model"] = model_archive + input_config["object"]["input"] = text_filename + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["model"] = input_paths[0] + input_config["bucket"]["text"] = input_paths[1] + return input_config diff --git a/benchmarks/400.inference/412.language-bert/python/function.py b/benchmarks/400.inference/412.language-bert/python/function.py new file mode 100644 index 000000000..7e4f981ef --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/function.py @@ -0,0 +1,157 @@ +import datetime +import json +import os +import tarfile +import uuid +from typing import Dict, List, Optional + 
+import numpy as np +import onnxruntime as ort +from tokenizers import Tokenizer + +from . import storage + +client = storage.storage.get_instance() + +MODEL_ARCHIVE = "bert-tiny-onnx.tar.gz" +MODEL_DIRECTORY = "/tmp/bert_language_model" +MODEL_SUBDIR = "bert-tiny-onnx" + +_session: Optional[ort.InferenceSession] = None +_tokenizer: Optional[Tokenizer] = None +_labels: Optional[Dict[int, str]] = None + + +def _ensure_model(bucket: str, model_prefix: str): + """ + Lazily download and initialize the ONNX model and tokenizer. + """ + global _session, _tokenizer, _labels + + model_path = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR) + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + if _session is None or _tokenizer is None or _labels is None: + if not os.path.exists(model_path): + os.makedirs(MODEL_DIRECTORY, exist_ok=True) + archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_ARCHIVE}") + client.download(bucket, os.path.join(model_prefix, MODEL_ARCHIVE), archive_path) + model_download_end = datetime.datetime.now() + + with tarfile.open(archive_path, "r:gz") as tar: + tar.extractall(MODEL_DIRECTORY) + os.remove(archive_path) + else: + model_download_begin = datetime.datetime.now() + model_download_end = model_download_begin + + model_process_begin = datetime.datetime.now() + tokenizer_path = os.path.join(model_path, "tokenizer.json") + _tokenizer = Tokenizer.from_file(tokenizer_path) + _tokenizer.enable_truncation(max_length=128) + _tokenizer.enable_padding(length=128) + + label_map_path = os.path.join(model_path, "label_map.json") + with open(label_map_path, "r") as f: + raw_labels = json.load(f) + _labels = {int(idx): label for idx, label in raw_labels.items()} + + onnx_path = os.path.join(model_path, "model.onnx") + + available = ort.get_available_providers() + if "CUDAExecutionProvider" not in available: + raise RuntimeError(f"CUDAExecutionProvider unavailable (have: {available})") + + _session = 
ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"]) + model_process_end = datetime.datetime.now() + else: + model_process_begin = datetime.datetime.now() + model_process_end = model_process_begin + + model_download_time = (model_download_end - model_download_begin) / datetime.timedelta( + microseconds=1 + ) + model_process_time = (model_process_end - model_process_begin) / datetime.timedelta( + microseconds=1 + ) + + return model_download_time, model_process_time + + +def _prepare_inputs(sentences: List[str]): + assert _tokenizer is not None + + encodings = _tokenizer.encode_batch(sentences) + + input_ids = np.array([enc.ids for enc in encodings], dtype=np.int64) + attention_mask = np.array([enc.attention_mask for enc in encodings], dtype=np.int64) + token_type_ids = np.array( + [enc.type_ids if enc.type_ids else [0] * len(enc.ids) for enc in encodings], + dtype=np.int64, + ) + + return { + "input_ids": input_ids, + "attention_mask": attention_mask, + "token_type_ids": token_type_ids, + } + + +def _softmax(logits: np.ndarray) -> np.ndarray: + shifted = logits - np.max(logits, axis=1, keepdims=True) + exp = np.exp(shifted) + return exp / np.sum(exp, axis=1, keepdims=True) + + +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + model_prefix = event.get("bucket", {}).get("model") + text_prefix = event.get("bucket", {}).get("text") + text_key = event.get("object", {}).get("input") + + download_begin = datetime.datetime.now() + text_download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(text_key)}") + client.download(bucket, os.path.join(text_prefix, text_key), text_download_path) + download_end = datetime.datetime.now() + + model_download_time, model_process_time = _ensure_model(bucket, model_prefix) + assert _session is not None and _labels is not None and _tokenizer is not None + + with open(text_download_path, "r") as f: + sentences = [json.loads(line)["text"] for line in f if line.strip()] + + 
os.remove(text_download_path) + + inference_begin = datetime.datetime.now() + inputs = _prepare_inputs(sentences) + outputs = _session.run(None, inputs) + logits = outputs[0] + probabilities = _softmax(logits) + inference_end = datetime.datetime.now() + + results = [] + for sentence, probs in zip(sentences, probabilities): + label_idx = int(np.argmax(probs)) + label = _labels.get(label_idx, str(label_idx)) + results.append( + { + "text": sentence, + "label": label, + "confidence": float(probs[label_idx]), + "raw_scores": probs.tolist(), + } + ) + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) + + return { + "result": {"predictions": results}, + "measurement": { + "download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/412.language-bert/python/init.sh b/benchmarks/400.inference/412.language-bert/python/init.sh new file mode 100755 index 000000000..160852abe --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/init.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +# No additional initialization required for the BERT inference benchmark. diff --git a/benchmarks/400.inference/412.language-bert/python/package.sh b/benchmarks/400.inference/412.language-bert/python/package.sh new file mode 100644 index 000000000..edb27ebe0 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/package.sh @@ -0,0 +1,35 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . 
-type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . -name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +TORCH_DIR=".python_packages/lib/site-packages/torch" +if [ -d "$1/${TORCH_DIR}" ]; then + cd $1 + zip -qr torch.zip ${TORCH_DIR} + rm -rf ${TORCH_DIR} + cd ${CUR_DIR} + echo "Torch-zipped size $(du -sh $1 | cut -f1)" +fi diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt b/benchmarks/400.inference/412.language-bert/python/requirements.txt new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.10 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 new file mode 100644 index 000000000..67a8c1e18 --- /dev/null +++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.11 @@ -0,0 +1,3 @@ +numpy==1.24.4 +onnxruntime-gpu==1.16.3 +tokenizers==0.13.3 diff --git 
a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8
new file mode 100644
index 000000000..67a8c1e18
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.8
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
diff --git a/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9 b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9
new file mode 100644
index 000000000..67a8c1e18
--- /dev/null
+++ b/benchmarks/400.inference/412.language-bert/python/requirements.txt.3.9
@@ -0,0 +1,3 @@
+numpy==1.24.4
+onnxruntime-gpu==1.16.3
+tokenizers==0.13.3
diff --git a/benchmarks/400.inference/413.image-classification/config.json b/benchmarks/400.inference/413.image-classification/config.json
new file mode 100644
index 000000000..94ede7925
--- /dev/null
+++ b/benchmarks/400.inference/413.image-classification/config.json
@@ -0,0 +1,6 @@
+{
+  "timeout": 60,
+  "memory": 512,
+  "languages": ["python"],
+  "modules": ["storage"]
+}
diff --git a/benchmarks/400.inference/413.image-classification/input.py b/benchmarks/400.inference/413.image-classification/input.py
new file mode 100644
index 000000000..99e8bc4b3
--- /dev/null
+++ b/benchmarks/400.inference/413.image-classification/input.py
@@ -0,0 +1,51 @@
+import os
+
+
+def buckets_count():
+    # (input_buckets, output_buckets): this benchmark uses two input
+    # containers (0: model archive, 1: images) and no output container.
+    return (2, 0)
+
+
+def upload_files(data_root, data_dir, upload_func):
+    # Walk data_dir and upload every file into input bucket 0, keyed by its
+    # path relative to data_root.
+    for root, dirs, files in os.walk(data_dir):
+        prefix = os.path.relpath(root, data_root)
+        for file in files:
+            file_name = prefix + "/" + file
+            filepath = os.path.join(root, file)
+            upload_func(0, file_name, filepath)
+
+
+# NOTE(review): this doc block was copied from the compression benchmark --
+# it should say "image classification", and its :param: names do not match
+# generate_input's actual signature (benchmarks_bucket, input_paths,
+# output_paths, nosql_func).
+"""
+    Generate test, small and large workload for compression test.
+
+    :param data_dir: directory where benchmark data is placed
+    :param size: workload size
+    :param input_buckets: input storage containers for this benchmark
+    :param output_buckets:
+    :param upload_func: upload function taking three params(bucket_idx, key, filepath)
+"""
+
+
+def generate_input(
+    data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func
+):
+
+    # upload model
+    model_name = "resnet50.tar.gz"
+    upload_func(0, model_name, os.path.join(data_dir, "model", model_name))
+
+    # Upload every validation image listed in val_map.txt; each line is
+    # "<image-file> <class-index>".  `size` is currently unused -- all listed
+    # images are uploaded regardless of workload size.
+    input_images = []
+    resnet_path = os.path.join(data_dir, "data")
+    with open(os.path.join(resnet_path, "val_map.txt"), "r") as f:
+        for line in f:
+            img, img_class = line.split()
+            input_images.append((img, img_class))
+            upload_func(1, img, os.path.join(resnet_path, img))
+
+    # The handler reads object keys and bucket prefixes from this config.
+    # Only the first listed image becomes the default input object; raises
+    # IndexError if val_map.txt is empty.
+    input_config = {"object": {}, "bucket": {}}
+    input_config["object"]["model"] = model_name
+    input_config["object"]["input"] = input_images[0][0]
+    input_config["bucket"]["bucket"] = benchmarks_bucket
+    input_config["bucket"]["input"] = input_paths[1]
+    input_config["bucket"]["model"] = input_paths[0]
+    return input_config
diff --git a/benchmarks/400.inference/413.image-classification/python/function.py b/benchmarks/400.inference/413.image-classification/python/function.py
new file mode 100644
index 000000000..64795612d
--- /dev/null
+++ b/benchmarks/400.inference/413.image-classification/python/function.py
@@ -0,0 +1,178 @@
+import datetime
+import json
+import os
+import shutil
+import tarfile
+import uuid
+from typing import List, Optional, Tuple
+
+import numpy as np
+import onnxruntime as ort
+from PIL import Image
+
+from . import storage
+
+client = storage.storage.get_instance()
+
+# Map of ImageNet class index -> [wnid, label], shipped next to this file.
+SCRIPT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__)))
+class_idx = json.load(open(os.path.join(SCRIPT_DIR, "imagenet_class_index.json"), "r"))
+idx2label = [class_idx[str(k)][1] for k in range(len(class_idx))]
+
+MODEL_ARCHIVE = "resnet50.tar.gz"
+MODEL_DIRECTORY = "/tmp/image_classification_model"
+MODEL_SUBDIR = "resnet50"
+
+# Module-level cache, reused across warm invocations of the same container.
+_session: Optional[ort.InferenceSession] = None
+_session_input: Optional[str] = None
+_session_output: Optional[str] = None
+_cached_model_key: Optional[str] = None
+
+# Standard ImageNet channel-wise normalization constants.
+_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32)
+_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32)
+
+
+def _ensure_model(bucket: str, model_prefix: str, model_key: str) -> Tuple[float, float]:
+    """
+    Lazily download, extract, and initialize the ONNX ResNet model.
+
+    Returns (download_time_us, process_time_us); both are ~0 when the cached
+    session for the same model key is reused.  Raises RuntimeError when the
+    CUDA execution provider is unavailable and FileNotFoundError when the
+    extracted archive does not contain model.onnx.
+    """
+    global _session, _session_input, _session_output, _cached_model_key
+
+    effective_model_key = model_key or MODEL_ARCHIVE
+    model_download_begin = datetime.datetime.now()
+    model_download_end = model_download_begin
+
+    if _session is None or _cached_model_key != effective_model_key:
+        archive_basename = os.path.basename(effective_model_key)
+        archive_path = os.path.join("/tmp", f"{uuid.uuid4()}-{archive_basename}")
+        model_dir = os.path.join(MODEL_DIRECTORY, MODEL_SUBDIR)
+
+        # Drop any stale extraction left over from a different model key.
+        if os.path.exists(model_dir):
+            shutil.rmtree(model_dir)
+        os.makedirs(MODEL_DIRECTORY, exist_ok=True)
+
+        client.download(bucket, os.path.join(model_prefix, effective_model_key), archive_path)
+        model_download_end = datetime.datetime.now()
+
+        # NOTE(review): extractall() on an untrusted archive permits path
+        # traversal; acceptable only because the archive comes from the
+        # benchmark's own bucket.  Consider the extraction filter
+        # (tarfile filter="data", Python 3.12+) when runtimes allow.
+        with tarfile.open(archive_path, "r:gz") as tar:
+            tar.extractall(MODEL_DIRECTORY)
+        os.remove(archive_path)
+
+        model_process_begin = datetime.datetime.now()
+        onnx_path = os.path.join(model_dir, "model.onnx")
+        if not os.path.exists(onnx_path):
+            raise FileNotFoundError(f"Expected ONNX model at {onnx_path}")
+
+        # GPU benchmark: fail fast rather than silently fall back to CPU.
+        available = ort.get_available_providers()
+        if "CUDAExecutionProvider" not in available:
+            raise RuntimeError(f"CUDAExecutionProvider unavailable (providers: {available})")
+
+        _session = ort.InferenceSession(onnx_path, providers=["CUDAExecutionProvider"])
+        _session_input = _session.get_inputs()[0].name
+        _session_output = _session.get_outputs()[0].name
+        _cached_model_key = effective_model_key
+        model_process_end = datetime.datetime.now()
+    else:
+        model_process_begin = datetime.datetime.now()
+        model_process_end = model_process_begin
+
+    # timedelta / timedelta(microseconds=1) -> elapsed microseconds as float.
+    model_download_time = (model_download_end - model_download_begin) / datetime.timedelta(
+        microseconds=1
+    )
+    model_process_time = (model_process_end - model_process_begin) / datetime.timedelta(
+        microseconds=1
+    )
+
+    return model_download_time, model_process_time
+
+
+def _resize_shorter_side(image: Image.Image, size: int) -> Image.Image:
+    # Scale the image so its shorter side equals `size`, preserving aspect
+    # ratio.
+    width, height = image.size
+    if width < height:
+        new_width = size
+        new_height = int(round(size * height / width))
+    else:
+        new_height = size
+        new_width = int(round(size * width / height))
+    # Pillow >= 9.1 moved resampling constants to Image.Resampling; fall back
+    # to the module-level constant on older Pillow.
+    resample = getattr(Image, "Resampling", Image).BILINEAR
+    return image.resize((new_width, new_height), resample=resample)
+
+
+def _center_crop(image: Image.Image, size: int) -> Image.Image:
+    # Crop a size x size box centered in the image (no padding if the image
+    # is smaller than `size` -- the crop is simply clamped at the origin).
+    width, height = image.size
+    left = max(0, int(round((width - size) / 2)))
+    top = max(0, int(round((height - size) / 2)))
+    right = left + size
+    bottom = top + size
+    return image.crop((left, top, right, bottom))
+
+
+def _prepare_tensor(image_path: str) -> np.ndarray:
+    # Standard ResNet preprocessing: resize shorter side to 256, center-crop
+    # 224, scale to [0, 1], normalize per channel, reorder HWC -> NCHW, and
+    # add a batch dimension (result shape: (1, 3, 224, 224), float32).
+    image = Image.open(image_path).convert("RGB")
+    image = _resize_shorter_side(image, 256)
+    image = _center_crop(image, 224)
+
+    np_image = np.asarray(image).astype(np.float32) / 255.0
+    np_image = (np_image - _MEAN) / _STD
+    np_image = np.transpose(np_image, (2, 0, 1))
+    return np_image[np.newaxis, :]
+
+
+def _softmax(logits: np.ndarray) -> np.ndarray:
+    # Numerically stable softmax over axis 1 (subtract the row max before
+    # exponentiating).
+    shifted = logits - np.max(logits, axis=1, keepdims=True)
+    exp = np.exp(shifted)
+    return exp / np.sum(exp, axis=1, keepdims=True)
+
+
+def _run_inference(batch: np.ndarray) -> Tuple[int, float, List[int]]:
+    # Run the cached session on `batch` and return
+    # (top-1 class index, top-1 probability, top-5 class indices).
+    # _ensure_model() must have populated the session first.
+    assert _session is not None and _session_input is not None and _session_output is not None
+
+    outputs = _session.run([_session_output], {_session_input: batch})
+    logits = outputs[0]
+    probs = _softmax(logits)
+    top1_idx = int(np.argmax(probs, axis=1)[0])
+    top1_conf = float(probs[0, top1_idx])
+    top5_idx = np.argsort(probs[0])[::-1][:5].tolist()
+
+    return top1_idx, top1_conf, top5_idx
+
+
+def handler(event):
+    """
+    Benchmark entry point.  Expects the event layout produced by input.py's
+    generate_input(): bucket name/prefixes under event["bucket"] and object
+    keys under event["object"].  Returns the top-1/top-5 prediction plus
+    timing measurements (all times in microseconds).
+    """
+    bucket = event.get("bucket", {}).get("bucket")
+    input_prefix = event.get("bucket", {}).get("input")
+    model_prefix = event.get("bucket", {}).get("model")
+    key = event.get("object", {}).get("input")
+    model_key = event.get("object", {}).get("model")
+
+    # uuid prefix avoids collisions between concurrent warm invocations.
+    download_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(key)}")
+    image_download_begin = datetime.datetime.now()
+    client.download(bucket, os.path.join(input_prefix, key), download_path)
+    image_download_end = datetime.datetime.now()
+
+    model_download_time, model_process_time = _ensure_model(bucket, model_prefix, model_key)
+
+    # Preprocessing is counted as part of compute time, together with the
+    # actual GPU inference.
+    inference_begin = datetime.datetime.now()
+    input_batch = _prepare_tensor(download_path)
+    top1_idx, top1_conf, top5_idx = _run_inference(input_batch)
+    inference_end = datetime.datetime.now()
+
+    os.remove(download_path)
+
+    download_time = (image_download_end - image_download_begin) / datetime.timedelta(microseconds=1)
+    compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1)
+    # gpu_time_ms = 0.0
+
+    return {
+        "result": {
+            "idx": top1_idx,
+            "class": idx2label[top1_idx],
+            "confidence": top1_conf,
+            "top5_idx": top5_idx,
+        },
+        "measurement": {
+            # Aggregate times fold model setup into the image-level numbers;
+            # the model-only components are also reported separately.
+            "download_time": download_time + model_download_time,
+            "compute_time": compute_time + model_process_time,
+            "model_time": model_process_time,
+            "model_download_time": model_download_time,
+            # "gpu_time_ms": round(gpu_time_ms, 3),
+        },
+    }
diff --git a/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json
b/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json new file mode 100755 index 000000000..5fe0dfefc --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/imagenet_class_index.json @@ -0,0 +1 @@ +{"0": ["n01440764", "tench"], "1": ["n01443537", "goldfish"], "2": ["n01484850", "great_white_shark"], "3": ["n01491361", "tiger_shark"], "4": ["n01494475", "hammerhead"], "5": ["n01496331", "electric_ray"], "6": ["n01498041", "stingray"], "7": ["n01514668", "cock"], "8": ["n01514859", "hen"], "9": ["n01518878", "ostrich"], "10": ["n01530575", "brambling"], "11": ["n01531178", "goldfinch"], "12": ["n01532829", "house_finch"], "13": ["n01534433", "junco"], "14": ["n01537544", "indigo_bunting"], "15": ["n01558993", "robin"], "16": ["n01560419", "bulbul"], "17": ["n01580077", "jay"], "18": ["n01582220", "magpie"], "19": ["n01592084", "chickadee"], "20": ["n01601694", "water_ouzel"], "21": ["n01608432", "kite"], "22": ["n01614925", "bald_eagle"], "23": ["n01616318", "vulture"], "24": ["n01622779", "great_grey_owl"], "25": ["n01629819", "European_fire_salamander"], "26": ["n01630670", "common_newt"], "27": ["n01631663", "eft"], "28": ["n01632458", "spotted_salamander"], "29": ["n01632777", "axolotl"], "30": ["n01641577", "bullfrog"], "31": ["n01644373", "tree_frog"], "32": ["n01644900", "tailed_frog"], "33": ["n01664065", "loggerhead"], "34": ["n01665541", "leatherback_turtle"], "35": ["n01667114", "mud_turtle"], "36": ["n01667778", "terrapin"], "37": ["n01669191", "box_turtle"], "38": ["n01675722", "banded_gecko"], "39": ["n01677366", "common_iguana"], "40": ["n01682714", "American_chameleon"], "41": ["n01685808", "whiptail"], "42": ["n01687978", "agama"], "43": ["n01688243", "frilled_lizard"], "44": ["n01689811", "alligator_lizard"], "45": ["n01692333", "Gila_monster"], "46": ["n01693334", "green_lizard"], "47": ["n01694178", "African_chameleon"], "48": ["n01695060", "Komodo_dragon"], "49": ["n01697457", 
"African_crocodile"], "50": ["n01698640", "American_alligator"], "51": ["n01704323", "triceratops"], "52": ["n01728572", "thunder_snake"], "53": ["n01728920", "ringneck_snake"], "54": ["n01729322", "hognose_snake"], "55": ["n01729977", "green_snake"], "56": ["n01734418", "king_snake"], "57": ["n01735189", "garter_snake"], "58": ["n01737021", "water_snake"], "59": ["n01739381", "vine_snake"], "60": ["n01740131", "night_snake"], "61": ["n01742172", "boa_constrictor"], "62": ["n01744401", "rock_python"], "63": ["n01748264", "Indian_cobra"], "64": ["n01749939", "green_mamba"], "65": ["n01751748", "sea_snake"], "66": ["n01753488", "horned_viper"], "67": ["n01755581", "diamondback"], "68": ["n01756291", "sidewinder"], "69": ["n01768244", "trilobite"], "70": ["n01770081", "harvestman"], "71": ["n01770393", "scorpion"], "72": ["n01773157", "black_and_gold_garden_spider"], "73": ["n01773549", "barn_spider"], "74": ["n01773797", "garden_spider"], "75": ["n01774384", "black_widow"], "76": ["n01774750", "tarantula"], "77": ["n01775062", "wolf_spider"], "78": ["n01776313", "tick"], "79": ["n01784675", "centipede"], "80": ["n01795545", "black_grouse"], "81": ["n01796340", "ptarmigan"], "82": ["n01797886", "ruffed_grouse"], "83": ["n01798484", "prairie_chicken"], "84": ["n01806143", "peacock"], "85": ["n01806567", "quail"], "86": ["n01807496", "partridge"], "87": ["n01817953", "African_grey"], "88": ["n01818515", "macaw"], "89": ["n01819313", "sulphur-crested_cockatoo"], "90": ["n01820546", "lorikeet"], "91": ["n01824575", "coucal"], "92": ["n01828970", "bee_eater"], "93": ["n01829413", "hornbill"], "94": ["n01833805", "hummingbird"], "95": ["n01843065", "jacamar"], "96": ["n01843383", "toucan"], "97": ["n01847000", "drake"], "98": ["n01855032", "red-breasted_merganser"], "99": ["n01855672", "goose"], "100": ["n01860187", "black_swan"], "101": ["n01871265", "tusker"], "102": ["n01872401", "echidna"], "103": ["n01873310", "platypus"], "104": ["n01877812", "wallaby"], "105": 
["n01882714", "koala"], "106": ["n01883070", "wombat"], "107": ["n01910747", "jellyfish"], "108": ["n01914609", "sea_anemone"], "109": ["n01917289", "brain_coral"], "110": ["n01924916", "flatworm"], "111": ["n01930112", "nematode"], "112": ["n01943899", "conch"], "113": ["n01944390", "snail"], "114": ["n01945685", "slug"], "115": ["n01950731", "sea_slug"], "116": ["n01955084", "chiton"], "117": ["n01968897", "chambered_nautilus"], "118": ["n01978287", "Dungeness_crab"], "119": ["n01978455", "rock_crab"], "120": ["n01980166", "fiddler_crab"], "121": ["n01981276", "king_crab"], "122": ["n01983481", "American_lobster"], "123": ["n01984695", "spiny_lobster"], "124": ["n01985128", "crayfish"], "125": ["n01986214", "hermit_crab"], "126": ["n01990800", "isopod"], "127": ["n02002556", "white_stork"], "128": ["n02002724", "black_stork"], "129": ["n02006656", "spoonbill"], "130": ["n02007558", "flamingo"], "131": ["n02009229", "little_blue_heron"], "132": ["n02009912", "American_egret"], "133": ["n02011460", "bittern"], "134": ["n02012849", "crane"], "135": ["n02013706", "limpkin"], "136": ["n02017213", "European_gallinule"], "137": ["n02018207", "American_coot"], "138": ["n02018795", "bustard"], "139": ["n02025239", "ruddy_turnstone"], "140": ["n02027492", "red-backed_sandpiper"], "141": ["n02028035", "redshank"], "142": ["n02033041", "dowitcher"], "143": ["n02037110", "oystercatcher"], "144": ["n02051845", "pelican"], "145": ["n02056570", "king_penguin"], "146": ["n02058221", "albatross"], "147": ["n02066245", "grey_whale"], "148": ["n02071294", "killer_whale"], "149": ["n02074367", "dugong"], "150": ["n02077923", "sea_lion"], "151": ["n02085620", "Chihuahua"], "152": ["n02085782", "Japanese_spaniel"], "153": ["n02085936", "Maltese_dog"], "154": ["n02086079", "Pekinese"], "155": ["n02086240", "Shih-Tzu"], "156": ["n02086646", "Blenheim_spaniel"], "157": ["n02086910", "papillon"], "158": ["n02087046", "toy_terrier"], "159": ["n02087394", "Rhodesian_ridgeback"], "160": 
["n02088094", "Afghan_hound"], "161": ["n02088238", "basset"], "162": ["n02088364", "beagle"], "163": ["n02088466", "bloodhound"], "164": ["n02088632", "bluetick"], "165": ["n02089078", "black-and-tan_coonhound"], "166": ["n02089867", "Walker_hound"], "167": ["n02089973", "English_foxhound"], "168": ["n02090379", "redbone"], "169": ["n02090622", "borzoi"], "170": ["n02090721", "Irish_wolfhound"], "171": ["n02091032", "Italian_greyhound"], "172": ["n02091134", "whippet"], "173": ["n02091244", "Ibizan_hound"], "174": ["n02091467", "Norwegian_elkhound"], "175": ["n02091635", "otterhound"], "176": ["n02091831", "Saluki"], "177": ["n02092002", "Scottish_deerhound"], "178": ["n02092339", "Weimaraner"], "179": ["n02093256", "Staffordshire_bullterrier"], "180": ["n02093428", "American_Staffordshire_terrier"], "181": ["n02093647", "Bedlington_terrier"], "182": ["n02093754", "Border_terrier"], "183": ["n02093859", "Kerry_blue_terrier"], "184": ["n02093991", "Irish_terrier"], "185": ["n02094114", "Norfolk_terrier"], "186": ["n02094258", "Norwich_terrier"], "187": ["n02094433", "Yorkshire_terrier"], "188": ["n02095314", "wire-haired_fox_terrier"], "189": ["n02095570", "Lakeland_terrier"], "190": ["n02095889", "Sealyham_terrier"], "191": ["n02096051", "Airedale"], "192": ["n02096177", "cairn"], "193": ["n02096294", "Australian_terrier"], "194": ["n02096437", "Dandie_Dinmont"], "195": ["n02096585", "Boston_bull"], "196": ["n02097047", "miniature_schnauzer"], "197": ["n02097130", "giant_schnauzer"], "198": ["n02097209", "standard_schnauzer"], "199": ["n02097298", "Scotch_terrier"], "200": ["n02097474", "Tibetan_terrier"], "201": ["n02097658", "silky_terrier"], "202": ["n02098105", "soft-coated_wheaten_terrier"], "203": ["n02098286", "West_Highland_white_terrier"], "204": ["n02098413", "Lhasa"], "205": ["n02099267", "flat-coated_retriever"], "206": ["n02099429", "curly-coated_retriever"], "207": ["n02099601", "golden_retriever"], "208": ["n02099712", "Labrador_retriever"], "209": 
["n02099849", "Chesapeake_Bay_retriever"], "210": ["n02100236", "German_short-haired_pointer"], "211": ["n02100583", "vizsla"], "212": ["n02100735", "English_setter"], "213": ["n02100877", "Irish_setter"], "214": ["n02101006", "Gordon_setter"], "215": ["n02101388", "Brittany_spaniel"], "216": ["n02101556", "clumber"], "217": ["n02102040", "English_springer"], "218": ["n02102177", "Welsh_springer_spaniel"], "219": ["n02102318", "cocker_spaniel"], "220": ["n02102480", "Sussex_spaniel"], "221": ["n02102973", "Irish_water_spaniel"], "222": ["n02104029", "kuvasz"], "223": ["n02104365", "schipperke"], "224": ["n02105056", "groenendael"], "225": ["n02105162", "malinois"], "226": ["n02105251", "briard"], "227": ["n02105412", "kelpie"], "228": ["n02105505", "komondor"], "229": ["n02105641", "Old_English_sheepdog"], "230": ["n02105855", "Shetland_sheepdog"], "231": ["n02106030", "collie"], "232": ["n02106166", "Border_collie"], "233": ["n02106382", "Bouvier_des_Flandres"], "234": ["n02106550", "Rottweiler"], "235": ["n02106662", "German_shepherd"], "236": ["n02107142", "Doberman"], "237": ["n02107312", "miniature_pinscher"], "238": ["n02107574", "Greater_Swiss_Mountain_dog"], "239": ["n02107683", "Bernese_mountain_dog"], "240": ["n02107908", "Appenzeller"], "241": ["n02108000", "EntleBucher"], "242": ["n02108089", "boxer"], "243": ["n02108422", "bull_mastiff"], "244": ["n02108551", "Tibetan_mastiff"], "245": ["n02108915", "French_bulldog"], "246": ["n02109047", "Great_Dane"], "247": ["n02109525", "Saint_Bernard"], "248": ["n02109961", "Eskimo_dog"], "249": ["n02110063", "malamute"], "250": ["n02110185", "Siberian_husky"], "251": ["n02110341", "dalmatian"], "252": ["n02110627", "affenpinscher"], "253": ["n02110806", "basenji"], "254": ["n02110958", "pug"], "255": ["n02111129", "Leonberg"], "256": ["n02111277", "Newfoundland"], "257": ["n02111500", "Great_Pyrenees"], "258": ["n02111889", "Samoyed"], "259": ["n02112018", "Pomeranian"], "260": ["n02112137", "chow"], "261": 
["n02112350", "keeshond"], "262": ["n02112706", "Brabancon_griffon"], "263": ["n02113023", "Pembroke"], "264": ["n02113186", "Cardigan"], "265": ["n02113624", "toy_poodle"], "266": ["n02113712", "miniature_poodle"], "267": ["n02113799", "standard_poodle"], "268": ["n02113978", "Mexican_hairless"], "269": ["n02114367", "timber_wolf"], "270": ["n02114548", "white_wolf"], "271": ["n02114712", "red_wolf"], "272": ["n02114855", "coyote"], "273": ["n02115641", "dingo"], "274": ["n02115913", "dhole"], "275": ["n02116738", "African_hunting_dog"], "276": ["n02117135", "hyena"], "277": ["n02119022", "red_fox"], "278": ["n02119789", "kit_fox"], "279": ["n02120079", "Arctic_fox"], "280": ["n02120505", "grey_fox"], "281": ["n02123045", "tabby"], "282": ["n02123159", "tiger_cat"], "283": ["n02123394", "Persian_cat"], "284": ["n02123597", "Siamese_cat"], "285": ["n02124075", "Egyptian_cat"], "286": ["n02125311", "cougar"], "287": ["n02127052", "lynx"], "288": ["n02128385", "leopard"], "289": ["n02128757", "snow_leopard"], "290": ["n02128925", "jaguar"], "291": ["n02129165", "lion"], "292": ["n02129604", "tiger"], "293": ["n02130308", "cheetah"], "294": ["n02132136", "brown_bear"], "295": ["n02133161", "American_black_bear"], "296": ["n02134084", "ice_bear"], "297": ["n02134418", "sloth_bear"], "298": ["n02137549", "mongoose"], "299": ["n02138441", "meerkat"], "300": ["n02165105", "tiger_beetle"], "301": ["n02165456", "ladybug"], "302": ["n02167151", "ground_beetle"], "303": ["n02168699", "long-horned_beetle"], "304": ["n02169497", "leaf_beetle"], "305": ["n02172182", "dung_beetle"], "306": ["n02174001", "rhinoceros_beetle"], "307": ["n02177972", "weevil"], "308": ["n02190166", "fly"], "309": ["n02206856", "bee"], "310": ["n02219486", "ant"], "311": ["n02226429", "grasshopper"], "312": ["n02229544", "cricket"], "313": ["n02231487", "walking_stick"], "314": ["n02233338", "cockroach"], "315": ["n02236044", "mantis"], "316": ["n02256656", "cicada"], "317": ["n02259212", 
"leafhopper"], "318": ["n02264363", "lacewing"], "319": ["n02268443", "dragonfly"], "320": ["n02268853", "damselfly"], "321": ["n02276258", "admiral"], "322": ["n02277742", "ringlet"], "323": ["n02279972", "monarch"], "324": ["n02280649", "cabbage_butterfly"], "325": ["n02281406", "sulphur_butterfly"], "326": ["n02281787", "lycaenid"], "327": ["n02317335", "starfish"], "328": ["n02319095", "sea_urchin"], "329": ["n02321529", "sea_cucumber"], "330": ["n02325366", "wood_rabbit"], "331": ["n02326432", "hare"], "332": ["n02328150", "Angora"], "333": ["n02342885", "hamster"], "334": ["n02346627", "porcupine"], "335": ["n02356798", "fox_squirrel"], "336": ["n02361337", "marmot"], "337": ["n02363005", "beaver"], "338": ["n02364673", "guinea_pig"], "339": ["n02389026", "sorrel"], "340": ["n02391049", "zebra"], "341": ["n02395406", "hog"], "342": ["n02396427", "wild_boar"], "343": ["n02397096", "warthog"], "344": ["n02398521", "hippopotamus"], "345": ["n02403003", "ox"], "346": ["n02408429", "water_buffalo"], "347": ["n02410509", "bison"], "348": ["n02412080", "ram"], "349": ["n02415577", "bighorn"], "350": ["n02417914", "ibex"], "351": ["n02422106", "hartebeest"], "352": ["n02422699", "impala"], "353": ["n02423022", "gazelle"], "354": ["n02437312", "Arabian_camel"], "355": ["n02437616", "llama"], "356": ["n02441942", "weasel"], "357": ["n02442845", "mink"], "358": ["n02443114", "polecat"], "359": ["n02443484", "black-footed_ferret"], "360": ["n02444819", "otter"], "361": ["n02445715", "skunk"], "362": ["n02447366", "badger"], "363": ["n02454379", "armadillo"], "364": ["n02457408", "three-toed_sloth"], "365": ["n02480495", "orangutan"], "366": ["n02480855", "gorilla"], "367": ["n02481823", "chimpanzee"], "368": ["n02483362", "gibbon"], "369": ["n02483708", "siamang"], "370": ["n02484975", "guenon"], "371": ["n02486261", "patas"], "372": ["n02486410", "baboon"], "373": ["n02487347", "macaque"], "374": ["n02488291", "langur"], "375": ["n02488702", "colobus"], "376": 
["n02489166", "proboscis_monkey"], "377": ["n02490219", "marmoset"], "378": ["n02492035", "capuchin"], "379": ["n02492660", "howler_monkey"], "380": ["n02493509", "titi"], "381": ["n02493793", "spider_monkey"], "382": ["n02494079", "squirrel_monkey"], "383": ["n02497673", "Madagascar_cat"], "384": ["n02500267", "indri"], "385": ["n02504013", "Indian_elephant"], "386": ["n02504458", "African_elephant"], "387": ["n02509815", "lesser_panda"], "388": ["n02510455", "giant_panda"], "389": ["n02514041", "barracouta"], "390": ["n02526121", "eel"], "391": ["n02536864", "coho"], "392": ["n02606052", "rock_beauty"], "393": ["n02607072", "anemone_fish"], "394": ["n02640242", "sturgeon"], "395": ["n02641379", "gar"], "396": ["n02643566", "lionfish"], "397": ["n02655020", "puffer"], "398": ["n02666196", "abacus"], "399": ["n02667093", "abaya"], "400": ["n02669723", "academic_gown"], "401": ["n02672831", "accordion"], "402": ["n02676566", "acoustic_guitar"], "403": ["n02687172", "aircraft_carrier"], "404": ["n02690373", "airliner"], "405": ["n02692877", "airship"], "406": ["n02699494", "altar"], "407": ["n02701002", "ambulance"], "408": ["n02704792", "amphibian"], "409": ["n02708093", "analog_clock"], "410": ["n02727426", "apiary"], "411": ["n02730930", "apron"], "412": ["n02747177", "ashcan"], "413": ["n02749479", "assault_rifle"], "414": ["n02769748", "backpack"], "415": ["n02776631", "bakery"], "416": ["n02777292", "balance_beam"], "417": ["n02782093", "balloon"], "418": ["n02783161", "ballpoint"], "419": ["n02786058", "Band_Aid"], "420": ["n02787622", "banjo"], "421": ["n02788148", "bannister"], "422": ["n02790996", "barbell"], "423": ["n02791124", "barber_chair"], "424": ["n02791270", "barbershop"], "425": ["n02793495", "barn"], "426": ["n02794156", "barometer"], "427": ["n02795169", "barrel"], "428": ["n02797295", "barrow"], "429": ["n02799071", "baseball"], "430": ["n02802426", "basketball"], "431": ["n02804414", "bassinet"], "432": ["n02804610", "bassoon"], "433": 
["n02807133", "bathing_cap"], "434": ["n02808304", "bath_towel"], "435": ["n02808440", "bathtub"], "436": ["n02814533", "beach_wagon"], "437": ["n02814860", "beacon"], "438": ["n02815834", "beaker"], "439": ["n02817516", "bearskin"], "440": ["n02823428", "beer_bottle"], "441": ["n02823750", "beer_glass"], "442": ["n02825657", "bell_cote"], "443": ["n02834397", "bib"], "444": ["n02835271", "bicycle-built-for-two"], "445": ["n02837789", "bikini"], "446": ["n02840245", "binder"], "447": ["n02841315", "binoculars"], "448": ["n02843684", "birdhouse"], "449": ["n02859443", "boathouse"], "450": ["n02860847", "bobsled"], "451": ["n02865351", "bolo_tie"], "452": ["n02869837", "bonnet"], "453": ["n02870880", "bookcase"], "454": ["n02871525", "bookshop"], "455": ["n02877765", "bottlecap"], "456": ["n02879718", "bow"], "457": ["n02883205", "bow_tie"], "458": ["n02892201", "brass"], "459": ["n02892767", "brassiere"], "460": ["n02894605", "breakwater"], "461": ["n02895154", "breastplate"], "462": ["n02906734", "broom"], "463": ["n02909870", "bucket"], "464": ["n02910353", "buckle"], "465": ["n02916936", "bulletproof_vest"], "466": ["n02917067", "bullet_train"], "467": ["n02927161", "butcher_shop"], "468": ["n02930766", "cab"], "469": ["n02939185", "caldron"], "470": ["n02948072", "candle"], "471": ["n02950826", "cannon"], "472": ["n02951358", "canoe"], "473": ["n02951585", "can_opener"], "474": ["n02963159", "cardigan"], "475": ["n02965783", "car_mirror"], "476": ["n02966193", "carousel"], "477": ["n02966687", "carpenter's_kit"], "478": ["n02971356", "carton"], "479": ["n02974003", "car_wheel"], "480": ["n02977058", "cash_machine"], "481": ["n02978881", "cassette"], "482": ["n02979186", "cassette_player"], "483": ["n02980441", "castle"], "484": ["n02981792", "catamaran"], "485": ["n02988304", "CD_player"], "486": ["n02992211", "cello"], "487": ["n02992529", "cellular_telephone"], "488": ["n02999410", "chain"], "489": ["n03000134", "chainlink_fence"], "490": ["n03000247", 
"chain_mail"], "491": ["n03000684", "chain_saw"], "492": ["n03014705", "chest"], "493": ["n03016953", "chiffonier"], "494": ["n03017168", "chime"], "495": ["n03018349", "china_cabinet"], "496": ["n03026506", "Christmas_stocking"], "497": ["n03028079", "church"], "498": ["n03032252", "cinema"], "499": ["n03041632", "cleaver"], "500": ["n03042490", "cliff_dwelling"], "501": ["n03045698", "cloak"], "502": ["n03047690", "clog"], "503": ["n03062245", "cocktail_shaker"], "504": ["n03063599", "coffee_mug"], "505": ["n03063689", "coffeepot"], "506": ["n03065424", "coil"], "507": ["n03075370", "combination_lock"], "508": ["n03085013", "computer_keyboard"], "509": ["n03089624", "confectionery"], "510": ["n03095699", "container_ship"], "511": ["n03100240", "convertible"], "512": ["n03109150", "corkscrew"], "513": ["n03110669", "cornet"], "514": ["n03124043", "cowboy_boot"], "515": ["n03124170", "cowboy_hat"], "516": ["n03125729", "cradle"], "517": ["n03126707", "crane"], "518": ["n03127747", "crash_helmet"], "519": ["n03127925", "crate"], "520": ["n03131574", "crib"], "521": ["n03133878", "Crock_Pot"], "522": ["n03134739", "croquet_ball"], "523": ["n03141823", "crutch"], "524": ["n03146219", "cuirass"], "525": ["n03160309", "dam"], "526": ["n03179701", "desk"], "527": ["n03180011", "desktop_computer"], "528": ["n03187595", "dial_telephone"], "529": ["n03188531", "diaper"], "530": ["n03196217", "digital_clock"], "531": ["n03197337", "digital_watch"], "532": ["n03201208", "dining_table"], "533": ["n03207743", "dishrag"], "534": ["n03207941", "dishwasher"], "535": ["n03208938", "disk_brake"], "536": ["n03216828", "dock"], "537": ["n03218198", "dogsled"], "538": ["n03220513", "dome"], "539": ["n03223299", "doormat"], "540": ["n03240683", "drilling_platform"], "541": ["n03249569", "drum"], "542": ["n03250847", "drumstick"], "543": ["n03255030", "dumbbell"], "544": ["n03259280", "Dutch_oven"], "545": ["n03271574", "electric_fan"], "546": ["n03272010", "electric_guitar"], "547": 
["n03272562", "electric_locomotive"], "548": ["n03290653", "entertainment_center"], "549": ["n03291819", "envelope"], "550": ["n03297495", "espresso_maker"], "551": ["n03314780", "face_powder"], "552": ["n03325584", "feather_boa"], "553": ["n03337140", "file"], "554": ["n03344393", "fireboat"], "555": ["n03345487", "fire_engine"], "556": ["n03347037", "fire_screen"], "557": ["n03355925", "flagpole"], "558": ["n03372029", "flute"], "559": ["n03376595", "folding_chair"], "560": ["n03379051", "football_helmet"], "561": ["n03384352", "forklift"], "562": ["n03388043", "fountain"], "563": ["n03388183", "fountain_pen"], "564": ["n03388549", "four-poster"], "565": ["n03393912", "freight_car"], "566": ["n03394916", "French_horn"], "567": ["n03400231", "frying_pan"], "568": ["n03404251", "fur_coat"], "569": ["n03417042", "garbage_truck"], "570": ["n03424325", "gasmask"], "571": ["n03425413", "gas_pump"], "572": ["n03443371", "goblet"], "573": ["n03444034", "go-kart"], "574": ["n03445777", "golf_ball"], "575": ["n03445924", "golfcart"], "576": ["n03447447", "gondola"], "577": ["n03447721", "gong"], "578": ["n03450230", "gown"], "579": ["n03452741", "grand_piano"], "580": ["n03457902", "greenhouse"], "581": ["n03459775", "grille"], "582": ["n03461385", "grocery_store"], "583": ["n03467068", "guillotine"], "584": ["n03476684", "hair_slide"], "585": ["n03476991", "hair_spray"], "586": ["n03478589", "half_track"], "587": ["n03481172", "hammer"], "588": ["n03482405", "hamper"], "589": ["n03483316", "hand_blower"], "590": ["n03485407", "hand-held_computer"], "591": ["n03485794", "handkerchief"], "592": ["n03492542", "hard_disc"], "593": ["n03494278", "harmonica"], "594": ["n03495258", "harp"], "595": ["n03496892", "harvester"], "596": ["n03498962", "hatchet"], "597": ["n03527444", "holster"], "598": ["n03529860", "home_theater"], "599": ["n03530642", "honeycomb"], "600": ["n03532672", "hook"], "601": ["n03534580", "hoopskirt"], "602": ["n03535780", "horizontal_bar"], "603": 
["n03538406", "horse_cart"], "604": ["n03544143", "hourglass"], "605": ["n03584254", "iPod"], "606": ["n03584829", "iron"], "607": ["n03590841", "jack-o'-lantern"], "608": ["n03594734", "jean"], "609": ["n03594945", "jeep"], "610": ["n03595614", "jersey"], "611": ["n03598930", "jigsaw_puzzle"], "612": ["n03599486", "jinrikisha"], "613": ["n03602883", "joystick"], "614": ["n03617480", "kimono"], "615": ["n03623198", "knee_pad"], "616": ["n03627232", "knot"], "617": ["n03630383", "lab_coat"], "618": ["n03633091", "ladle"], "619": ["n03637318", "lampshade"], "620": ["n03642806", "laptop"], "621": ["n03649909", "lawn_mower"], "622": ["n03657121", "lens_cap"], "623": ["n03658185", "letter_opener"], "624": ["n03661043", "library"], "625": ["n03662601", "lifeboat"], "626": ["n03666591", "lighter"], "627": ["n03670208", "limousine"], "628": ["n03673027", "liner"], "629": ["n03676483", "lipstick"], "630": ["n03680355", "Loafer"], "631": ["n03690938", "lotion"], "632": ["n03691459", "loudspeaker"], "633": ["n03692522", "loupe"], "634": ["n03697007", "lumbermill"], "635": ["n03706229", "magnetic_compass"], "636": ["n03709823", "mailbag"], "637": ["n03710193", "mailbox"], "638": ["n03710637", "maillot"], "639": ["n03710721", "maillot"], "640": ["n03717622", "manhole_cover"], "641": ["n03720891", "maraca"], "642": ["n03721384", "marimba"], "643": ["n03724870", "mask"], "644": ["n03729826", "matchstick"], "645": ["n03733131", "maypole"], "646": ["n03733281", "maze"], "647": ["n03733805", "measuring_cup"], "648": ["n03742115", "medicine_chest"], "649": ["n03743016", "megalith"], "650": ["n03759954", "microphone"], "651": ["n03761084", "microwave"], "652": ["n03763968", "military_uniform"], "653": ["n03764736", "milk_can"], "654": ["n03769881", "minibus"], "655": ["n03770439", "miniskirt"], "656": ["n03770679", "minivan"], "657": ["n03773504", "missile"], "658": ["n03775071", "mitten"], "659": ["n03775546", "mixing_bowl"], "660": ["n03776460", "mobile_home"], "661": ["n03777568", 
"Model_T"], "662": ["n03777754", "modem"], "663": ["n03781244", "monastery"], "664": ["n03782006", "monitor"], "665": ["n03785016", "moped"], "666": ["n03786901", "mortar"], "667": ["n03787032", "mortarboard"], "668": ["n03788195", "mosque"], "669": ["n03788365", "mosquito_net"], "670": ["n03791053", "motor_scooter"], "671": ["n03792782", "mountain_bike"], "672": ["n03792972", "mountain_tent"], "673": ["n03793489", "mouse"], "674": ["n03794056", "mousetrap"], "675": ["n03796401", "moving_van"], "676": ["n03803284", "muzzle"], "677": ["n03804744", "nail"], "678": ["n03814639", "neck_brace"], "679": ["n03814906", "necklace"], "680": ["n03825788", "nipple"], "681": ["n03832673", "notebook"], "682": ["n03837869", "obelisk"], "683": ["n03838899", "oboe"], "684": ["n03840681", "ocarina"], "685": ["n03841143", "odometer"], "686": ["n03843555", "oil_filter"], "687": ["n03854065", "organ"], "688": ["n03857828", "oscilloscope"], "689": ["n03866082", "overskirt"], "690": ["n03868242", "oxcart"], "691": ["n03868863", "oxygen_mask"], "692": ["n03871628", "packet"], "693": ["n03873416", "paddle"], "694": ["n03874293", "paddlewheel"], "695": ["n03874599", "padlock"], "696": ["n03876231", "paintbrush"], "697": ["n03877472", "pajama"], "698": ["n03877845", "palace"], "699": ["n03884397", "panpipe"], "700": ["n03887697", "paper_towel"], "701": ["n03888257", "parachute"], "702": ["n03888605", "parallel_bars"], "703": ["n03891251", "park_bench"], "704": ["n03891332", "parking_meter"], "705": ["n03895866", "passenger_car"], "706": ["n03899768", "patio"], "707": ["n03902125", "pay-phone"], "708": ["n03903868", "pedestal"], "709": ["n03908618", "pencil_box"], "710": ["n03908714", "pencil_sharpener"], "711": ["n03916031", "perfume"], "712": ["n03920288", "Petri_dish"], "713": ["n03924679", "photocopier"], "714": ["n03929660", "pick"], "715": ["n03929855", "pickelhaube"], "716": ["n03930313", "picket_fence"], "717": ["n03930630", "pickup"], "718": ["n03933933", "pier"], "719": 
["n03935335", "piggy_bank"], "720": ["n03937543", "pill_bottle"], "721": ["n03938244", "pillow"], "722": ["n03942813", "ping-pong_ball"], "723": ["n03944341", "pinwheel"], "724": ["n03947888", "pirate"], "725": ["n03950228", "pitcher"], "726": ["n03954731", "plane"], "727": ["n03956157", "planetarium"], "728": ["n03958227", "plastic_bag"], "729": ["n03961711", "plate_rack"], "730": ["n03967562", "plow"], "731": ["n03970156", "plunger"], "732": ["n03976467", "Polaroid_camera"], "733": ["n03976657", "pole"], "734": ["n03977966", "police_van"], "735": ["n03980874", "poncho"], "736": ["n03982430", "pool_table"], "737": ["n03983396", "pop_bottle"], "738": ["n03991062", "pot"], "739": ["n03992509", "potter's_wheel"], "740": ["n03995372", "power_drill"], "741": ["n03998194", "prayer_rug"], "742": ["n04004767", "printer"], "743": ["n04005630", "prison"], "744": ["n04008634", "projectile"], "745": ["n04009552", "projector"], "746": ["n04019541", "puck"], "747": ["n04023962", "punching_bag"], "748": ["n04026417", "purse"], "749": ["n04033901", "quill"], "750": ["n04033995", "quilt"], "751": ["n04037443", "racer"], "752": ["n04039381", "racket"], "753": ["n04040759", "radiator"], "754": ["n04041544", "radio"], "755": ["n04044716", "radio_telescope"], "756": ["n04049303", "rain_barrel"], "757": ["n04065272", "recreational_vehicle"], "758": ["n04067472", "reel"], "759": ["n04069434", "reflex_camera"], "760": ["n04070727", "refrigerator"], "761": ["n04074963", "remote_control"], "762": ["n04081281", "restaurant"], "763": ["n04086273", "revolver"], "764": ["n04090263", "rifle"], "765": ["n04099969", "rocking_chair"], "766": ["n04111531", "rotisserie"], "767": ["n04116512", "rubber_eraser"], "768": ["n04118538", "rugby_ball"], "769": ["n04118776", "rule"], "770": ["n04120489", "running_shoe"], "771": ["n04125021", "safe"], "772": ["n04127249", "safety_pin"], "773": ["n04131690", "saltshaker"], "774": ["n04133789", "sandal"], "775": ["n04136333", "sarong"], "776": ["n04141076", 
"sax"], "777": ["n04141327", "scabbard"], "778": ["n04141975", "scale"], "779": ["n04146614", "school_bus"], "780": ["n04147183", "schooner"], "781": ["n04149813", "scoreboard"], "782": ["n04152593", "screen"], "783": ["n04153751", "screw"], "784": ["n04154565", "screwdriver"], "785": ["n04162706", "seat_belt"], "786": ["n04179913", "sewing_machine"], "787": ["n04192698", "shield"], "788": ["n04200800", "shoe_shop"], "789": ["n04201297", "shoji"], "790": ["n04204238", "shopping_basket"], "791": ["n04204347", "shopping_cart"], "792": ["n04208210", "shovel"], "793": ["n04209133", "shower_cap"], "794": ["n04209239", "shower_curtain"], "795": ["n04228054", "ski"], "796": ["n04229816", "ski_mask"], "797": ["n04235860", "sleeping_bag"], "798": ["n04238763", "slide_rule"], "799": ["n04239074", "sliding_door"], "800": ["n04243546", "slot"], "801": ["n04251144", "snorkel"], "802": ["n04252077", "snowmobile"], "803": ["n04252225", "snowplow"], "804": ["n04254120", "soap_dispenser"], "805": ["n04254680", "soccer_ball"], "806": ["n04254777", "sock"], "807": ["n04258138", "solar_dish"], "808": ["n04259630", "sombrero"], "809": ["n04263257", "soup_bowl"], "810": ["n04264628", "space_bar"], "811": ["n04265275", "space_heater"], "812": ["n04266014", "space_shuttle"], "813": ["n04270147", "spatula"], "814": ["n04273569", "speedboat"], "815": ["n04275548", "spider_web"], "816": ["n04277352", "spindle"], "817": ["n04285008", "sports_car"], "818": ["n04286575", "spotlight"], "819": ["n04296562", "stage"], "820": ["n04310018", "steam_locomotive"], "821": ["n04311004", "steel_arch_bridge"], "822": ["n04311174", "steel_drum"], "823": ["n04317175", "stethoscope"], "824": ["n04325704", "stole"], "825": ["n04326547", "stone_wall"], "826": ["n04328186", "stopwatch"], "827": ["n04330267", "stove"], "828": ["n04332243", "strainer"], "829": ["n04335435", "streetcar"], "830": ["n04336792", "stretcher"], "831": ["n04344873", "studio_couch"], "832": ["n04346328", "stupa"], "833": ["n04347754", 
"submarine"], "834": ["n04350905", "suit"], "835": ["n04355338", "sundial"], "836": ["n04355933", "sunglass"], "837": ["n04356056", "sunglasses"], "838": ["n04357314", "sunscreen"], "839": ["n04366367", "suspension_bridge"], "840": ["n04367480", "swab"], "841": ["n04370456", "sweatshirt"], "842": ["n04371430", "swimming_trunks"], "843": ["n04371774", "swing"], "844": ["n04372370", "switch"], "845": ["n04376876", "syringe"], "846": ["n04380533", "table_lamp"], "847": ["n04389033", "tank"], "848": ["n04392985", "tape_player"], "849": ["n04398044", "teapot"], "850": ["n04399382", "teddy"], "851": ["n04404412", "television"], "852": ["n04409515", "tennis_ball"], "853": ["n04417672", "thatch"], "854": ["n04418357", "theater_curtain"], "855": ["n04423845", "thimble"], "856": ["n04428191", "thresher"], "857": ["n04429376", "throne"], "858": ["n04435653", "tile_roof"], "859": ["n04442312", "toaster"], "860": ["n04443257", "tobacco_shop"], "861": ["n04447861", "toilet_seat"], "862": ["n04456115", "torch"], "863": ["n04458633", "totem_pole"], "864": ["n04461696", "tow_truck"], "865": ["n04462240", "toyshop"], "866": ["n04465501", "tractor"], "867": ["n04467665", "trailer_truck"], "868": ["n04476259", "tray"], "869": ["n04479046", "trench_coat"], "870": ["n04482393", "tricycle"], "871": ["n04483307", "trimaran"], "872": ["n04485082", "tripod"], "873": ["n04486054", "triumphal_arch"], "874": ["n04487081", "trolleybus"], "875": ["n04487394", "trombone"], "876": ["n04493381", "tub"], "877": ["n04501370", "turnstile"], "878": ["n04505470", "typewriter_keyboard"], "879": ["n04507155", "umbrella"], "880": ["n04509417", "unicycle"], "881": ["n04515003", "upright"], "882": ["n04517823", "vacuum"], "883": ["n04522168", "vase"], "884": ["n04523525", "vault"], "885": ["n04525038", "velvet"], "886": ["n04525305", "vending_machine"], "887": ["n04532106", "vestment"], "888": ["n04532670", "viaduct"], "889": ["n04536866", "violin"], "890": ["n04540053", "volleyball"], "891": ["n04542943", 
"waffle_iron"], "892": ["n04548280", "wall_clock"], "893": ["n04548362", "wallet"], "894": ["n04550184", "wardrobe"], "895": ["n04552348", "warplane"], "896": ["n04553703", "washbasin"], "897": ["n04554684", "washer"], "898": ["n04557648", "water_bottle"], "899": ["n04560804", "water_jug"], "900": ["n04562935", "water_tower"], "901": ["n04579145", "whiskey_jug"], "902": ["n04579432", "whistle"], "903": ["n04584207", "wig"], "904": ["n04589890", "window_screen"], "905": ["n04590129", "window_shade"], "906": ["n04591157", "Windsor_tie"], "907": ["n04591713", "wine_bottle"], "908": ["n04592741", "wing"], "909": ["n04596742", "wok"], "910": ["n04597913", "wooden_spoon"], "911": ["n04599235", "wool"], "912": ["n04604644", "worm_fence"], "913": ["n04606251", "wreck"], "914": ["n04612504", "yawl"], "915": ["n04613696", "yurt"], "916": ["n06359193", "web_site"], "917": ["n06596364", "comic_book"], "918": ["n06785654", "crossword_puzzle"], "919": ["n06794110", "street_sign"], "920": ["n06874185", "traffic_light"], "921": ["n07248320", "book_jacket"], "922": ["n07565083", "menu"], "923": ["n07579787", "plate"], "924": ["n07583066", "guacamole"], "925": ["n07584110", "consomme"], "926": ["n07590611", "hot_pot"], "927": ["n07613480", "trifle"], "928": ["n07614500", "ice_cream"], "929": ["n07615774", "ice_lolly"], "930": ["n07684084", "French_loaf"], "931": ["n07693725", "bagel"], "932": ["n07695742", "pretzel"], "933": ["n07697313", "cheeseburger"], "934": ["n07697537", "hotdog"], "935": ["n07711569", "mashed_potato"], "936": ["n07714571", "head_cabbage"], "937": ["n07714990", "broccoli"], "938": ["n07715103", "cauliflower"], "939": ["n07716358", "zucchini"], "940": ["n07716906", "spaghetti_squash"], "941": ["n07717410", "acorn_squash"], "942": ["n07717556", "butternut_squash"], "943": ["n07718472", "cucumber"], "944": ["n07718747", "artichoke"], "945": ["n07720875", "bell_pepper"], "946": ["n07730033", "cardoon"], "947": ["n07734744", "mushroom"], "948": ["n07742313", 
"Granny_Smith"], "949": ["n07745940", "strawberry"], "950": ["n07747607", "orange"], "951": ["n07749582", "lemon"], "952": ["n07753113", "fig"], "953": ["n07753275", "pineapple"], "954": ["n07753592", "banana"], "955": ["n07754684", "jackfruit"], "956": ["n07760859", "custard_apple"], "957": ["n07768694", "pomegranate"], "958": ["n07802026", "hay"], "959": ["n07831146", "carbonara"], "960": ["n07836838", "chocolate_sauce"], "961": ["n07860988", "dough"], "962": ["n07871810", "meat_loaf"], "963": ["n07873807", "pizza"], "964": ["n07875152", "potpie"], "965": ["n07880968", "burrito"], "966": ["n07892512", "red_wine"], "967": ["n07920052", "espresso"], "968": ["n07930864", "cup"], "969": ["n07932039", "eggnog"], "970": ["n09193705", "alp"], "971": ["n09229709", "bubble"], "972": ["n09246464", "cliff"], "973": ["n09256479", "coral_reef"], "974": ["n09288635", "geyser"], "975": ["n09332890", "lakeside"], "976": ["n09399592", "promontory"], "977": ["n09421951", "sandbar"], "978": ["n09428293", "seashore"], "979": ["n09468604", "valley"], "980": ["n09472597", "volcano"], "981": ["n09835506", "ballplayer"], "982": ["n10148035", "groom"], "983": ["n10565667", "scuba_diver"], "984": ["n11879895", "rapeseed"], "985": ["n11939491", "daisy"], "986": ["n12057211", "yellow_lady's_slipper"], "987": ["n12144580", "corn"], "988": ["n12267677", "acorn"], "989": ["n12620546", "hip"], "990": ["n12768682", "buckeye"], "991": ["n12985857", "coral_fungus"], "992": ["n12998815", "agaric"], "993": ["n13037406", "gyromitra"], "994": ["n13040303", "stinkhorn"], "995": ["n13044778", "earthstar"], "996": ["n13052670", "hen-of-the-woods"], "997": ["n13054560", "bolete"], "998": ["n13133613", "ear"], "999": ["n15075141", "toilet_tissue"]} \ No newline at end of file diff --git a/benchmarks/400.inference/413.image-classification/python/init.sh b/benchmarks/400.inference/413.image-classification/python/init.sh new file mode 100755 index 000000000..71a2e39c0 --- /dev/null +++ 
b/benchmarks/400.inference/413.image-classification/python/init.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +DIR=$1 +VERBOSE=$2 +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +path="${SCRIPT_DIR}/imagenet_class_index.json" +if [ "$VERBOSE" = true ]; then + echo "Update ${DIR} with json ${path}" +fi +cp ${path} ${DIR} diff --git a/benchmarks/400.inference/413.image-classification/python/package.sh b/benchmarks/400.inference/413.image-classification/python/package.sh new file mode 100644 index 000000000..038fac7c5 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/package.sh @@ -0,0 +1,32 @@ +# Stripping package code is based on https://github.com/ryfeus/lambda-packs repo + +PACKAGE_DIR=$1 +echo "Original size $(du -sh $1 | cut -f1)" + +CUR_DIR=$(pwd) +cd $1 +# cleaning libs +rm -rf external +find . -type d -name "tests" -exec rm -rf {} + +find . -type d -name "test" -exec rm -rf {} + +find . -type d -name "bin" -not -path "*/torch/*" -exec rm -rf {} + + +# cleaning +# stripping some of the numpy libs - libgfortran-2e0d59d6.so.5.0.0 - causes issues on Azure +find -name "*.so" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip +find -name "*.so.*" -not -path "*/PIL/*" -not -path "*/Pillow.libs/*" -not -path "*libgfortran*" | xargs strip + +rm -r pip >/dev/null +rm -r pip-* >/dev/null +rm -r wheel >/dev/null +rm -r wheel-* >/dev/null +rm easy_install.py >/dev/null +find . 
-name \*.pyc -delete +cd ${CUR_DIR} +echo "Stripped size $(du -sh $1 | cut -f1)" + +if ([[ "${PLATFORM}" == "AWS" ]] || [[ "${PLATFORM}" == "GCP" ]]) && ([[ "${PYTHON_VERSION}" == "3.8" ]] || [[ "${PYTHON_VERSION}" == "3.9" ]]); then + zip -qr torch.zip $1/torch + rm -rf $1/torch + echo "Torch-zipped size $(du -sh ${CUR_DIR} | cut -f1)" +fi diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt b/benchmarks/400.inference/413.image-classification/python/requirements.txt new file mode 100755 index 000000000..01d9a45b4 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt @@ -0,0 +1,5 @@ +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.10 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.11 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.12 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git 
a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 new file mode 100755 index 000000000..01d9a45b4 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.8 @@ -0,0 +1,5 @@ +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 new file mode 100755 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.3.9 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..01d9a45b4 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.8 @@ -0,0 +1,5 @@ +numpy>=1.22,<2.0 +pillow>=9.5,<10.0 +torch==2.4.1 +torchvision==0.19.1 +typing-extensions>=4.8 diff --git a/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..96299cb57 --- /dev/null +++ b/benchmarks/400.inference/413.image-classification/python/requirements.txt.arm.3.9 @@ -0,0 +1,4 @@ +numpy>=2.0 +pillow>=10.0 +torch==2.5.1 +torchvision==0.20.1 diff --git a/benchmarks/400.inference/413.recommendation/config.json b/benchmarks/400.inference/413.recommendation/config.json new file mode 100644 index 000000000..649bb78d6 --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 1024, + "languages": ["python"], + 
"modules": ["storage"] +} diff --git a/benchmarks/400.inference/413.recommendation/input.py b/benchmarks/400.inference/413.recommendation/input.py new file mode 100644 index 000000000..4e48cfa52 --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/input.py @@ -0,0 +1,30 @@ +import os + + +def buckets_count(): + return (2, 0) + + +def upload_files(data_root, data_dir, upload_func): + for root, _, files in os.walk(data_dir): + prefix = os.path.relpath(root, data_root) + for file in files: + upload_func(0, os.path.join(prefix, file), os.path.join(root, file)) + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + model_file = "dlrm_tiny.pt" + upload_func(0, model_file, os.path.join(data_dir, "model", model_file)) + + requests_file = "requests.jsonl" + upload_func(1, requests_file, os.path.join(data_dir, "data", requests_file)) + + cfg = {"object": {}, "bucket": {}} + cfg["object"]["model"] = model_file + cfg["object"]["requests"] = requests_file + cfg["bucket"]["bucket"] = benchmarks_bucket + cfg["bucket"]["model"] = input_paths[0] + cfg["bucket"]["requests"] = input_paths[1] + return cfg diff --git a/benchmarks/400.inference/413.recommendation/python/function.py b/benchmarks/400.inference/413.recommendation/python/function.py new file mode 100644 index 000000000..e7b4ae73c --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/function.py @@ -0,0 +1,144 @@ +import datetime +import json +import os +import uuid + +import torch +import torch.nn as nn + +from . 
import storage + +client = storage.storage.get_instance() + +MODEL_FILE = "dlrm_tiny.pt" +MODEL_CACHE = "/tmp/dlrm_gpu_model" + +_model = None +_device = torch.device("cpu") + + +class TinyDLRM(nn.Module): + def __init__(self, num_users, num_items, num_categories, embed_dim=8): + super().__init__() + self.user_emb = nn.Embedding(num_users, embed_dim) + self.item_emb = nn.Embedding(num_items, embed_dim) + self.category_emb = nn.Embedding(num_categories, embed_dim) + in_dim = embed_dim * 3 + 2 + hidden = 16 + self.mlp = nn.Sequential( + nn.Linear(in_dim, hidden), + nn.ReLU(), + nn.Linear(hidden, 1), + ) + + def forward(self, user_id, item_id, category_id, dense): + features = torch.cat( + [ + self.user_emb(user_id), + self.item_emb(item_id), + self.category_emb(category_id), + dense, + ], + dim=-1, + ) + return torch.sigmoid(self.mlp(features)) + + +def _select_device(): + if torch.cuda.is_available(): + return torch.device("cuda") + raise RuntimeError("CUDA is not available") + return torch.device("cpu") + + +def _load_model(bucket, prefix): + global _model, _device + + if _model is not None: + return 0.0, 0.0 + + download_begin = datetime.datetime.now() + os.makedirs(MODEL_CACHE, exist_ok=True) + tmp_path = os.path.join("/tmp", f"{uuid.uuid4()}-{MODEL_FILE}") + client.download(bucket, os.path.join(prefix, MODEL_FILE), tmp_path) + download_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + checkpoint = torch.load(tmp_path, map_location="cpu") + meta = checkpoint["meta"] + _device = _select_device() + model = TinyDLRM( + meta["num_users"], meta["num_items"], meta["num_categories"], meta["embed_dim"] + ) + model.load_state_dict(checkpoint["state_dict"]) + model.to(_device) + model.eval() + _model = model + os.remove(tmp_path) + process_end = datetime.datetime.now() + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) + return 
download_time, process_time + + +def _prepare_batch(requests): + user_ids = torch.tensor([req["user_id"] for req in requests], dtype=torch.long, device=_device) + item_ids = torch.tensor([req["item_id"] for req in requests], dtype=torch.long, device=_device) + category_ids = torch.tensor( + [req["category_id"] for req in requests], dtype=torch.long, device=_device + ) + dense = torch.tensor( + [req.get("dense", [0.0, 0.0]) for req in requests], dtype=torch.float32, device=_device + ) + return user_ids, item_ids, category_ids, dense + + +def handler(event): + bucket = event.get("bucket", {}).get("bucket") + model_prefix = event.get("bucket", {}).get("model") + requests_prefix = event.get("bucket", {}).get("requests") + requests_key = event.get("object", {}).get("requests") + + download_begin = datetime.datetime.now() + req_path = os.path.join("/tmp", f"{uuid.uuid4()}-{os.path.basename(requests_key)}") + client.download(bucket, os.path.join(requests_prefix, requests_key), req_path) + download_end = datetime.datetime.now() + + model_download_time, model_process_time = _load_model(bucket, model_prefix) + + with open(req_path, "r") as f: + payloads = [json.loads(line) for line in f if line.strip()] + os.remove(req_path) + + inference_begin = datetime.datetime.now() + user_ids, item_ids, category_ids, dense = _prepare_batch(payloads) + + with torch.no_grad(): + scores = _model(user_ids, item_ids, category_ids, dense).squeeze(-1).tolist() + inference_end = datetime.datetime.now() + + predictions = [] + for req, score in zip(payloads, scores): + predictions.append( + { + "user_id": req["user_id"], + "item_id": req["item_id"], + "category_id": req["category_id"], + "score": score, + "device": str(_device), + } + ) + + download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1) + compute_time = (inference_end - inference_begin) / datetime.timedelta(microseconds=1) + + return { + "result": {"predictions": predictions}, + "measurement": { + 
"download_time": download_time + model_download_time, + "compute_time": compute_time + model_process_time, + "model_time": model_process_time, + "model_download_time": model_download_time, + }, + } diff --git a/benchmarks/400.inference/413.recommendation/python/init.sh b/benchmarks/400.inference/413.recommendation/python/init.sh new file mode 100644 index 000000000..f42329404 --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/init.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +# No additional initialization required for GPU recommendation benchmark. diff --git a/benchmarks/400.inference/413.recommendation/python/package.sh b/benchmarks/400.inference/413.recommendation/python/package.sh new file mode 100644 index 000000000..64e9deacb --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/package.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +PACKAGE_DIR=$1 +echo "DLRM GPU package size $(du -sh $1 | cut -f1)" diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt b/benchmarks/400.inference/413.recommendation/python/requirements.txt new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.10 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.11 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 
b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.8 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 new file mode 100644 index 000000000..c5ddafe5b --- /dev/null +++ b/benchmarks/400.inference/413.recommendation/python/requirements.txt.3.9 @@ -0,0 +1 @@ +torch==2.2.2 diff --git a/benchmarks/500.scientific/501.graph-pagerank/input.py b/benchmarks/500.scientific/501.graph-pagerank/input.py index e20a6dcd1..a4ab10fb8 100644 --- a/benchmarks/500.scientific/501.graph-pagerank/input.py +++ b/benchmarks/500.scientific/501.graph-pagerank/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/501.graph-pagerank/python/function.py b/benchmarks/500.scientific/501.graph-pagerank/python/function.py index 0e462e9b4..461fc14a9 100755 --- a/benchmarks/500.scientific/501.graph-pagerank/python/function.py +++ b/benchmarks/500.scientific/501.graph-pagerank/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.pagerank() process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / 
datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': result[0], - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result[0], + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/502.graph-mst/input.py b/benchmarks/500.scientific/502.graph-mst/input.py index e20a6dcd1..a4ab10fb8 100644 --- a/benchmarks/500.scientific/502.graph-mst/input.py +++ b/benchmarks/500.scientific/502.graph-mst/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/502.graph-mst/python/function.py b/benchmarks/500.scientific/502.graph-mst/python/function.py index b63fbdce2..69ad77678 100755 --- a/benchmarks/500.scientific/502.graph-mst/python/function.py +++ b/benchmarks/500.scientific/502.graph-mst/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.spanning_tree(None, False) process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - 
graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': result[0], - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result[0], + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/503.graph-bfs/input.py b/benchmarks/500.scientific/503.graph-bfs/input.py index e20a6dcd1..a4ab10fb8 100644 --- a/benchmarks/500.scientific/503.graph-bfs/input.py +++ b/benchmarks/500.scientific/503.graph-bfs/input.py @@ -1,8 +1,7 @@ -size_generators = { - 'test' : 10, - 'small' : 10000, - 'large': 100000 -} +size_generators = {"test": 10, "small": 10000, "large": 100000} -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - return { 'size': size_generators[size], 'seed': 42} + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/500.scientific/503.graph-bfs/python/function.py b/benchmarks/500.scientific/503.graph-bfs/python/function.py index 18423ae1a..51a37346b 100755 --- a/benchmarks/500.scientific/503.graph-bfs/python/function.py +++ b/benchmarks/500.scientific/503.graph-bfs/python/function.py @@ -1,9 +1,10 @@ import datetime import igraph + def handler(event): - size = event.get('size') + size = event.get("size") if "seed" in event: import random @@ -17,13 +18,15 @@ def handler(event): result = graph.bfs(0) process_end = datetime.datetime.now() - graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta(microseconds=1) + graph_generating_time = (graph_generating_end - graph_generating_begin) / datetime.timedelta( + microseconds=1 + ) process_time = (process_end - process_begin) / 
datetime.timedelta(microseconds=1) return { - 'result': result, - 'measurement': { - 'graph_generating_time': graph_generating_time, - 'compute_time': process_time - } + "result": result, + "measurement": { + "graph_generating_time": graph_generating_time, + "compute_time": process_time, + }, } diff --git a/benchmarks/500.scientific/504.dna-visualisation/input.py b/benchmarks/500.scientific/504.dna-visualisation/input.py index a9f376ea2..ea26f48c0 100644 --- a/benchmarks/500.scientific/504.dna-visualisation/input.py +++ b/benchmarks/500.scientific/504.dna-visualisation/input.py @@ -1,16 +1,21 @@ -import glob, os +import glob +import os + def buckets_count(): return (1, 1) -def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func): - for file in glob.glob(os.path.join(data_dir, '*.fasta')): +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + + for file in glob.glob(os.path.join(data_dir, "*.fasta")): data = os.path.relpath(file, data_dir) upload_func(0, data, file) - input_config = {'object': {}, 'bucket': {}} - input_config['object']['key'] = data - input_config['bucket']['bucket'] = benchmarks_bucket - input_config['bucket']['input'] = input_paths[0] - input_config['bucket']['output'] = output_paths[0] + input_config = {"object": {}, "bucket": {}} + input_config["object"]["key"] = data + input_config["bucket"]["bucket"] = benchmarks_bucket + input_config["bucket"]["input"] = input_paths[0] + input_config["bucket"]["output"] = output_paths[0] return input_config diff --git a/benchmarks/500.scientific/504.dna-visualisation/python/function.py b/benchmarks/500.scientific/504.dna-visualisation/python/function.py index 8362a73a1..ca9f5975e 100755 --- a/benchmarks/500.scientific/504.dna-visualisation/python/function.py +++ b/benchmarks/500.scientific/504.dna-visualisation/python/function.py @@ -1,17 +1,23 @@ -import datetime, io, json, os +import datetime 
+import io +import json +import os + # using https://squiggle.readthedocs.io/en/latest/ from squiggle import transform from . import storage + client = storage.storage.get_instance() + def handler(event): - bucket = event.get('bucket').get('bucket') - input_prefix = event.get('bucket').get('input') - output_prefix = event.get('bucket').get('output') - key = event.get('object').get('key') - download_path = '/tmp/{}'.format(key) + bucket = event.get("bucket").get("bucket") + input_prefix = event.get("bucket").get("input") + output_prefix = event.get("bucket").get("output") + key = event.get("object").get("key") + download_path = "/tmp/{}".format(key) download_begin = datetime.datetime.now() client.download(bucket, os.path.join(input_prefix, key), download_path) @@ -34,13 +40,10 @@ def handler(event): process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1) return { - 'result': { - 'bucket': bucket, - 'key': key_name - }, - 'measurement': { - 'download_time': download_time, - 'compute_time': process_time, - 'upload_time': process_time - } + "result": {"bucket": bucket, "key": key_name}, + "measurement": { + "download_time": download_time, + "compute_time": process_time, + "upload_time": upload_time, + }, } diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json new file mode 100644 index 000000000..ff297ac5b --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py new file mode 100644 index 000000000..bb53694c9 --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/input.py @@ -0,0 +1,17 @@ +size_generators = { + "test": {"ny": 61, "nx": 61, "nit": 5, 
"rho": 1.0, "nu": 0.1, "F": 1.0}, + "small": {"ny": 121, "nx": 121, "nit": 10, "rho": 1.0, "nu": 0.1, "F": 1.0}, + "large": {"ny": 201, "nx": 201, "nit": 20, "rho": 1.0, "nu": 0.1, "F": 1.0}, +} + + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py new file mode 100644 index 000000000..5788880b2 --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/function.py @@ -0,0 +1,279 @@ +# Barba, Lorena A., and Forsyth, Gilbert F. (2018). +# CFD Python: the 12 steps to Navier-Stokes equations. +# Journal of Open Source Education, 1(9), 21, +# https://doi.org/10.21105/jose.00021 +# TODO: License +# (c) 2017 Lorena A. Barba, Gilbert F. Forsyth. +# All content is under Creative Commons Attribution CC-BY 4.0, +# and all code is under BSD-3 clause (previously under MIT, and changed on March 8, 2018). 
+ +import datetime + +import jax.numpy as jnp +import jax +from jax import lax +from functools import partial + + +@partial(jax.jit, static_argnums=(0,)) +def build_up_b(rho, dt, dx, dy, u, v): + b = jnp.zeros_like(u) + b = b.at[1:-1, 1:-1].set( + ( + rho + * ( + 1 + / dt + * ( + (u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx) + + (v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy) + ) + - ((u[1:-1, 2:] - u[1:-1, 0:-2]) / (2 * dx)) ** 2 + - 2 + * ( + (u[2:, 1:-1] - u[0:-2, 1:-1]) + / (2 * dy) + * (v[1:-1, 2:] - v[1:-1, 0:-2]) + / (2 * dx) + ) + - ((v[2:, 1:-1] - v[0:-2, 1:-1]) / (2 * dy)) ** 2 + ) + ) + ) + + # Periodic BC Pressure @ x = 2 + b = b.at[1:-1, -1].set( + ( + rho + * ( + 1 + / dt + * ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx) + (v[2:, -1] - v[0:-2, -1]) / (2 * dy)) + - ((u[1:-1, 0] - u[1:-1, -2]) / (2 * dx)) ** 2 + - 2 * ((u[2:, -1] - u[0:-2, -1]) / (2 * dy) * (v[1:-1, 0] - v[1:-1, -2]) / (2 * dx)) + - ((v[2:, -1] - v[0:-2, -1]) / (2 * dy)) ** 2 + ) + ) + ) + + # Periodic BC Pressure @ x = 0 + b = b.at[1:-1, 0].set( + ( + rho + * ( + 1 + / dt + * ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx) + (v[2:, 0] - v[0:-2, 0]) / (2 * dy)) + - ((u[1:-1, 1] - u[1:-1, -1]) / (2 * dx)) ** 2 + - 2 * ((u[2:, 0] - u[0:-2, 0]) / (2 * dy) * (v[1:-1, 1] - v[1:-1, -1]) / (2 * dx)) + - ((v[2:, 0] - v[0:-2, 0]) / (2 * dy)) ** 2 + ) + ) + ) + + return b + + +@partial(jax.jit, static_argnums=(0,)) +def pressure_poisson_periodic(nit, p, dx, dy, b): + def body_func(p, q): + pn = p.copy() + p = p.at[1:-1, 1:-1].set( + ((pn[1:-1, 2:] + pn[1:-1, 0:-2]) * dy**2 + (pn[2:, 1:-1] + pn[0:-2, 1:-1]) * dx**2) + / (2 * (dx**2 + dy**2)) + - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, 1:-1] + ) + + # Periodic BC Pressure @ x = 2 + p = p.at[1:-1, -1].set( + ((pn[1:-1, 0] + pn[1:-1, -2]) * dy**2 + (pn[2:, -1] + pn[0:-2, -1]) * dx**2) + / (2 * (dx**2 + dy**2)) + - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, -1] + ) + + # Periodic BC Pressure @ x = 0 + p = p.at[1:-1, 0].set( + ( + ((pn[1:-1, 1] + pn[1:-1, -1]) * 
dy**2 + (pn[2:, 0] + pn[0:-2, 0]) * dx**2) + / (2 * (dx**2 + dy**2)) + - dx**2 * dy**2 / (2 * (dx**2 + dy**2)) * b[1:-1, 0] + ) + ) + + # Wall boundary conditions, pressure + p = p.at[-1, :].set(p[-2, :]) # dp/dy = 0 at y = 2 + p = p.at[0, :].set(p[1, :]) # dp/dy = 0 at y = 0 + + return p, None + + p, _ = lax.scan(body_func, p, jnp.arange(nit)) + + +@partial(jax.jit, static_argnums=(0, 7, 8, 9)) +def channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F): + udiff = 1 + stepcount = 0 + + array_vals = (udiff, stepcount, u, v, p) + + def conf_func(array_vals): + udiff, _, _, _, _ = array_vals + return udiff > 0.001 + + def body_func(array_vals): + _, stepcount, u, v, p = array_vals + + un = u.copy() + vn = v.copy() + + b = build_up_b(rho, dt, dx, dy, u, v) + pressure_poisson_periodic(nit, p, dx, dy, b) + + u = u.at[1:-1, 1:-1].set( + un[1:-1, 1:-1] + - un[1:-1, 1:-1] * dt / dx * (un[1:-1, 1:-1] - un[1:-1, 0:-2]) + - vn[1:-1, 1:-1] * dt / dy * (un[1:-1, 1:-1] - un[0:-2, 1:-1]) + - dt / (2 * rho * dx) * (p[1:-1, 2:] - p[1:-1, 0:-2]) + + nu + * ( + dt / dx**2 * (un[1:-1, 2:] - 2 * un[1:-1, 1:-1] + un[1:-1, 0:-2]) + + dt / dy**2 * (un[2:, 1:-1] - 2 * un[1:-1, 1:-1] + un[0:-2, 1:-1]) + ) + + F * dt + ) + + v = v.at[1:-1, 1:-1].set( + vn[1:-1, 1:-1] + - un[1:-1, 1:-1] * dt / dx * (vn[1:-1, 1:-1] - vn[1:-1, 0:-2]) + - vn[1:-1, 1:-1] * dt / dy * (vn[1:-1, 1:-1] - vn[0:-2, 1:-1]) + - dt / (2 * rho * dy) * (p[2:, 1:-1] - p[0:-2, 1:-1]) + + nu + * ( + dt / dx**2 * (vn[1:-1, 2:] - 2 * vn[1:-1, 1:-1] + vn[1:-1, 0:-2]) + + dt / dy**2 * (vn[2:, 1:-1] - 2 * vn[1:-1, 1:-1] + vn[0:-2, 1:-1]) + ) + ) + + # Periodic BC u @ x = 2 + u = u.at[1:-1, -1].set( + un[1:-1, -1] + - un[1:-1, -1] * dt / dx * (un[1:-1, -1] - un[1:-1, -2]) + - vn[1:-1, -1] * dt / dy * (un[1:-1, -1] - un[0:-2, -1]) + - dt / (2 * rho * dx) * (p[1:-1, 0] - p[1:-1, -2]) + + nu + * ( + dt / dx**2 * (un[1:-1, 0] - 2 * un[1:-1, -1] + un[1:-1, -2]) + + dt / dy**2 * (un[2:, -1] - 2 * un[1:-1, -1] + un[0:-2, -1]) + ) + + F * dt + 
) + + # Periodic BC u @ x = 0 + u = u.at[1:-1, 0].set( + un[1:-1, 0] + - un[1:-1, 0] * dt / dx * (un[1:-1, 0] - un[1:-1, -1]) + - vn[1:-1, 0] * dt / dy * (un[1:-1, 0] - un[0:-2, 0]) + - dt / (2 * rho * dx) * (p[1:-1, 1] - p[1:-1, -1]) + + nu + * ( + dt / dx**2 * (un[1:-1, 1] - 2 * un[1:-1, 0] + un[1:-1, -1]) + + dt / dy**2 * (un[2:, 0] - 2 * un[1:-1, 0] + un[0:-2, 0]) + ) + + F * dt + ) + + # Periodic BC v @ x = 2 + v = v.at[1:-1, -1].set( + vn[1:-1, -1] + - un[1:-1, -1] * dt / dx * (vn[1:-1, -1] - vn[1:-1, -2]) + - vn[1:-1, -1] * dt / dy * (vn[1:-1, -1] - vn[0:-2, -1]) + - dt / (2 * rho * dy) * (p[2:, -1] - p[0:-2, -1]) + + nu + * ( + dt / dx**2 * (vn[1:-1, 0] - 2 * vn[1:-1, -1] + vn[1:-1, -2]) + + dt / dy**2 * (vn[2:, -1] - 2 * vn[1:-1, -1] + vn[0:-2, -1]) + ) + ) + + # Periodic BC v @ x = 0 + v = v.at[1:-1, 0].set( + vn[1:-1, 0] + - un[1:-1, 0] * dt / dx * (vn[1:-1, 0] - vn[1:-1, -1]) + - vn[1:-1, 0] * dt / dy * (vn[1:-1, 0] - vn[0:-2, 0]) + - dt / (2 * rho * dy) * (p[2:, 0] - p[0:-2, 0]) + + nu + * ( + dt / dx**2 * (vn[1:-1, 1] - 2 * vn[1:-1, 0] + vn[1:-1, -1]) + + dt / dy**2 * (vn[2:, 0] - 2 * vn[1:-1, 0] + vn[0:-2, 0]) + ) + ) + + # Wall BC: u,v = 0 @ y = 0,2 + u = u.at[0, :].set(0) + u = u.at[-1, :].set(0) + v = v.at[0, :].set(0) + v = v.at[-1, :].set(0) + + udiff = (jnp.sum(u) - jnp.sum(un)) / jnp.sum(u) + stepcount += 1 + + return (udiff, stepcount, u, v, p) + + _, stepcount, _, _, _ = lax.while_loop(conf_func, body_func, array_vals) + + return stepcount + + +def initialize(ny, nx): + u = jnp.zeros((ny, nx), dtype=jnp.float64) + v = jnp.zeros((ny, nx), dtype=jnp.float64) + p = jnp.ones((ny, nx), dtype=jnp.float64) + dx = 2 / (nx - 1) + dy = 2 / (ny - 1) + dt = 0.1 / ((nx - 1) * (ny - 1)) + return u, v, p, dx, dy, dt + + +def handler(event): + + if "size" in event: + size = event["size"] + ny = size["ny"] + nx = size["nx"] + nit = size["nit"] + rho = size["rho"] + nu = size["nu"] + F = size["F"] + + generate_begin = datetime.datetime.now() + + u, v, p, dx, 
dy, dt = initialize(ny, nx) + + generate_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + + results = channel_flow(nit, u, v, dt, dx, dy, p, rho, nu, F) + + process_end = datetime.datetime.now() + + # y_re_im = jnp.stack([jnp.real(result), jnp.imag(result)], axis=-1).tolist() + + process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1) + generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1) + + try: + results = jax.device_get(results) + except Exception: + pass + + if hasattr(results, "item"): + results = results.item() + elif hasattr(results, "tolist"): + results = results.tolist() + + return { + "size": size, + "result": results, + "measurement": {"compute_time": process_time, "generate_time": generate_time}, + } diff --git a/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/requirements.txt b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/requirements.txt new file mode 100644 index 000000000..f31e1afe0 --- /dev/null +++ b/benchmarks/500.scientific/5xx.channel_flow_jax_npbench/python/requirements.txt @@ -0,0 +1 @@ +jax[cuda12] \ No newline at end of file diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/config.json b/benchmarks/500.scientific/5xx.compute_jax_npbench/config.json new file mode 100644 index 000000000..ff297ac5b --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/input.py b/benchmarks/500.scientific/5xx.compute_jax_npbench/input.py new file mode 100644 index 000000000..56f136720 --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/input.py @@ -0,0 +1,17 @@ +size_generators = { + "test": {"M": 2000, "N": 2000}, + "small": {"M": 5000, "N": 5000}, + "large": {"M": 16000, "N": 16000}, +} + + +def generate_input( + 
data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py new file mode 100644 index 000000000..2e16b320d --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/function.py @@ -0,0 +1,62 @@ +import datetime + +import jax.numpy as jnp +import jax + + +@jax.jit +def compute(array_1, array_2, a, b, c): + return jnp.clip(array_1, 2, 10) * a + array_2 * b + c + + +def initialize(M, N): + from numpy.random import default_rng + + rng = default_rng(42) + array_1 = rng.uniform(0, 1000, size=(M, N)).astype(jnp.int64) + array_2 = rng.uniform(0, 1000, size=(M, N)).astype(jnp.int64) + a = jnp.int64(4) + b = jnp.int64(3) + c = jnp.int64(9) + return array_1, array_2, a, b, c + + +def handler(event): + + if "size" in event: + size = event["size"] + M = size["M"] + N = size["N"] + + generate_begin = datetime.datetime.now() + + array_1, array_2, a, b, c = initialize(M, N) + + generate_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + + results = compute(array_1, array_2, a, b, c) + + process_end = datetime.datetime.now() + + # y_re_im = jnp.stack([jnp.real(result), jnp.imag(result)], axis=-1).tolist() + + process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1) + generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1) + + try: + results = jax.device_get(results) + except Exception: + pass + + if getattr(results, "ndim", 0) == 0 or getattr(results, "size", 0) == 1: + results = results.item() + else: + results = results.tolist() + + return { + "size": size, + "result": results, + "measurement": {"compute_time": process_time, "generate_time": generate_time}, + } diff --git a/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt 
b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt new file mode 100644 index 000000000..f31e1afe0 --- /dev/null +++ b/benchmarks/500.scientific/5xx.compute_jax_npbench/python/requirements.txt @@ -0,0 +1 @@ +jax[cuda12] \ No newline at end of file diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json new file mode 100644 index 000000000..ff297ac5b --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 60, + "memory": 2048, + "languages": ["python"], + "modules": ["storage"] +} diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py new file mode 100644 index 000000000..937e96e44 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/input.py @@ -0,0 +1,17 @@ +size_generators = { + "test": {"N": 8, "W": 14, "H": 14, "C1": 32, "C2": 8}, + "small": {"N": 8, "W": 28, "H": 28, "C1": 64, "C2": 16}, + "large": {"N": 8, "W": 56, "H": 56, "C1": 128, "C2": 32}, +} + + +def generate_input( + data_dir, + size, + benchmarks_bucket, + input_paths, + output_paths, + upload_func, + nosql_func, +): + return {"size": size_generators[size]} diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py new file mode 100644 index 000000000..f24b2cc71 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/function.py @@ -0,0 +1,123 @@ +import datetime + +import jax.numpy as jnp +import jax +from jax import lax + + +@jax.jit +def relu(x): + return jnp.maximum(x, 0) + + +# Deep learning convolutional operator (stride = 1) +@jax.jit +def conv2d(input, weights): + K = weights.shape[0] # Assuming square kernel 
+ N = input.shape[0] + H_out = input.shape[1] - K + 1 + W_out = input.shape[2] - K + 1 + C_out = weights.shape[3] + output = jnp.empty((N, H_out, W_out, C_out), dtype=jnp.float32) + + def row_update(output, i): + def col_update(output, j): + input_slice = lax.dynamic_slice(input, (0, i, j, 0), (N, K, K, input.shape[-1])) + conv_result = jnp.sum( + input_slice[:, :, :, :, None] * weights[None, :, :, :], axis=(1, 2, 3) + ) + output = lax.dynamic_update_slice(output, conv_result[:, None, None, :], (0, i, j, 0)) + return output, None + + output, _ = lax.scan(col_update, output, jnp.arange(W_out)) + return output, None + + output, _ = lax.scan(row_update, output, jnp.arange(H_out)) + return output + + +# Batch normalization operator, as used in ResNet +@jax.jit +def batchnorm2d(x, eps=1e-5): + mean = jnp.mean(x, axis=0, keepdims=True) + std = jnp.std(x, axis=0, keepdims=True) + return (x - mean) / jnp.sqrt(std + eps) + + +# Bottleneck residual block (after initial convolution, without downsampling) +# in the ResNet-50 CNN (inference) +@jax.jit +def resnet_basicblock(input, conv1, conv2, conv3): + # Pad output of first convolution for second convolution + padded = jnp.zeros( + (input.shape[0], input.shape[1] + 2, input.shape[2] + 2, conv1.shape[3]), + dtype=jnp.float32, + ) + padded = lax.dynamic_update_slice(padded, conv2d(input, conv1), (0, 1, 1, 0)) + x = batchnorm2d(padded) + x = relu(x) + + x = conv2d(x, conv2) + x = batchnorm2d(x) + x = relu(x) + x = conv2d(x, conv3) + x = batchnorm2d(x) + return relu(x + input) + + +def initialize(N, W, H, C1, C2): + from numpy.random import default_rng + + rng = default_rng(42) + + # Input + input = rng.random((N, H, W, C1), dtype=jnp.float32) + # Weights + conv1 = rng.random((1, 1, C1, C2), dtype=jnp.float32) + conv2 = rng.random((3, 3, C2, C2), dtype=jnp.float32) + conv3 = rng.random((1, 1, C2, C1), dtype=jnp.float32) + return (input, conv1, conv2, conv3) + + +def handler(event): + + if "size" in event: + size = event["size"] + 
N = size["N"] + W = size["W"] + H = size["H"] + C1 = size["C1"] + C2 = size["C2"] + + generate_begin = datetime.datetime.now() + + input, conv1, conv2, conv3 = initialize(N, W, H, C1, C2) + + generate_end = datetime.datetime.now() + + process_begin = datetime.datetime.now() + + results = resnet_basicblock(input, conv1, conv2, conv3) + + process_end = datetime.datetime.now() + + # y_re_im = jnp.stack([jnp.real(result), jnp.imag(result)], axis=-1).tolist() + + process_time = (process_end - process_begin) / datetime.timedelta(milliseconds=1) + generate_time = (generate_end - generate_begin) / datetime.timedelta(milliseconds=1) + + try: + results = jax.device_get(results) + except Exception: + pass + + if getattr(results, "ndim", 0) == 0 or getattr(results, "size", 0) == 1: + results = results.item() + else: + results = results.tolist() + + return { + "size": size, + "result": results, + "measurement": {"compute_time": process_time, "generate_time": generate_time}, + } diff --git a/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt new file mode 100644 index 000000000..f31e1afe0 --- /dev/null +++ b/benchmarks/500.scientific/5xx.deep_learning_resnet_jax_npbench/python/requirements.txt @@ -0,0 +1 @@ +jax[cuda12] \ No newline at end of file diff --git a/benchmarks/600.linearalgebra/601.matmul/config.json b/benchmarks/600.linearalgebra/601.matmul/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/601.matmul/input.py b/benchmarks/600.linearalgebra/601.matmul/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, 
"small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/601.matmul/python/function.py b/benchmarks/600.linearalgebra/601.matmul/python/function.py new file mode 100755 index 000000000..ee88b2e58 --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/python/function.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python3 +import torch +import datetime + + +def initialize_torch(NI, NJ, NK, dtype=torch.float32, device="cuda"): + alpha = torch.tensor(1.5, dtype=dtype, device=device) + beta = torch.tensor(1.2, dtype=dtype, device=device) + i = torch.arange(NI, device=device) + j = torch.arange(NJ, device=device) + k = torch.arange(NK, device=device) + C = ((i[:, None] * j[None, :] + 1) % NI).to(dtype) / NI + A = ((i[:, None] * (k[None, :] + 1)) % NK).to(dtype) / NK + B = ((k[:, None] * (j[None, :] + 2)) % NJ).to(dtype) / NJ + return alpha, beta, C, A, B + + +def kernel_gemm(alpha, beta, C, A, B, reps=1): + torch.cuda.synchronize() + _ = alpha * (A @ B) + beta * C # warmup + torch.cuda.synchronize() + start = torch.cuda.Event(enable_timing=True) + end = torch.cuda.Event(enable_timing=True) + start.record() + for _ in range(reps): + C = alpha * (A @ B) + beta * C + end.record() + torch.cuda.synchronize() + return C, float(start.elapsed_time(end)) # ms for all reps + + +def handler(event): + + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + seed = event.get("seed", 42) + seed = int(seed) + + matrix_generating_begin = datetime.datetime.now() + alpha, beta, C, A, B = initialize_torch(size, size, size, dtype=torch.float32, device="cuda") + matrix_generating_end = datetime.datetime.now() + + matmul_begin = datetime.datetime.now() + C_out, gpu_ms = kernel_gemm(alpha, beta, C, A, B, reps=1) + matmul_end = datetime.datetime.now() + + matrix_generating_time 
= (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( + microseconds=1 + ) + matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) + + return { + # "result": result[0], + "measurement": { + "generating_time": matrix_generating_time, + "compute_time": matmul_time, + }, + } diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.8 new file mode 
100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/601.matmul/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/config.json b/benchmarks/600.linearalgebra/602.axpy/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/602.axpy/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/602.axpy/input.py b/benchmarks/600.linearalgebra/602.axpy/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/602.axpy/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/602.axpy/python/function.py b/benchmarks/600.linearalgebra/602.axpy/python/function.py new file mode 100755 index 000000000..79117fa1b --- /dev/null +++ b/benchmarks/600.linearalgebra/602.axpy/python/function.py @@ -0,0 +1,59 @@ +#!/usr/bin/env python3 +import torch +import datetime + + +def initialize_torch(N, dtype=torch.float32, device="cuda", seed=42): + if seed is not None: + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + alpha = torch.randn((), dtype=dtype, device=device) + x = torch.randn(N, dtype=dtype, device=device) + y = torch.randn(N, dtype=dtype, device=device) + return alpha, x, y + + +def kernel_axpy(alpha, x, y, reps=100): + torch.cuda.synchronize() + _ = alpha * x + y # warmup + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + for _ in range(reps): + y = alpha * x + y + 
end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return y, gpu_ms + + +def handler(event): + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + seed = event.get("seed", 42) + seed = int(seed) + + gen_begin = datetime.datetime.now() + alpha, x, y = initialize_torch(size, dtype=torch.float32, device="cuda", seed=seed) + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + y_out, gpu_ms = kernel_axpy(alpha, x, y, reps=100) + comp_end = datetime.datetime.now() + + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/602.axpy/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/config.json b/benchmarks/600.linearalgebra/603.jacobi2d/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/input.py b/benchmarks/600.linearalgebra/603.jacobi2d/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py new file mode 100755 index 000000000..4dc37e2c6 --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/function.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 +import torch +import datetime + + +def initialize_torch(N, 
dtype=torch.float32, device="cuda"): + i = torch.arange(N, device=device, dtype=dtype).view(-1, 1) + j = torch.arange(N, device=device, dtype=dtype).view(1, -1) + + A = i * (j + 2) / N + B = i * (j + 3) / N + return A, B + + +def kernel_jacobi2d(A, B, iters=50): + torch.cuda.synchronize() + # warmup + if A.shape[0] > 2 and A.shape[1] > 2: + B_inner = 0.2 * (A[1:-1, 1:-1] + A[1:-1, :-2] + A[1:-1, 2:] + A[2:, 1:-1] + A[:-2, 1:-1]) + B[1:-1, 1:-1].copy_(B_inner) + + A_inner = 0.2 * (B[1:-1, 1:-1] + B[1:-1, :-2] + B[1:-1, 2:] + B[2:, 1:-1] + B[:-2, 1:-1]) + A[1:-1, 1:-1].copy_(A_inner) + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + for _ in range(iters): + B_inner = 0.2 * (A[1:-1, 1:-1] + A[1:-1, :-2] + A[1:-1, 2:] + A[2:, 1:-1] + A[:-2, 1:-1]) + B[1:-1, 1:-1].copy_(B_inner) + + A_inner = 0.2 * (B[1:-1, 1:-1] + B[1:-1, :-2] + B[1:-1, 2:] + B[2:, 1:-1] + B[:-2, 1:-1]) + A[1:-1, 1:-1].copy_(A_inner) + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return A, B, gpu_ms + + +def handler(event): + + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + seed = event.get("seed", 42) + seed = int(seed) + + matrix_generating_begin = datetime.datetime.now() + A, B = initialize_torch(size, dtype=torch.float32, device="cuda") + matrix_generating_end = datetime.datetime.now() + + matmul_begin = datetime.datetime.now() + A_out, B_out, gpu_ms = kernel_jacobi2d(A, B, iters=50) + matmul_end = datetime.datetime.now() + + matrix_generating_time = (matrix_generating_end - matrix_generating_begin) / datetime.timedelta( + microseconds=1 + ) + matmul_time = (matmul_end - matmul_begin) / datetime.timedelta(microseconds=1) + + return { + # "result": result[0], + "measurement": { + "generating_time": matrix_generating_time, + "compute_time": matmul_time, + "gpu_time": gpu_ms, + }, + } diff --git 
a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/603.jacobi2d/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/604.cholesky/config.json b/benchmarks/600.linearalgebra/604.cholesky/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/604.cholesky/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/604.cholesky/input.py b/benchmarks/600.linearalgebra/604.cholesky/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/604.cholesky/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/function.py b/benchmarks/600.linearalgebra/604.cholesky/python/function.py new file mode 100755 index 000000000..5a7ac77d5 --- /dev/null +++ b/benchmarks/600.linearalgebra/604.cholesky/python/function.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +import torch +import datetime + + +def initialize_torch(N, dtype=torch.float32, device="cuda"): + j = torch.arange(N, device=device) + v = (torch.remainder(-j, N).to(dtype) / N) + 1 + + L = v.expand(N, -1).clone() + L = torch.tril(L) + L.fill_diagonal_(1.0) + + A = L @ L.transpose(-1, -2) + return A + + +def kernel_cholesky(A): + torch.cuda.synchronize() + _ = torch.linalg.cholesky(A) # warmup + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + for _ in range(A.size(0)): + L = torch.linalg.cholesky(A) + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return L, gpu_ms + + +def handler(event): + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + seed = event.get("seed", 
42) + seed = int(seed) + + gen_begin = datetime.datetime.now() + A = initialize_torch(size, dtype=torch.float32, device="cuda") + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + L, gpu_ms = kernel_cholesky(A) + comp_end = datetime.datetime.now() + + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.9 
b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/604.cholesky/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/config.json b/benchmarks/600.linearalgebra/605.lu/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/605.lu/input.py b/benchmarks/600.linearalgebra/605.lu/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/605.lu/python/function.py b/benchmarks/600.linearalgebra/605.lu/python/function.py new file mode 100755 index 000000000..fc99a3ab9 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/python/function.py @@ -0,0 +1,78 @@ +#!/usr/bin/env python3 +import torch +import datetime + + +def initialize_torch(N, dtype=torch.float32, device="cuda"): + col = torch.arange(N, device=device) + base = (torch.remainder(-col, N).to(dtype) / N) + 1 + + A = torch.tril(base.expand(N, N)).clone() + + A.fill_diagonal_(torch.tensor(1.0, dtype=dtype, device=device)) + + A = A @ A.T + return A + + +def _kernel_lu(B: torch.Tensor) -> 
torch.Tensor: + n = B.shape[0] + for i in range(n): + for j in range(i): + B[i, j] = B[i, j] - (B[i, :j] @ B[:j, j]) + B[i, j] = B[i, j] / B[j, j] + for j in range(i, n): + B[i, j] = B[i, j] - (B[i, :i] @ B[:i, j]) + return B + + +def kernel(A: torch.Tensor): + torch.cuda.synchronize() + + _ = _kernel_lu(A.clone()) # Warm-up + + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + + start_evt.record() + B = None + for _ in range(A.size(0)): + B = _kernel_lu(A.clone()) + end_evt.record() + + torch.cuda.synchronize() + + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return B, gpu_ms + + +def handler(event): + size = event.get("size") + if "seed" in event: + import random + + random.seed(event["seed"]) + + seed = event.get("seed", 42) + seed = int(seed) + + gen_begin = datetime.datetime.now() + A = initialize_torch(size, dtype=torch.float32, device="cuda") + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + B, gpu_ms = kernel(A) + comp_end = datetime.datetime.now() + + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.11 new file mode 100644 
index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/605.lu/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/config.json b/benchmarks/600.linearalgebra/606.spmv/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/606.spmv/input.py b/benchmarks/600.linearalgebra/606.spmv/input.py new file mode 100644 index 000000000..e0f215890 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42, 
"density": 0.01} diff --git a/benchmarks/600.linearalgebra/606.spmv/python/function.py b/benchmarks/600.linearalgebra/606.spmv/python/function.py new file mode 100755 index 000000000..e2c4b0218 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/python/function.py @@ -0,0 +1,71 @@ +import torch +import datetime + + +def initialize_torch(N, density=0.01, dtype=torch.float32, device="cuda", seed=42): + if seed is not None: + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + nnz = int(N * N * density) + row_indices = torch.randint(0, N, (nnz,), device=device) + col_indices = torch.randint(0, N, (nnz,), device=device) + values = torch.randn(nnz, dtype=dtype, device=device) + + indices = torch.stack([row_indices, col_indices]) + sparse_matrix = torch.sparse_coo_tensor(indices, values, (N, N), dtype=dtype, device=device) + + sparse_matrix_csr = sparse_matrix.to_sparse_csr() + + x = torch.randn(N, dtype=dtype, device=device) + + return sparse_matrix_csr, x + + +def kernel_spmv(A, x, reps=100): + torch.cuda.synchronize() + _ = torch.sparse.mm(A, x.unsqueeze(1)).squeeze() # warmup + torch.cuda.synchronize() + + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + for _ in range(reps): + y = torch.sparse.mm(A, x.unsqueeze(1)).squeeze() + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return y, gpu_ms + + +def handler(event): + size = event.get("size") + density = event.get("density", 0.01) # default 1% density + + if "seed" in event: + import random + + random.seed(event["seed"]) + seed = event.get("seed", 42) + seed = int(seed) + else: + seed = 42 + + gen_begin = datetime.datetime.now() + A, x = initialize_torch(size, density=density, dtype=torch.float32, device="cuda", seed=seed) + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + y_out, gpu_ms = kernel_spmv(A, x, reps=100) + comp_end = datetime.datetime.now() + 
+ gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/606.spmv/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/config.json b/benchmarks/600.linearalgebra/607.fw/config.json new file mode 100644 index 000000000..e80fb4351 --- /dev/null +++ b/benchmarks/600.linearalgebra/607.fw/config.json @@ -0,0 +1,6 @@ +{ + "timeout": 120, + "memory": 512, + "languages": ["python"], + "modules": [] +} diff --git a/benchmarks/600.linearalgebra/607.fw/input.py b/benchmarks/600.linearalgebra/607.fw/input.py new file mode 100644 index 000000000..79ff6f5cb --- /dev/null +++ b/benchmarks/600.linearalgebra/607.fw/input.py @@ -0,0 +1,7 @@ +size_generators = {"test": 10, "small": 100, "large": 1000} + + +def generate_input( + data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func, nosql_func +): + return {"size": size_generators[size], "seed": 42} diff --git a/benchmarks/600.linearalgebra/607.fw/python/function.py b/benchmarks/600.linearalgebra/607.fw/python/function.py new file mode 100755 index 000000000..bee06dd03 --- /dev/null +++ b/benchmarks/600.linearalgebra/607.fw/python/function.py @@ -0,0 +1,71 @@ +import torch +import datetime + + +def initialize_torch(N, dtype=torch.int32, device="cuda", seed=42): + if seed is not None: + torch.manual_seed(seed) + torch.cuda.manual_seed_all(seed) + + i, j = torch.meshgrid( + torch.arange(N, device=device), torch.arange(N, device=device), indexing="ij" + ) + path = ((i * j) % 7 + 1).to(dtype) + + mask = ((i + j) % 13 == 0) | ((i + j) % 7 == 0) | ((i + j) % 11 == 0) + path = path.masked_fill(mask, torch.as_tensor(999, dtype=dtype, device=device)) + return path + + +def kernel_fw(path): + torch.cuda.synchronize() + path2 = path.clone() + n = path2.size(0) + for k in range(n): + for i in range(n): + path2[i, :] = torch.minimum(path2[i, :], path2[i, k] + path2[k, :]) # warmup + torch.cuda.synchronize() 
+ + start_evt = torch.cuda.Event(enable_timing=True) + end_evt = torch.cuda.Event(enable_timing=True) + start_evt.record() + n = path.size(0) + for k in range(n): + for i in range(n): + path[i, :] = torch.minimum(path[i, :], path[i, k] + path[k, :]) + end_evt.record() + torch.cuda.synchronize() + gpu_ms = float(start_evt.elapsed_time(end_evt)) + return path, gpu_ms + + +def handler(event): + size = event.get("size") + + if "seed" in event: + import random + + random.seed(event["seed"]) + seed = event.get("seed", 42) + seed = int(seed) + else: + seed = 42 + + gen_begin = datetime.datetime.now() + path = initialize_torch(size, dtype=torch.float32, device="cuda", seed=seed) + gen_end = datetime.datetime.now() + + comp_begin = datetime.datetime.now() + path_out, gpu_ms = kernel_fw(path) + comp_end = datetime.datetime.now() + + gen_us = (gen_end - gen_begin) / datetime.timedelta(microseconds=1) + comp_us = (comp_end - comp_begin) / datetime.timedelta(microseconds=1) + + return { + "measurement": { + "generating_time": gen_us, + "compute_time": comp_us, + "gpu_time": gpu_ms, + } + } diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt new file mode 100755 index 000000000..d8d966118 --- /dev/null +++ b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt @@ -0,0 +1 @@ +torch==2.4.1 diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.10 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.10 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.11 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.11 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.12 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.12 new file mode 100644 index 000000000..e69de29bb diff --git 
a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.7 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.7 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.8 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.8 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.9 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.3.9 new file mode 100755 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.8 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.8 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.9 b/benchmarks/600.linearalgebra/607.fw/python/requirements.txt.arm.3.9 new file mode 100644 index 000000000..e69de29bb diff --git a/benchmarks/wrappers/aws/python/handler.py b/benchmarks/wrappers/aws/python/handler.py index 907b2c612..f5a1d4195 100644 --- a/benchmarks/wrappers/aws/python/handler.py +++ b/benchmarks/wrappers/aws/python/handler.py @@ -1,39 +1,46 @@ - -import datetime, io, json, os, sys, uuid +import datetime +import io +import json +import os +import sys +import uuid # Add current directory to allow location of packages -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) # TODO: usual trigger # implement support for S3 and others + + def handler(event, context): income_timestamp = datetime.datetime.now().timestamp() # HTTP trigger with API Gateaway - if 'body' in event: - event = json.loads(event['body']) + if "body" in event: + event = json.loads(event["body"]) req_id = context.aws_request_id - event['request-id'] = req_id - event['income-timestamp'] = income_timestamp + 
event["request-id"] = req_id + event["income-timestamp"] = income_timestamp begin = datetime.datetime.now() from function import function + ret = function.handler(event) end = datetime.datetime.now() - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in event: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in event: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from function import storage + storage_inst = storage.storage.get_instance() - b = event.get('logs').get('bucket') - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + b = event.get("logs").get("bucket") + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -41,14 +48,14 @@ def handler(event, context): # cold test is_cold = False - fname = os.path.join('/tmp', 'cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() cold_start_var = "" @@ -56,16 +63,17 @@ def handler(event, context): cold_start_var = os.environ["cold_start"] return { - 'statusCode': 200, - 'body': json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'is_cold': is_cold, - 'result': log_data, - 'request_id': context.aws_request_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - 
}) + "statusCode": 200, + "body": json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "is_cold": is_cold, + "result": log_data, + "request_id": context.aws_request_id, + "cold_start_var": cold_start_var, + "container_id": container_id, + } + ), } - diff --git a/benchmarks/wrappers/aws/python/setup.py b/benchmarks/wrappers/aws/python/setup.py index b3d878351..016974465 100644 --- a/benchmarks/wrappers/aws/python/setup.py +++ b/benchmarks/wrappers/aws/python/setup.py @@ -2,14 +2,13 @@ from glob import glob from pkg_resources import parse_requirements -with open('requirements.txt') as f: +with open("requirements.txt") as f: requirements = [str(r) for r in parse_requirements(f)] setup( - name='function', + name="function", install_requires=requirements, - packages=['function'], - package_dir={'function': '.'}, - package_data={'function': glob('**', recursive=True)}, + packages=["function"], + package_dir={"function": "."}, + package_data={"function": glob("**", recursive=True)}, ) - diff --git a/benchmarks/wrappers/aws/python/storage.py b/benchmarks/wrappers/aws/python/storage.py index 4be0025e8..50875fbfc 100644 --- a/benchmarks/wrappers/aws/python/storage.py +++ b/benchmarks/wrappers/aws/python/storage.py @@ -10,16 +10,14 @@ class storage: client = None def __init__(self): - self.client = boto3.client('s3') + self.client = boto3.client("s3") @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) @@ -31,8 +29,8 @@ def download(self, bucket, file, filepath): def download_directory(self, bucket, prefix, path): objects = 
self.client.list_objects_v2(Bucket=bucket, Prefix=prefix) - for obj in objects['Contents']: - file_name = obj['Key'] + for obj in objects["Contents"]: + file_name = obj["Key"] path_to_file = os.path.dirname(file_name) os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(bucket, file_name, os.path.join(path, file_name)) @@ -46,7 +44,7 @@ def download_stream(self, bucket, file): data = io.BytesIO() self.client.download_fileobj(bucket, file, data) return data.getbuffer() - + def get_instance(): if storage.instance is None: storage.instance = storage() diff --git a/benchmarks/wrappers/azure/python/handler.py b/benchmarks/wrappers/azure/python/handler.py index 88e44baf6..964fc2fde 100644 --- a/benchmarks/wrappers/azure/python/handler.py +++ b/benchmarks/wrappers/azure/python/handler.py @@ -1,52 +1,60 @@ - -import datetime, io, json, os, uuid +import datetime +import io +import json +import os +import uuid import azure.functions as func -if 'NOSQL_STORAGE_DATABASE' in os.environ: +if "NOSQL_STORAGE_DATABASE" in os.environ: from . import nosql nosql.nosql.get_instance( - os.environ['NOSQL_STORAGE_DATABASE'], - os.environ['NOSQL_STORAGE_URL'], - os.environ['NOSQL_STORAGE_CREDS'] + os.environ["NOSQL_STORAGE_DATABASE"], + os.environ["NOSQL_STORAGE_URL"], + os.environ["NOSQL_STORAGE_CREDS"], ) -if 'STORAGE_CONNECTION_STRING' in os.environ: +if "STORAGE_CONNECTION_STRING" in os.environ: from . 
import storage - client = storage.storage.get_instance(os.environ['STORAGE_CONNECTION_STRING']) + + client = storage.storage.get_instance(os.environ["STORAGE_CONNECTION_STRING"]) + # TODO: usual trigger # implement support for blob and others + + def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: income_timestamp = datetime.datetime.now().timestamp() req_json = req.get_json() - req_json['request-id'] = context.invocation_id - req_json['income-timestamp'] = income_timestamp + req_json["request-id"] = context.invocation_id + req_json["income-timestamp"] = income_timestamp begin = datetime.datetime.now() # We are deployed in the same directory from . import function + ret = function.handler(req_json) end = datetime.datetime.now() - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in req_json: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in req_json: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from . 
import storage + storage_inst = storage.storage.get_instance() - b = req_json.get('logs').get('bucket') + b = req_json.get("logs").get("bucket") req_id = context.invocation_id - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -54,14 +62,14 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: # cold test is_cold = False - fname = os.path.join('/tmp','cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() is_cold_worker = False @@ -73,17 +81,18 @@ def main(req: func.HttpRequest, context: func.Context) -> func.HttpResponse: is_cold_worker = True return func.HttpResponse( - json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'result': log_data, - 'is_cold': is_cold, - 'is_cold_worker': is_cold_worker, - 'container_id': container_id, - 'environ_container_id': os.environ['CONTAINER_NAME'], - 'request_id': context.invocation_id - }), - mimetype="application/json" + json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "result": log_data, + "is_cold": is_cold, + "is_cold_worker": is_cold_worker, + "container_id": container_id, + "environ_container_id": os.environ["CONTAINER_NAME"], + "request_id": context.invocation_id, + } + ), + mimetype="application/json", ) - diff --git a/benchmarks/wrappers/azure/python/storage.py b/benchmarks/wrappers/azure/python/storage.py index 
42b129c89..fabd8e6a1 100644 --- a/benchmarks/wrappers/azure/python/storage.py +++ b/benchmarks/wrappers/azure/python/storage.py @@ -1,10 +1,10 @@ - import os import uuid from typing import Optional from azure.storage.blob import BlobServiceClient + class storage: instance = None client = None @@ -15,20 +15,18 @@ def __init__(self, connection_string: str): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, container, file, filepath): - with open(filepath, 'rb') as data: + with open(filepath, "rb") as data: return self.upload_stream(container, file, data) def download(self, container, file, filepath): - with open(filepath, 'wb') as download_file: - download_file.write( self.download_stream(container, file) ) - + with open(filepath, "wb") as download_file: + download_file.write(self.download_stream(container, file)) + def download_directory(self, container, prefix, path): client = self.client.get_container_client(container=container) objects = client.list_blobs(name_starts_with=prefix) @@ -37,20 +35,17 @@ def download_directory(self, container, prefix, path): path_to_file = os.path.dirname(file_name) os.makedirs(os.path.join(path, path_to_file), exist_ok=True) self.download(container, file_name, os.path.join(path, file_name)) - + def upload_stream(self, container, file, data): key_name = storage.unique_name(file) - client = self.client.get_blob_client( - container=container, - blob=key_name - ) + client = self.client.get_blob_client(container=container, blob=key_name) client.upload_blob(data) return key_name def download_stream(self, container, file): client = self.client.get_blob_client(container=container, blob=file) return client.download_blob().readall() - + @staticmethod def 
get_instance(connection_string: Optional[str] = None): if storage.instance is None: diff --git a/benchmarks/wrappers/gcp/python/handler.py b/benchmarks/wrappers/gcp/python/handler.py index 9b6989611..57e1d000b 100644 --- a/benchmarks/wrappers/gcp/python/handler.py +++ b/benchmarks/wrappers/gcp/python/handler.py @@ -1,44 +1,46 @@ -import datetime, io, json, os, uuid, sys +import datetime +import io +import json +import os +import sys +import uuid -sys.path.append(os.path.join(os.path.dirname(__file__), '.python_packages/lib/site-packages')) +sys.path.append(os.path.join(os.path.dirname(__file__), ".python_packages/lib/site-packages")) # This variable is defined by SeBS during function creation. -if 'NOSQL_STORAGE_DATABASE' in os.environ: +if "NOSQL_STORAGE_DATABASE" in os.environ: from function import nosql - nosql.nosql.get_instance( - os.environ['NOSQL_STORAGE_DATABASE'] - ) + nosql.nosql.get_instance(os.environ["NOSQL_STORAGE_DATABASE"]) def handler(req): income_timestamp = datetime.datetime.now().timestamp() - req_id = req.headers.get('Function-Execution-Id') - + req_id = req.headers.get("Function-Execution-Id") req_json = req.get_json() - req_json['request-id'] = req_id - req_json['income-timestamp'] = income_timestamp + req_json["request-id"] = req_id + req_json["income-timestamp"] = income_timestamp begin = datetime.datetime.now() # We are deployed in the same directorygit status from function import function + ret = function.handler(req_json) end = datetime.datetime.now() - - log_data = { - 'output': ret['result'] - } - if 'measurement' in ret: - log_data['measurement'] = ret['measurement'] - if 'logs' in req_json: - log_data['time'] = (end - begin) / datetime.timedelta(microseconds=1) + log_data = {"output": ret["result"]} + if "measurement" in ret: + log_data["measurement"] = ret["measurement"] + if "logs" in req_json: + log_data["time"] = (end - begin) / datetime.timedelta(microseconds=1) results_begin = datetime.datetime.now() from function import 
storage + storage_inst = storage.storage.get_instance() - b = req_json.get('logs').get('bucket') - storage_inst.upload_stream(b, '{}.json'.format(req_id), - io.BytesIO(json.dumps(log_data).encode('utf-8'))) + b = req_json.get("logs").get("bucket") + storage_inst.upload_stream( + b, "{}.json".format(req_id), io.BytesIO(json.dumps(log_data).encode("utf-8")) + ) results_end = datetime.datetime.now() results_time = (results_end - results_begin) / datetime.timedelta(microseconds=1) else: @@ -46,27 +48,33 @@ def handler(req): # cold test is_cold = False - fname = os.path.join('/tmp', 'cold_run') + fname = os.path.join("/tmp", "cold_run") if not os.path.exists(fname): is_cold = True container_id = str(uuid.uuid4())[0:8] - with open(fname, 'a') as f: + with open(fname, "a") as f: f.write(container_id) else: - with open(fname, 'r') as f: + with open(fname, "r") as f: container_id = f.read() cold_start_var = "" if "cold_start" in os.environ: cold_start_var = os.environ["cold_start"] - return json.dumps({ - 'begin': begin.strftime('%s.%f'), - 'end': end.strftime('%s.%f'), - 'results_time': results_time, - 'is_cold': is_cold, - 'result': log_data, - 'request_id': req_id, - 'cold_start_var': cold_start_var, - 'container_id': container_id, - }), 200, {'ContentType': 'application/json'} + return ( + json.dumps( + { + "begin": begin.strftime("%s.%f"), + "end": end.strftime("%s.%f"), + "results_time": results_time, + "is_cold": is_cold, + "result": log_data, + "request_id": req_id, + "cold_start_var": cold_start_var, + "container_id": container_id, + } + ), + 200, + {"ContentType": "application/json"}, + ) diff --git a/benchmarks/wrappers/gcp/python/storage.py b/benchmarks/wrappers/gcp/python/storage.py index 81163cb34..70f182618 100644 --- a/benchmarks/wrappers/gcp/python/storage.py +++ b/benchmarks/wrappers/gcp/python/storage.py @@ -15,11 +15,9 @@ def __init__(self): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return 
'{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) diff --git a/benchmarks/wrappers/local/python/storage.py b/benchmarks/wrappers/local/python/storage.py index b44968408..d25583a13 100644 --- a/benchmarks/wrappers/local/python/storage.py +++ b/benchmarks/wrappers/local/python/storage.py @@ -1,32 +1,28 @@ -import io import os import uuid import minio + class storage: instance = None client = None def __init__(self): - if 'MINIO_ADDRESS' in os.environ: - address = os.environ['MINIO_ADDRESS'] - access_key = os.environ['MINIO_ACCESS_KEY'] - secret_key = os.environ['MINIO_SECRET_KEY'] + if "MINIO_ADDRESS" in os.environ: + address = os.environ["MINIO_ADDRESS"] + access_key = os.environ["MINIO_ACCESS_KEY"] + secret_key = os.environ["MINIO_SECRET_KEY"] self.client = minio.Minio( - address, - access_key=access_key, - secret_key=secret_key, - secure=False) + address, access_key=access_key, secret_key=secret_key, secure=False + ) @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) @@ -55,4 +51,3 @@ def get_instance(): if storage.instance is None: storage.instance = storage() return storage.instance - diff --git a/benchmarks/wrappers/openwhisk/python/__main__.py b/benchmarks/wrappers/openwhisk/python/__main__.py index 3ae44f9c2..3833bff8c 100644 --- a/benchmarks/wrappers/openwhisk/python/__main__.py +++ b/benchmarks/wrappers/openwhisk/python/__main__.py @@ -2,24 
+2,30 @@ import datetime import os + def main(args): logging.getLogger().setLevel(logging.INFO) begin = datetime.datetime.now() - args['request-id'] = os.getenv('__OW_ACTIVATION_ID') - args['income-timestamp'] = begin.timestamp() + args["request-id"] = os.getenv("__OW_ACTIVATION_ID") + args["income-timestamp"] = begin.timestamp() - for arg in ["MINIO_STORAGE_CONNECTION_URL", "MINIO_STORAGE_ACCESS_KEY", "MINIO_STORAGE_SECRET_KEY"]: + for arg in [ + "MINIO_STORAGE_CONNECTION_URL", + "MINIO_STORAGE_ACCESS_KEY", + "MINIO_STORAGE_SECRET_KEY", + ]: os.environ[arg] = args[arg] del args[arg] key_list = list(args.keys()) for arg in key_list: - if 'NOSQL_STORAGE_' in arg: + if "NOSQL_STORAGE_" in arg: os.environ[arg] = args[arg] del args[arg] try: from function import function + ret = function.handler(args) end = datetime.datetime.now() logging.info("Function result: {}".format(ret)) @@ -38,7 +44,7 @@ def main(args): return { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), - "request_id": os.getenv('__OW_ACTIVATION_ID'), + "request_id": os.getenv("__OW_ACTIVATION_ID"), "results_time": results_time, "is_cold": is_cold, "result": log_data, @@ -49,7 +55,7 @@ def main(args): return { "begin": begin.strftime("%s.%f"), "end": end.strftime("%s.%f"), - "request_id": os.getenv('__OW_ACTIVATION_ID'), + "request_id": os.getenv("__OW_ACTIVATION_ID"), "results_time": results_time, - "result": f"Error - invocation failed! Reason: {e}" + "result": f"Error - invocation failed! 
Reason: {e}", } diff --git a/benchmarks/wrappers/openwhisk/python/nosql.py b/benchmarks/wrappers/openwhisk/python/nosql.py index da8245009..4a8676d36 100644 --- a/benchmarks/wrappers/openwhisk/python/nosql.py +++ b/benchmarks/wrappers/openwhisk/python/nosql.py @@ -5,6 +5,7 @@ import boto3 from botocore.client import Config + class nosql: instance: Optional["nosql"] = None @@ -14,14 +15,14 @@ def __init__(self): if environ["NOSQL_STORAGE_TYPE"] != "scylladb": raise RuntimeError(f"Unsupported NoSQL storage type: {environ['NOSQL_STORAGE_TYPE']}!") - config = Config(connect_timeout=5, retries={'max_attempts': 0}) + config = Config(connect_timeout=5, retries={"max_attempts": 0}) self.client = boto3.resource( "dynamodb", region_name="None", aws_access_key_id="None", aws_secret_access_key="None", endpoint_url=f"http://{environ['NOSQL_STORAGE_ENDPOINT']}", - config=config + config=config, ) self._tables = {} diff --git a/benchmarks/wrappers/openwhisk/python/setup.py b/benchmarks/wrappers/openwhisk/python/setup.py index b942d059b..016974465 100644 --- a/benchmarks/wrappers/openwhisk/python/setup.py +++ b/benchmarks/wrappers/openwhisk/python/setup.py @@ -2,13 +2,13 @@ from glob import glob from pkg_resources import parse_requirements -with open('requirements.txt') as f: +with open("requirements.txt") as f: requirements = [str(r) for r in parse_requirements(f)] setup( - name='function', + name="function", install_requires=requirements, - packages=['function'], - package_dir={'function': '.'}, - package_data={'function': glob('**', recursive=True)}, -) \ No newline at end of file + packages=["function"], + package_dir={"function": "."}, + package_data={"function": glob("**", recursive=True)}, +) diff --git a/benchmarks/wrappers/openwhisk/python/storage.py b/benchmarks/wrappers/openwhisk/python/storage.py index 76c7e3e8e..09b9e78a7 100644 --- a/benchmarks/wrappers/openwhisk/python/storage.py +++ b/benchmarks/wrappers/openwhisk/python/storage.py @@ -1,8 +1,8 @@ +import logging 
import os import uuid -import json + import minio -import logging class storage: @@ -25,14 +25,14 @@ def __init__(self): maxsize=10, retries=urllib3.Retry( total=5, backoff_factor=0.2, status_forcelist=[500, 502, 503, 504] - ) + ), ) self.client = minio.Minio( os.getenv("MINIO_STORAGE_CONNECTION_URL"), access_key=os.getenv("MINIO_STORAGE_ACCESS_KEY"), secret_key=os.getenv("MINIO_STORAGE_SECRET_KEY"), secure=False, - http_client=mgr + http_client=mgr, ) except Exception as e: logging.info(e) @@ -41,12 +41,9 @@ def __init__(self): @staticmethod def unique_name(name): name, extension = os.path.splitext(name) - return '{name}.{random}{extension}'.format( - name=name, - extension=extension, - random=str(uuid.uuid4()).split('-')[0] - ) - + return "{name}.{random}{extension}".format( + name=name, extension=extension, random=str(uuid.uuid4()).split("-")[0] + ) def upload(self, bucket, file, filepath): key_name = storage.unique_name(file) @@ -64,9 +61,7 @@ def download_directory(self, bucket, prefix, path): def upload_stream(self, bucket, file, bytes_data): key_name = storage.unique_name(file) - self.client.put_object( - bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes - ) + self.client.put_object(bucket, key_name, bytes_data, bytes_data.getbuffer().nbytes) return key_name def download_stream(self, bucket, file): diff --git a/docs/benchmarks.md b/docs/benchmarks.md index e292a4b04..6977672d6 100644 --- a/docs/benchmarks.md +++ b/docs/benchmarks.md @@ -10,6 +10,8 @@ | Multimedia | 220.video-processing | Python | x64, arm64 | Add a watermark and generate gif of a video file. | | Utilities | 311.compression | Python | x64, arm64 | Create a .zip file for a group of files in storage and return to user to download. | | Inference | 411.image-recognition | Python | x64 | Image recognition with ResNet and pytorch. | +| Inference | 412.language-bert | Python | x64 | Sentence classification with a compact BERT model served via ONNX Runtime. 
| +| Inference | 413.recommendation | Python | x64 | GPU DLRM-inspired recommender scoring implemented in PyTorch. | | Scientific | 501.graph-pagerank | Python | x64, arm64 | PageRank implementation with igraph. | | Scientific | 502.graph-mst | Python | x64, arm64 | Minimum spanning tree (MST) implementation with igraph. | | Scientific | 503.graph-bfs | Python | x64, arm64 | Breadth-first search (BFS) implementation with igraph. | @@ -70,6 +72,14 @@ It implements the .zip file creation with the help of the `shutil` standard libr The benchmark is inspired by MLPerf and implements image recognition with Resnet50. It downloads the input and model from the storage and uses the CPU-only `pytorch` library in Python. +### Language Inference + +This benchmark runs sequence classification with a compact BERT model exported to ONNX. The function downloads the model archive and text samples from storage, tokenizes the sentences, executes the ONNX Runtime session, and returns the predicted labels together with confidences. + +### Recommendation + +Inspired by MLPerf’s DLRM v2, this benchmark ships a tiny PyTorch DLRM model that optionally runs on CUDA when available. The function downloads the model and request batch, moves the network to GPU if possible, performs batched inference, and reports recommendation scores alongside timing measurements. + ## Scientific ### Graph PageRank, BFS, MST @@ -87,4 +97,3 @@ This benchmark is inspired by the [DNAVisualization](https://github.com/Benjamin ## Applications **(WiP)** Coming soon! 
- diff --git a/install.py b/install.py index 57f047d23..b856e45b7 100755 --- a/install.py +++ b/install.py @@ -86,7 +86,7 @@ def execute(cmd, cwd=None): execute(f"git pull", cwd=data_dir) # clone else: - execute(f"git clone https://github.com/spcl/serverless-benchmarks-data.git {data_dir}") + execute(f"git clone https://github.com/McLavish/serverless-benchmarks-data-dphpc.git {data_dir}") else: raise error @@ -99,4 +99,3 @@ def execute(cmd, cwd=None): execute("python3 setup.py build") execute("python3 pypapi/papi_build.py") os.chdir(cur_dir) - diff --git a/sebs/local/local.py b/sebs/local/local.py index 32b9f9ffb..7f49974e5 100644 --- a/sebs/local/local.py +++ b/sebs/local/local.py @@ -222,7 +222,12 @@ def _start_container( container_kwargs["command"] = f"/bin/bash /sebs/run_server.sh {port}" container_kwargs["ports"] = {f"{port}/tcp": port} - container = self._docker_client.containers.run(**container_kwargs) + from docker.types import DeviceRequest + + container = self._docker_client.containers.run( + **container_kwargs, + device_requests=[DeviceRequest(driver="nvidia", count=-1, capabilities=[["gpu"]])], + ) pid: Optional[int] = None if self.measurements_enabled and self._memory_measurement_path is not None: diff --git a/sebs/regression.py b/sebs/regression.py index 579760a1c..01dc8d071 100644 --- a/sebs/regression.py +++ b/sebs/regression.py @@ -21,6 +21,8 @@ "220.video-processing", "311.compression", "411.image-recognition", + "412.language-bert", + "413.recommendation", "501.graph-pagerank", "502.graph-mst", "503.graph-bfs",