diff --git a/.github/workflows/ci-nighly-benchmark-ocp.yaml b/.github/workflows/ci-nighly-benchmark-ocp.yaml
index 84e9955b..81e7f7cd 100644
--- a/.github/workflows/ci-nighly-benchmark-ocp.yaml
+++ b/.github/workflows/ci-nighly-benchmark-ocp.yaml
@@ -62,73 +62,75 @@ jobs:
       - name: Run install_deps.sh
         run: |
           sudo apt-get update
-          ./setup/install_deps.sh
+          curl -sSL https://pdm-project.org/install-pdm.py | python3 -
+          cd llm_d_benchmark && ./setup/install_deps.sh
         shell: bash

-      - name: Install config explorer dependencies
-        run: pip install -r config_explorer/requirements.txt
+      - name: Install python dependencies
+        run: |
+          pdm install
         shell: bash

       - name: Cleanup target cloud (modelservice)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/teardown.sh -c ocp_L40_fb -t modelservice -d
+        run: cd llm_d_benchmark && ./setup/teardown.sh -c ocp_L40_fb -t modelservice -d

       - name: Cleanup target cloud (standalone)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/teardown.sh -c ocp_L40_fb -t standalone -d
+        run: cd llm_d_benchmark && ./setup/teardown.sh -c ocp_L40_fb -t standalone -d

       - name: Standup target cloud (standalone)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/standup.sh -c ocp_L40_fb -t standalone
+        run: cd llm_d_benchmark && ./setup/standup.sh -c ocp_L40_fb -t standalone

       - name: Run benchmark (standalone, inference-perf)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/run.sh -c ocp_L40_fb -t standalone
+        run: cd llm_d_benchmark && ./setup/run.sh -c ocp_L40_fb -t standalone

       - name: Run benchmark (standalone, fmperf)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/run.sh -c ocp_L40_fb -t standalone -l fmperf -w sanity_short-input
+        run: cd llm_d_benchmark && ./setup/run.sh -c ocp_L40_fb -t standalone -l fmperf -w sanity_short-input

       - name: Run benchmark (standalone, guidellm)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/run.sh -c ocp_L40_fb -t standalone -l guidellm -w sanity_concurrent
+        run: cd llm_d_benchmark && ./setup/run.sh -c ocp_L40_fb -t standalone -l guidellm -w sanity_concurrent

       - name: Run benchmark (standalone, vllm-benchmark)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/run.sh -c ocp_L40_fb -t standalone -l vllm-benchmark
+        run: cd llm_d_benchmark && ./setup/run.sh -c ocp_L40_fb -t standalone -l vllm-benchmark

       - name: Cleanup target cloud (standalone)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/teardown.sh -c ocp_L40_fb -t standalone -d
+        run: cd llm_d_benchmark && ./setup/teardown.sh -c ocp_L40_fb -t standalone -d

       - name: E2E target cloud (modelservice, inference-perf)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep
+        run: cd llm_d_benchmark && ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep

       - name: E2E target cloud (modelservice, fmperf)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l fmperf -w sanity_short-input.yaml
+        run: cd llm_d_benchmark && ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l fmperf -w sanity_short-input.yaml

       - name: E2E target cloud (modelservice, guidellm)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l guidellm -w sanity_concurrent.yaml
+        run: cd llm_d_benchmark && ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l guidellm -w sanity_concurrent.yaml

       - name: E2E target cloud (modelservice, vllm-benchmark)
         env:
           LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
-        run: ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l vllm-benchmark
+        run: cd llm_d_benchmark && ./setup/e2e.sh -c ocp_L40_fb -t modelservice --deep -l vllm-benchmark

       - name: Install AWS CLI
diff --git a/.github/workflows/ci-pr-benchmark.yaml b/.github/workflows/ci-pr-benchmark.yaml
index 7ef6c872..c39c2df0 100644
--- a/.github/workflows/ci-pr-benchmark.yaml
+++ b/.github/workflows/ci-pr-benchmark.yaml
@@ -1,8 +1,8 @@
 name: CI - PR Benchmark Run

 on:
-  pull_request:
-
+  pull_request: {}
+  workflow_dispatch: {}

 jobs:
   run-benchmark-sh:
@@ -37,23 +37,28 @@ jobs:
       - name: Run install_deps
        run: |
          sudo apt-get update
-          ./setup/install_deps.sh
+          curl -sSL https://pdm-project.org/install-pdm.py | python3 -
+          cd llm_d_benchmark && ./setup/install_deps.sh
        shell: bash

-      - name: Install config explorer dependencies
-        run: pip install -r config_explorer/requirements.txt
+      - name: Install python dependencies
+        run: |
+          pdm install
        shell: bash

       - name: Standup a modelservice using llm-d-inference-sim
        run: |
-          ./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8,9
+          cd llm_d_benchmark
+          pdm run bash -x ./setup/standup.sh -c kind_sim_fb -t modelservice -s 0,1,2,4,7,8,9

       - name: Run harness (mock)
         env:
           LLMD_CONTROL_DRY_RUN: 1
         # TODO: harness doesn't work now for kind bc no harness endpoint
         run: |
-          ./setup/run.sh -c kind_sim_fb --dry-run
+          cd llm_d_benchmark
+          pdm run bash -x ./setup/run.sh -c kind_sim_fb --dry-run

       - name: Teardown
         run: |
-          ./setup/teardown.sh -c kind_sim_fb
+          cd llm_d_benchmark
+          pdm run bash -x ./setup/teardown.sh -c kind_sim_fb
diff --git a/.github/workflows/ci-pr-checks.yaml b/.github/workflows/ci-pr-checks.yaml
index 2d623ca3..743ade7f 100644
--- a/.github/workflows/ci-pr-checks.yaml
+++ b/.github/workflows/ci-pr-checks.yaml
@@ -4,6 +4,8 @@ on:
   pull_request:
     branches:
       - main
+  workflow_dispatch: {}
+
 jobs:
   lint-and-test:
diff --git a/.github/workflows/ci-release.yaml b/.github/workflows/ci-release.yaml
index 757b2630..2d49bc33 100644
--- a/.github/workflows/ci-release.yaml
+++ b/.github/workflows/ci-release.yaml
@@ -6,6 +6,7 @@ on:
       - 'v*' # Runs when a tag like v0.1.0 is pushed
   release:
     types: [published] # Also runs when a GitHub release is published
+  workflow_dispatch: {}

 jobs:
   docker-build-and-push:
diff --git a/.github/workflows/config-explorer-test.yaml b/.github/workflows/python-test.yaml
similarity index 52%
rename from .github/workflows/config-explorer-test.yaml
rename to .github/workflows/python-test.yaml
index c086a94e..963f9003 100644
--- a/.github/workflows/config-explorer-test.yaml
+++ b/.github/workflows/python-test.yaml
@@ -1,13 +1,13 @@
-name: Config Explorer Test
+name: Python Test

-on: [push, pull_request]
+on: [push, pull_request, workflow_dispatch]

 jobs:
   config-explorer-pytest:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ["3.11", "3.12", "3.13"]
+        python-version: ["3.12", "3.13"]

     steps:
       - uses: actions/checkout@v5
@@ -23,11 +23,17 @@
       - name: Install dependencies
         run: |
-          python -m pip install --upgrade pip
-          pip install -r config_explorer/requirements.txt
+          python -m pip install pdm
+          pdm install

-      - name: Test with pytest
+      - name: Format check
         run: |
-          pip install pytest pytest-cov
-          cd config_explorer
-          pytest -s tests/ --doctest-modules --junitxml=junit/test-results.xml --cov=config_explorer --cov-report=xml --cov-report=html
+          pdm run format-check
+
+      - name: Lint check
+        run: |
+          pdm run lint-check
+
+      - name: Test coverage
+        run: |
+          pdm run test-cov
diff --git a/.gitignore b/.gitignore
index 284b4c07..5ba47aae 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,6 +43,16 @@ data/**/logs/

 # Python
 __pycache__/
+*.py[cod]
+*$py.class
+*.egg-info/
+dist/
+build/
+
+# PDM
+__pypackages__/
+.pdm-python
+.pdm.toml

 # Jupyter Notebook
 .ipynb_checkpoints
@@ -57,7 +67,14 @@ env.bak/
 venv.bak/
 environment/

-scenarios/none.sh
+llm_d_benchmark/scenarios/none.sh

 # Python specifics
-**/*.egg-info
\ No newline at end of file
+**/*.egg-info
+
+# coverage
+**/.coverage
+
+# cache
+.pytest_cache/
+.ruff_cache/
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b9dec366..60acbf75 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,12 +1,31 @@
 repos:
   - repo: local
     hooks:
-      - id: basic_unit_test
-        name: Basic Unit Test
-        entry: bash -c './setup/standup.sh -c kind_sim_fb -n'
+      - id: format-lint
+        name: Format and Lint
+        entry: pdm run format-lint
+        language: system
+        pass_filenames: false
+        types: [python]
+
+  - repo: local
+    hooks:
+      - id: py_unit_test
+        name: Python Unit Test
+        entry: pdm run pytest
+        require_serial: true
+        pass_filenames: false
+        language: system
+
+  - repo: local
+    hooks:
+      - id: basic_shell_unit_test
+        name: Basic Shell Unit Test
+        entry: bash -c 'cd llm_d_benchmark && pdm run bash -x ./setup/standup.sh -c kind_sim_fb -n'
         require_serial: true
         pass_filenames: false
         language: system

   - repo: https://github.com/ibm/detect-secrets
     # If you desire to use a specific version of detect-secrets, you can replace `master` with other git revisions such as branch, tag or commit sha.
     # You are encouraged to use static refs such as tags, instead of branch name
@@ -21,3 +40,4 @@
         # when "--baseline" with "--use-all-plugins", pre-commit scan with all available plugins
         # add "--fail-on-unaudited" to fail pre-commit for unaudited potential secrets
         args: [--baseline, .secrets.baseline, --use-all-plugins]
+        additional_dependencies: ['boxsdk==3.9.2']
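The CI jobs and pre-commit hooks above all lean on pdm script aliases (`format-check`, `lint-check`, `format-lint`, `test-cov`) whose definitions are not part of this diff. A minimal sanity-check sketch, assuming the aliases live under the conventional `[tool.pdm.scripts]` table in `pyproject.toml` (an assumption; that file is not shown here):

```python
# Sketch only: verify the pdm script aliases referenced by CI and the
# pre-commit hooks are declared. Assumes [tool.pdm.scripts] in
# pyproject.toml, the usual pdm convention.
import sys
import tomllib  # stdlib since Python 3.11

EXPECTED = {"format-check", "lint-check", "format-lint", "test-cov"}

with open("pyproject.toml", "rb") as fh:
    pyproject = tomllib.load(fh)

scripts = set(pyproject.get("tool", {}).get("pdm", {}).get("scripts", {}))
missing = EXPECTED - scripts
if missing:
    sys.exit(f"missing pdm script aliases: {sorted(missing)}")
print("all pdm script aliases used by CI are defined")
```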
diff --git a/Makefile b/Makefile
index f9ef7dd5..b28015d3 100644
--- a/Makefile
+++ b/Makefile
@@ -12,35 +12,16 @@ CONTAINER_TOOL := $(shell if command -v docker >/dev/null 2>&1; then echo docker
 BUILDER := $(shell command -v buildah >/dev/null 2>&1 && echo buildah || echo $(CONTAINER_TOOL))
 PLATFORMS ?= linux/amd64,linux/arm64 # linux/s390x,linux/ppc64le

-# go source files
-SRC = $(shell find . -type f -name '*.go')
-
 .PHONY: help
 help: ## Print help
	@awk 'BEGIN {FS = ":.*##"; printf "\nUsage:\n  make \033[36m<target>\033[0m\n"} /^[a-zA-Z_0-9-]+:.*?##/ { printf "  \033[36m%-15s\033[0m %s\n", $$1, $$2 } /^##@/ { printf "\n\033[1m%s\033[0m\n", substr($$0, 5) } ' $(MAKEFILE_LIST)

-##@ Development
-
-.PHONY: format
-format: ## Format Go source files
-	@printf "\033[33;1m==== Running gofmt ====\033[0m\n"
-	@gofmt -l -w $(SRC)
-
-.PHONY: test
-test: check-ginkgo ## Run tests
-	@printf "\033[33;1m==== Running tests ====\033[0m\n"
-	ginkgo -r -v

 .PHONY: post-deploy-test
 post-deploy-test: ## Run post deployment tests
	echo Success!
	@echo "Post-deployment tests passed."

-.PHONY: lint
-lint: check-golangci-lint ## Run lint
-	@printf "\033[33;1m==== Running linting ====\033[0m\n"
-	golangci-lint run
-
 ##@ Container Build/Push

 .PHONY: buildah-build
@@ -243,9 +224,6 @@ env: load-version-json ## Print environment variables

 .PHONY: check-tools
 check-tools: \
-	check-go \
-	check-ginkgo \
-	check-golangci-lint \
	check-jq \
	check-kustomize \
	check-envsubst \
@@ -255,21 +233,6 @@ check-tools: \
	check-podman
	@echo "✅ All required tools are installed."

-.PHONY: check-go
-check-go:
-	@command -v go >/dev/null 2>&1 || { \
-		echo "❌ Go is not installed. Install it from https://golang.org/dl/"; exit 1; }
-
-.PHONY: check-ginkgo
-check-ginkgo:
-	@command -v ginkgo >/dev/null 2>&1 || { \
-		echo "❌ ginkgo is not installed. Install with: go install github.com/onsi/ginkgo/v2/ginkgo@latest"; exit 1; }
-
-.PHONY: check-golangci-lint
-check-golangci-lint:
-	@command -v golangci-lint >/dev/null 2>&1 || { \
-		echo "❌ golangci-lint is not installed. Install from https://golangci-lint.run/usage/install/"; exit 1; }
-
 .PHONY: check-jq
 check-jq:
	@command -v jq >/dev/null 2>&1 || { \
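All surviving `check-*` targets follow the same `command -v` probe. For reference, a sketch of the equivalent check in Python (the tool list below is only the subset visible in these hunks):

```python
# Sketch of the probe the Makefile's check-* targets perform with `command -v`.
import shutil
import sys

# Tools still required after the Go targets were removed (subset shown in the diff).
REQUIRED = ["jq", "kustomize", "envsubst", "podman"]

missing = [tool for tool in REQUIRED if shutil.which(tool) is None]
if missing:
    sys.exit(f"❌ missing required tools: {', '.join(missing)}")
print("✅ All required tools are installed.")
```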
diff --git a/README.md b/README.md
index 1523a2b0..9f70ebb2 100644
--- a/README.md
+++ b/README.md
@@ -7,12 +7,18 @@ This repository provides an automated workflow for benchmarking LLM inference us
 Provide a single source of automation for repeatable and reproducible experiments and performance evaluation on `llm-d`.

 ### 📦 Repository Setup
+Install pdm following the official instructions: https://pdm-project.org/en/latest/#installation
+Then, clone this repository and install the dependencies:
 ```
-git clone https://github.com/llm-d/llm-d-benchmark.git
-cd llm-d-benchmark
-./setup/install_deps.sh
-pip install -r config_explorer/requirements.txt
+git clone https://github.com/llm-d/llm-d-benchmark.git && cd llm-d-benchmark/
+pdm install && pdm run $SHELL
+pre-commit install
+```
+
+Finally, install additional dependencies:
+```
+cd llm_d_benchmark/ && ./setup/install_deps.sh
 ```

 ## Quickstart
@@ -20,7 +26,7 @@ pip install -r config_explorer/requirements.txt
 **Out of the box:** **`standup`** a `llm-d` stack (default method is `llm-d-modelservice`, serving `meta-llama/Llama-3.2-1B-Instruct` model), **`run`** a harness (default `inference-perf`) with a load profile (default `sanity_random`) and then **`teardown`** the deployed stack.

 ```
-./e2e.sh
+cd llm_d_benchmark/ && ./e2e.sh
 ```

 > [!TIP]
@@ -29,6 +35,7 @@ pip install -r config_explorer/requirements.txt
 The same above example could be explicitly split in three separate parts.

 ```
+cd llm_d_benchmark/
 ./setup/standup.sh
 ./run.sh
 ./setup/teardown.sh
 ```
@@ -37,6 +44,7 @@
 A user can elect to **`standup`** an `llm-d` stack once, and then **`run`** the `inference-perf` harness with a different load profile (i.e., `chatbot_synthetic`)

 ```
+cd llm_d_benchmark/
 ./run.sh --harness inference-perf --workload chatbot_synthetic --methods `
 ```
diff --git a/analysis/to_be_incorporated/plot_benchmark_metrics.py b/analysis/to_be_incorporated/plot_benchmark_metrics.py
deleted file mode 100644
index 5fadab6b..00000000
--- a/analysis/to_be_incorporated/plot_benchmark_metrics.py
+++ /dev/null
@@ -1,218 +0,0 @@
-import pandas as pd
-import matplotlib.pyplot as plt
-import glob
-import os
-import re
-import argparse
-
-# Define method types and their display names
-METHOD_TYPES = {
-    'vllm': 'vLLM v1',
-    'llm-d': 'LLM-d',
-    'vllm-prod': 'vLLM + LMCache',
-    'lmcache': 'vLLM Production Stack + LMCache',
-    'lmcache-0310': 'vLLM Production Stack + LMCache (03-10-2025)',
-    'vllm-70b': 'vLLM v1',
-    'baseline-llm-d-70b': 'llm-d w/o KVCache offloading',
-    'lmcache-llm-d-70b': 'llm-d w KVCache offloading',
-    'lmcache-indexing-llm-d-70b': 'llm-d w KVCache offloading + KVCache indexing',
-    'lmcache-vllm-70b': 'Production Stack(vLLM v1) + LMCache',
-    'vllm-70b-2replicas': 'vLLM v1 (2 replicas) + Round Robin',
-    'llm-d-70b-2replicas': 'llm-d (2 replicas)' + '\n' + 'KVCache (score=2) & Load (score=1) aware routing',
-    'vllm-standalone-llama-3-70b-2replicas-H100': 'vLLM v1 (2 replicas) + Round Robin (H100)',
-    'llm-d-70b-2replicas-H100': 'llm-d (2 replicas)' + '\n' + 'Prefix (score=2) & Load (score=1) aware routing (H100)',
-    'llm-d-70b-2replicas-H100-no-router': 'llm-d (2 replicas)' + '\n' + 'Round Robin (H100)',
-    'vllm-llama4-tp4': 'vLLM v1 (TP=4)',
-    'llm-d-llama4-tp4': 'llm-d (TP=4)',
-    'lmcache-llm-d-llama4-tp4': 'llm-d w KVCache offloading (TP=4)',
-}
-
-# Define benchmark types and their titles
-BENCHMARK_TYPES = {
-    'sharegpt': 'ShareGPT',
-    'long_input': 'Long Input Short Output',
-    'short_input': 'Short Input Short Output'
-}
-
-# Define QPS ranges for each benchmark type
-BENCHMARK_QPS_RANGES = {
-    # 'sharegpt': (0, 1.4),
-    'sharegpt': (0, 100.0),
-    'long_input': (0, 1.2),
-    'short_input': (0, 10.0)
-}
-
-# Define y-axis ranges for each metric
-BENCHMARK_Y_RANGES = {
-    'itl': (0, 0.1),  # Inter-token Latency in seconds
-    'ttft': (0, 1.0),  # Time to First Token in seconds
-    'throughput': (5000, 30000)  # Throughput in tokens per second
-}
-
-def extract_qps(filename):
-    # Try to extract QPS value from filename
-    # Pattern 1: LMBench_sharegpt_output_0.5.csv -> 0.5
-    # Pattern 2: LMBench_short_input_qps0.5.csv -> 0.5
-    match = re.search(r'(?:output_|qps)(\d+\.?\d*)\.csv', filename)
-    if match:
-        return float(match.group(1))
-    return None
-
-def calculate_itl(df):
-    # Calculate ITL (Inter-token Latency) as generation_time / generation_tokens
-    return df['generation_time'] / df['generation_tokens']
-
-def calculate_throughput(df):
-    # Calculate total tokens (input + output)
-    total_tokens = df['prompt_tokens'].sum() + df['generation_tokens'].sum()
-
-    # Calculate total time (latest finish time - earliest launch time)
-    total_time = df['finish_time'].max() - df['launch_time'].min()
-
-    # Calculate throughput (tokens per second)
-    return total_tokens / total_time
-
-def process_csv_files(benchmark_type, method, benchmark_dir):
-    # Get all CSV files matching the pattern
-    data_dir = os.path.join(benchmark_dir, method)
-    pattern = f'LMBench_{benchmark_type}_*.csv'
-    csv_files = glob.glob(os.path.join(data_dir, pattern))
-
-    if not csv_files:
-        print(f"No CSV files found for {benchmark_type} in {data_dir}")
-        return None
-
-    # Store results
-    results = {
-        'qps': [],
-        'itl': [],
-        'ttft': [],
-        'throughput': []
-    }
-
-    # Process each file
-    for file in sorted(csv_files):
-        qps = extract_qps(file)
-        if qps is None:
-            print(f"Could not extract QPS from filename: {file}")
-            continue
-
-        try:
-            # Read CSV file
-            df = pd.read_csv(file)
-
-            # Calculate metrics
-            itl = calculate_itl(df).mean()
-            ttft = df['ttft'].mean()
-            throughput = calculate_throughput(df)
-
-            results['qps'].append(qps)
-            results['itl'].append(itl)
-            results['ttft'].append(ttft)
-            results['throughput'].append(throughput)
-
-            print(f"Processed {file}:")
-            print(f"  QPS={qps}")
-            print(f"  Avg ITL={itl:.4f}s")
-            print(f"  Avg TTFT={ttft:.4f}s")
-            print(f"  Throughput={throughput:.2f} tokens/s")
-        except Exception as e:
-            print(f"Error processing {file}: {str(e)}")
-            continue
-
-    if not results['qps']:
-        print(f"No valid data found for {benchmark_type}")
-        return None
-
-    # Sort all metrics by QPS
-    sorted_indices = sorted(range(len(results['qps'])), key=lambda i: results['qps'][i])
-    for key in results:
-        results[key] = [results[key][i] for i in sorted_indices]
-
-    return results
-
-def plot_metrics(results_dict, benchmark_type, title, benchmark_dir, model_name):
-    if not results_dict:
-        return
-
-    # Create figure with three subplots
-    fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6))
-
-    # Add main title with model name
-    fig.suptitle(f"{title} - {model_name}", fontsize=20, y=1.02)
-
-    # Define colors for different methods
-    colors = ['bo-', 'ro-', 'go-', 'mo-', 'co-', 'yo-']
-
-    # Get QPS range for this benchmark type
-    qps_min, qps_max = BENCHMARK_QPS_RANGES[benchmark_type]
-
-    # Plot ITL
-    for i, (method, results) in enumerate(results_dict.items()):
-        if results:
-            ax1.plot(results['qps'], results['itl'], colors[i % len(colors)],
-                     linewidth=2, markersize=8, label=METHOD_TYPES[method])
-    ax1.set_xlabel('QPS')
-    ax1.set_ylabel('Average Inter-token Latency (s)')
-    ax1.set_title('Average Inter-token Latency vs QPS')
-    ax1.set_xlim(qps_min, qps_max)
-    ax1.set_ylim(BENCHMARK_Y_RANGES['itl'])
-    ax1.grid(True)
-    ax1.legend()
-
-    # Plot TTFT
-    for i, (method, results) in enumerate(results_dict.items()):
-        if results:
-            ax2.plot(results['qps'], results['ttft'], colors[i % len(colors)],
-                     linewidth=2, markersize=8, label=METHOD_TYPES[method])
-    ax2.set_xlabel('QPS')
-    ax2.set_ylabel('Average Time to First Token (s)')
-    ax2.set_title('Average Time to First Token vs QPS')
-    ax2.set_xlim(qps_min, qps_max)
-    ax2.set_ylim(BENCHMARK_Y_RANGES['ttft'])
-    ax2.grid(True)
-    ax2.legend()
-
-    # Plot Throughput
-    for i, (method, results) in enumerate(results_dict.items()):
-        if results:
-            ax3.plot(results['qps'], results['throughput'], colors[i % len(colors)],
-                     linewidth=2, markersize=8, label=METHOD_TYPES[method])
-    ax3.set_xlabel('QPS')
-    ax3.set_ylabel('Throughput (tokens/s)')
-    ax3.set_title('Throughput vs QPS')
-    ax3.set_xlim(qps_min, qps_max)
-    ax3.set_ylim(BENCHMARK_Y_RANGES['throughput'])
-    ax3.grid(True)
-    ax3.legend()
-
-    # Adjust layout and save
-    plt.tight_layout()
-    output_file = os.path.join(os.path.dirname(__file__), f'benchmark_metrics_{benchmark_type}.png')
-    plt.savefig(output_file, bbox_inches='tight')
-    plt.close()
-    print(f"Plot for {title} saved to {output_file}")
-
-def main():
-    # Set up argument parser
-    parser = argparse.ArgumentParser(description='Plot benchmark metrics from CSV files')
-    parser.add_argument('--benchmark-dir',
-                        default=os.path.join(os.path.dirname(__file__), '..', 'data', 'k8s', 'lmbenchmark'),
-                        help='Path to the benchmark directory containing the method subdirectories')
-    parser.add_argument('--model-name',
-                        default='Llama-3.1-8B-Instruct',
-                        help='Name of the model being benchmarked (default: Llama-3.1-8B-Instruct)')
-    args = parser.parse_args()
-
-    # Process and plot each benchmark type
-    for benchmark_type, title in BENCHMARK_TYPES.items():
-        print(f"\nProcessing {title} benchmark for {args.model_name}...")
-        results_dict = {}
-        for method in METHOD_TYPES.keys():
-            results = process_csv_files(benchmark_type, method, args.benchmark_dir)
-            if results:
-                results_dict[method] = results
-        plot_metrics(results_dict, benchmark_type, title, args.benchmark_dir, args.model_name)
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file
diff --git a/build/Dockerfile b/build/Dockerfile
index 622a5134..297d0918 100644
--- a/build/Dockerfile
+++ b/build/Dockerfile
@@ -75,13 +75,13 @@ RUN echo "fmperf: ${FM_PERF_REPO} ${FM_PERF_BRANCH}" > /workspace/repos.txt; \

 RUN ln -s /usr/bin/sleep /usr/local/bin/sleep

-ADD workload/harnesses/ /usr/local/bin/
-COPY workload/report/*.py /usr/local/bin/
-COPY analysis/fmperf-analyze_results.py /usr/local/bin/fmperf-analyze_results.py
-COPY analysis/inference-perf-analyze_results.sh /usr/local/bin/inference-perf-analyze_results.sh
-COPY analysis/nop-analyze_results.py /usr/local/bin/nop-analyze_results.py
-COPY analysis/vllm-benchmark-analyze_results.sh /usr/local/bin/vllm-benchmark-analyze_results.sh
-COPY analysis/guidellm-analyze_results.sh /usr/local/bin/guidellm-analyze_results.sh
+ADD llm_d_benchmark/workload/harnesses/ /usr/local/bin/
+COPY llm_d_benchmark/workload/report/*.py /usr/local/bin/
+COPY llm_d_benchmark/analysis/fmperf-analyze_results.py /usr/local/bin/fmperf-analyze_results.py
+COPY llm_d_benchmark/analysis/inference-perf-analyze_results.sh /usr/local/bin/inference-perf-analyze_results.sh
+COPY llm_d_benchmark/analysis/nop-analyze_results.py /usr/local/bin/nop-analyze_results.py
+COPY llm_d_benchmark/analysis/vllm-benchmark-analyze_results.sh /usr/local/bin/vllm-benchmark-analyze_results.sh
+COPY llm_d_benchmark/analysis/guidellm-analyze_results.sh /usr/local/bin/guidellm-analyze_results.sh

 # Install requirements for analysis scripts
 COPY build/requirements-analysis.txt .
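Every `COPY`/`ADD` source in `build/Dockerfile` now carries the `llm_d_benchmark/` prefix. An illustrative pre-build check (not part of the repo; paths taken verbatim from the hunk above):

```python
# Sketch: confirm the Dockerfile COPY/ADD sources exist after the move
# under llm_d_benchmark/, before kicking off an image build.
from pathlib import Path

SOURCES = [
    "llm_d_benchmark/workload/harnesses",
    "llm_d_benchmark/workload/report",
    "llm_d_benchmark/analysis/fmperf-analyze_results.py",
    "llm_d_benchmark/analysis/inference-perf-analyze_results.sh",
    "llm_d_benchmark/analysis/nop-analyze_results.py",
    "llm_d_benchmark/analysis/vllm-benchmark-analyze_results.sh",
    "llm_d_benchmark/analysis/guidellm-analyze_results.sh",
]

missing = [src for src in SOURCES if not Path(src).exists()]
assert not missing, f"Dockerfile COPY sources missing: {missing}"
print("all Dockerfile COPY sources present")
```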
diff --git a/config_explorer/db.py b/config_explorer/db.py
deleted file mode 100644
index 80077605..00000000
--- a/config_explorer/db.py
+++ /dev/null
@@ -1,42 +0,0 @@
-"""
-Mocks DB storing info about common accelerators used for LLM serving and inference
-"""
-
-gpu_specs = {
-    # https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf
-    # https://medium.com/@bijit211987/top-nvidia-gpus-for-llm-inference-8a5316184a10
-    # https://www.databasemart.com/blog/best-nvidia-gpus-for-llm-inference-2025?srsltid=AfmBOopcvcdN6yzBF24k7_DyRS_csYOmNyDLJK7zq9Rg89weW6AQAx5F
-    "NVIDIA-H100-80GB-HBM3": {
-        "memory": 80
-    },
-    "NVIDIA-A100-40GB": {
-        "memory": 40
-    },
-    "NVIDIA-A100-80GB": {
-        "memory": 80
-    },
-    "NVIDIA-H100-80GB": {
-        "memory": 80
-    },
-    "NVIDIA-L40-40GB": {
-        "memory": 40
-    },
-    "NVIDIA-RTX-4090": {
-        "memory": 24
-    },
-    "NVIDIA-RTX-5090": {
-        "memory": 32
-    },
-    "NVIDIA-RTX-6000": {
-        "memory": 48
-    },
-    "NVIDIA-A6000": {
-        "memory": 48
-    },
-    "NVIDIA-A4000": {
-        "memory": 16
-    },
-    "NVIDIA-T4": {
-        "memory": 16
-    }
-}
diff --git a/llm_d_benchmark/__init__.py b/llm_d_benchmark/__init__.py
new file mode 100644
index 00000000..c5b53be8
--- /dev/null
+++ b/llm_d_benchmark/__init__.py
@@ -0,0 +1,5 @@
+"""
+llm-d-benchmark: Automated workflow for benchmarking LLM inference using the llm-d stack.
+"""
+
+__version__ = "0.3.0"
diff --git a/analysis/README.md b/llm_d_benchmark/analysis/README.md
similarity index 100%
rename from analysis/README.md
rename to llm_d_benchmark/analysis/README.md
diff --git a/analysis/analysis_inference_scheduler.ipynb b/llm_d_benchmark/analysis/analysis_inference_scheduler.ipynb
similarity index 99%
rename from analysis/analysis_inference_scheduler.ipynb
rename to llm_d_benchmark/analysis/analysis_inference_scheduler.ipynb
index 003daa2a..661295ba 100644
--- a/analysis/analysis_inference_scheduler.ipynb
+++ b/llm_d_benchmark/analysis/analysis_inference_scheduler.ipynb
@@ -43,17 +43,16 @@
     "import sys\n",
     "from pathlib import Path\n",
     "\n",
-    "import matplotlib\n",
+    "# sys.path.insert(0, '../workload/report/')\n",
+    "import convert\n",
     "import matplotlib.pyplot as plt\n",
     "import pandas\n",
-    "\n",
-    "#sys.path.insert(0, '../workload/report/')\n",
-    "import convert\n",
     "import schema\n",
     "\n",
     "\n",
     "class Text:\n",
     "    \"\"\"ANSI SGR control codes for text formatting\"\"\"\n",
+    "\n",
     "    DEFAULT = \"\\x1b[0m\"\n",
     "    BOLD = \"\\x1b[1m\"\n",
     "    BOLD_OFF = \"\\x1b[22m\"\n",
@@ -85,7 +84,7 @@
     "    Args:\n",
     "        mesg (str): Information message.\n",
     "    \"\"\"\n",
-    "    sys.stderr.write(f'{Text.GREEN}{mesg}\\n{Text.DEFAULT}')\n",
+    "    sys.stderr.write(f\"{Text.GREEN}{mesg}\\n{Text.DEFAULT}\")\n",
     "\n",
     "\n",
     "def warn(mesg: str) -> None:\n",
@@ -94,7 +93,7 @@
     "    Args:\n",
     "        mesg (str): Warming message.\n",
     "    \"\"\"\n",
-    "    sys.stderr.write(f'{Text.YELLOW}{mesg}\\n{Text.DEFAULT}')\n",
+    "    sys.stderr.write(f\"{Text.YELLOW}{mesg}\\n{Text.DEFAULT}\")\n",
     "\n",
     "\n",
     "def error(mesg: str, err_code: int = 1) -> None:\n",
@@ -104,7 +103,7 @@
     "        mesg (str): Error message.\n",
     "        err_code (int): Error code.\n",
     "    \"\"\"\n",
-    "    sys.stderr.write(f'{Text.RED}{mesg}\\n{Text.DEFAULT}')\n",
+    "    sys.stderr.write(f\"{Text.RED}{mesg}\\n{Text.DEFAULT}\")\n",
     "    sys.exit(err_code)\n",
     "\n",
     "\n",
@@ -115,7 +114,7 @@
     "        dir (str): Directory to check existence of.\n",
     "    \"\"\"\n",
     "    if not os.path.isdir(dir):\n",
-    "        error(f'Invalid path: {dir}')\n",
+    "        error(f\"Invalid path: {dir}\")\n",
     "\n",
     "\n",
     "def check_file(file: str) -> None:\n",
@@ -125,7 +124,7 @@
     "        file (str): File to check existence of.\n",
     "    \"\"\"\n",
     "    if not os.path.isfile(file):\n",
-    "        error(f'Invalid file: {file}')\n",
+    "        error(f\"Invalid file: {file}\")\n",
     "\n",
     "\n",
     "def get_benchmark_report_files(source_dir: str) -> list[str]:\n",
@@ -133,7 +132,7 @@
     "\n",
     "    Args:\n",
     "        source_dir (str): Directory to recursively search for results files.\n",
-    "    \n",
+    "\n",
     "    Returns:\n",
     "        list: List of paths to benchmark report files.\n",
     "    \"\"\"\n",
@@ -151,35 +150,37 @@
     "    Returns:\n",
     "        DataFrame: Empty DataFrame for benchmark runs.\n",
     "    \"\"\"\n",
-    "    return pandas.DataFrame(columns=[\n",
-    "        'Name',\n",
-    "        'Directory',\n",
-    "        'Model',\n",
-    "        'GPU',\n",
-    "        'ISL',\n",
-    "        'OSL',\n",
-    "        'Duration',\n",
-    "        'Completed',\n",
-    "        'Request_Throughput',\n",
-    "        'Output_Token_Throughput',\n",
-    "        'Total_Token_Throughput',\n",
-    "        'Mean_TTFT_ms',\n",
-    "        'Mean_TPOT_ms',\n",
-    "        'Mean_ITL_ms',\n",
-    "        'Mean_E2EL_ms',\n",
-    "        'KV_Cache_Scorer_Weight',\n",
-    "        'Queue_Scorer_Weight',\n",
-    "        'Prefix_Cache_Scorer_Weight',\n",
-    "        'Prefix_Cache_Scorer_Block_Size',\n",
-    "        'Prefix_Cache_Scorer_LRU_Capacity_Per_Server',\n",
-    "        'Prefix_Cache_Scorer_Max_Blocks_To_Match',\n",
-    "        'System_Prompt_Length',\n",
-    "        'Question_Length',\n",
-    "        'Approx_OSL',\n",
-    "        'Groups',\n",
-    "        'Prompts_Per_Group',\n",
-    "        'QPS',\n",
-    "    ])\n",
+    "    return pandas.DataFrame(\n",
+    "        columns=[\n",
+    "            \"Name\",\n",
+    "            \"Directory\",\n",
+    "            \"Model\",\n",
+    "            \"GPU\",\n",
+    "            \"ISL\",\n",
+    "            \"OSL\",\n",
+    "            \"Duration\",\n",
+    "            \"Completed\",\n",
+    "            \"Request_Throughput\",\n",
+    "            \"Output_Token_Throughput\",\n",
+    "            \"Total_Token_Throughput\",\n",
+    "            \"Mean_TTFT_ms\",\n",
+    "            \"Mean_TPOT_ms\",\n",
+    "            \"Mean_ITL_ms\",\n",
+    "            \"Mean_E2EL_ms\",\n",
+    "            \"KV_Cache_Scorer_Weight\",\n",
+    "            \"Queue_Scorer_Weight\",\n",
+    "            \"Prefix_Cache_Scorer_Weight\",\n",
+    "            \"Prefix_Cache_Scorer_Block_Size\",\n",
+    "            \"Prefix_Cache_Scorer_LRU_Capacity_Per_Server\",\n",
+    "            \"Prefix_Cache_Scorer_Max_Blocks_To_Match\",\n",
+    "            \"System_Prompt_Length\",\n",
+    "            \"Question_Length\",\n",
+    "            \"Approx_OSL\",\n",
+    "            \"Groups\",\n",
+    "            \"Prompts_Per_Group\",\n",
+    "            \"QPS\",\n",
+    "        ]\n",
+    "    )\n",
     "\n",
     "\n",
     "def _make_name(report: schema.BenchmarkReport) -> str:\n",
@@ -191,12 +192,10 @@
     "    Returns:\n",
     "        str: Name of benchmark run.\n",
     "    \"\"\"\n",
-    "    return 'name'\n",
+    "    return \"name\"\n",
     "\n",
     "\n",
-    "def add_benchmark_report_to_df(\n",
-    "        runs_df: pandas.core.frame.DataFrame,\n",
-    "        br_file: str) -> None:\n",
+    "def add_benchmark_report_to_df(runs_df: pandas.core.frame.DataFrame, br_file: str) -> None:\n",
     "    \"\"\"Load a results file and add it to the DataFrame of benchmark runs.\n",
     "\n",
     "    Args:\n",
@@ -205,21 +204,21 @@
     "    \"\"\"\n",
     "    report = convert.import_benchmark_report(br_file)\n",
     "    if not report.scenario.platform.metadata:\n",
-    "        warn(f'Missing scenario.platform.metadata, skipping: {br_file}')\n",
+    "        warn(f\"Missing scenario.platform.metadata, skipping: {br_file}\")\n",
     "        return\n",
     "\n",
     "    # Get plugin parameters\n",
     "    prefix_cache_scorer_block_size = None\n",
     "    prefix_cache_scorer_lur_capacity_per_server = None\n",
     "    prefix_cacher_scorer_max_blocks_to_match = None\n",
-    "    for plugin in report.scenario.platform.metadata['inferenceScheduler']['plugins']:\n",
-    "        if plugin['type'] == 'prefix-cache-scorer':\n",
-    "            if 'parameters' not in plugin:\n",
+    "    for plugin in report.scenario.platform.metadata[\"inferenceScheduler\"][\"plugins\"]:\n",
+    "        if plugin[\"type\"] == \"prefix-cache-scorer\":\n",
+    "            if \"parameters\" not in plugin:\n",
     "                continue\n",
-    "            prefix_cache_scorer_block_size = plugin['parameters'].get('blockSize', 16)\n",
-    "            prefix_cache_scorer_lur_capacity_per_server = plugin['parameters'].get('lruCapacityPerServer', 31250)\n",
-    "            prefix_cacher_scorer_max_blocks_to_match = plugin['parameters'].get('maxPrefixBlocksToMatch', 256)\n",
-    "    \n",
+    "            prefix_cache_scorer_block_size = plugin[\"parameters\"].get(\"blockSize\", 16)\n",
+    "            prefix_cache_scorer_lur_capacity_per_server = plugin[\"parameters\"].get(\"lruCapacityPerServer\", 31250)\n",
+    "            prefix_cacher_scorer_max_blocks_to_match = plugin[\"parameters\"].get(\"maxPrefixBlocksToMatch\", 256)\n",
+    "\n",
     "    # Set default weights to zero (disabled)\n",
     "    # TODO: capture other settings for prefix cache scorer\n",
     "    # https://gateway-api-inference-extension.sigs.k8s.io/guides/epp-configuration/prefix-aware/\n",
@@ -230,52 +229,56 @@
     "    # In addition we assume the plugins have not been renamed, and the pluginRef\n",
     "    # is the same as the plugin type.\n",
     "    # https://gateway-api-inference-extension.sigs.k8s.io/guides/epp-configuration/config-text/\n",
-    "    for plugin in report.scenario.platform.metadata['inferenceScheduler']['schedulingProfiles'][0]['plugins']:\n",
-    "        # is the same as the plugin type.\n",
-    "        if plugin['pluginRef'] == 'prefix-cache-scorer':\n",
-    "            prefix_cache_scorer_weight = plugin.get('weight', 1)\n",
-    "        if plugin['pluginRef'] == 'kv-cache-scorer':\n",
-    "            kv_cache_scorer_weight = plugin.get('weight', 1)\n",
-    "        if plugin['pluginRef'] == 'queue-scorer':\n",
-    "            queue_scorer_weight = plugin.get('weight', 1)\n",
+    "    for plugin in report.scenario.platform.metadata[\"inferenceScheduler\"][\"schedulingProfiles\"][0][\"plugins\"]:\n",
+    "        # is the same as the plugin type.\n",
+    "        if plugin[\"pluginRef\"] == \"prefix-cache-scorer\":\n",
+    "            prefix_cache_scorer_weight = plugin.get(\"weight\", 1)\n",
+    "        if plugin[\"pluginRef\"] == \"kv-cache-scorer\":\n",
+    "            kv_cache_scorer_weight = plugin.get(\"weight\", 1)\n",
+    "        if plugin[\"pluginRef\"] == \"queue-scorer\":\n",
+    "            queue_scorer_weight = plugin.get(\"weight\", 1)\n",
     "\n",
     "    # TODO get this from within benchmark report file\n",
-    "    stage = report.scenario.load.metadata['stage']\n",
-    "    #stage = int(br_file.rsplit('benchmark_report,_stage_')[-1].split('_', 1)[0])\n",
+    "    stage = report.scenario.load.metadata[\"stage\"]\n",
+    "    # stage = int(br_file.rsplit('benchmark_report,_stage_')[-1].split('_', 1)[0])\n",
     "\n",
     "    # Add row to DataFrame\n",
     "    runs_df.loc[len(runs_df)] = {\n",
-    "        'Name': _make_name(report),\n",
+    "        \"Name\": _make_name(report),\n",
     "        # We want the base directory for the sweep, which is two levels up\n",
-    "        'Directory': os.path.abspath(br_file).rsplit(os.sep, 1)[0],\n",
-    "        'Model': report.scenario.model.name,\n",
+    "        \"Directory\": os.path.abspath(br_file).rsplit(os.sep, 1)[0],\n",
+    "        \"Model\": report.scenario.model.name,\n",
     "        # Assume heterogeneous\n",
-    "        'GPU': report.scenario.host.accelerator[0].model,\n",
+    "        \"GPU\": report.scenario.host.accelerator[0].model,\n",
     "        # TODO this may need to be configurable...\n",
-    "        'ISL': int(round(report.metrics.requests.input_length.mean)),\n",
-    "        'OSL': int(report.metrics.requests.output_length.mean),\n",
-    "        'Duration': report.metrics.time.duration,\n",
-    "        'Completed': report.metrics.requests.total,\n",
-    "        'Request_Throughput': report.metrics.throughput.requests_per_sec,\n",
-    "        'Output_Token_Throughput': report.metrics.throughput.output_tokens_per_sec,\n",
-    "        'Total_Token_Throughput': report.metrics.throughput.total_tokens_per_sec,\n",
-    "        'Mean_TTFT_ms': report.metrics.latency.time_to_first_token.mean * (1000 if report.metrics.latency.time_to_first_token.units == schema.Units.S else 1),\n",
-    "        'Mean_TPOT_ms': report.metrics.latency.time_per_output_token.mean * (1000 if report.metrics.latency.time_per_output_token.units == schema.Units.S_PER_TOKEN else 1),\n",
-    "        'Mean_ITL_ms': report.metrics.latency.inter_token_latency.mean * (1000 if report.metrics.latency.inter_token_latency.units == schema.Units.S_PER_TOKEN else 1),\n",
-    "        'Mean_E2EL_ms': report.metrics.latency.request_latency.mean * (1000 if report.metrics.latency.request_latency.units == schema.Units.S else 1),\n",
-    "        'KV_Cache_Scorer_Weight': kv_cache_scorer_weight,\n",
-    "        'Queue_Scorer_Weight': queue_scorer_weight,\n",
-    "        'Prefix_Cache_Scorer_Weight': prefix_cache_scorer_weight,\n",
-    "        'Prefix_Cache_Scorer_Block_Size': prefix_cache_scorer_block_size,\n",
-    "        'Prefix_Cache_Scorer_LRU_Capacity_Per_Server': prefix_cache_scorer_lur_capacity_per_server,\n",
-    "        'Prefix_Cache_Scorer_Max_Blocks_To_Match': prefix_cacher_scorer_max_blocks_to_match,\n",
-    "        'System_Prompt_Length': report.scenario.load.args['data']['shared_prefix']['system_prompt_len'],\n",
-    "        'Question_Length': report.scenario.load.args['data']['shared_prefix']['question_len'],\n",
-    "        'Approx_OSL': report.scenario.load.args['data']['shared_prefix']['output_len'],\n",
-    "        'Groups': report.scenario.load.args['data']['shared_prefix']['num_groups'],\n",
-    "        'Prompts_Per_Group': report.scenario.load.args['data']['shared_prefix']['num_prompts_per_group'],\n",
-    "        'QPS': report.scenario.load.args['load']['stages'][stage]['rate'],\n",
-    "    }\n"
+    "        \"ISL\": int(round(report.metrics.requests.input_length.mean)),\n",
+    "        \"OSL\": int(report.metrics.requests.output_length.mean),\n",
+    "        \"Duration\": report.metrics.time.duration,\n",
+    "        \"Completed\": report.metrics.requests.total,\n",
+    "        \"Request_Throughput\": report.metrics.throughput.requests_per_sec,\n",
+    "        \"Output_Token_Throughput\": report.metrics.throughput.output_tokens_per_sec,\n",
+    "        \"Total_Token_Throughput\": report.metrics.throughput.total_tokens_per_sec,\n",
+    "        \"Mean_TTFT_ms\": report.metrics.latency.time_to_first_token.mean\n",
+    "        * (1000 if report.metrics.latency.time_to_first_token.units == schema.Units.S else 1),\n",
+    "        \"Mean_TPOT_ms\": report.metrics.latency.time_per_output_token.mean\n",
+    "        * (1000 if report.metrics.latency.time_per_output_token.units == schema.Units.S_PER_TOKEN else 1),\n",
+    "        \"Mean_ITL_ms\": report.metrics.latency.inter_token_latency.mean\n",
+    "        * (1000 if report.metrics.latency.inter_token_latency.units == schema.Units.S_PER_TOKEN else 1),\n",
+    "        \"Mean_E2EL_ms\": report.metrics.latency.request_latency.mean\n",
+    "        * (1000 if report.metrics.latency.request_latency.units == schema.Units.S else 1),\n",
+    "        \"KV_Cache_Scorer_Weight\": kv_cache_scorer_weight,\n",
+    "        \"Queue_Scorer_Weight\": queue_scorer_weight,\n",
+    "        \"Prefix_Cache_Scorer_Weight\": prefix_cache_scorer_weight,\n",
+    "        \"Prefix_Cache_Scorer_Block_Size\": prefix_cache_scorer_block_size,\n",
+    "        \"Prefix_Cache_Scorer_LRU_Capacity_Per_Server\": prefix_cache_scorer_lur_capacity_per_server,\n",
+    "        \"Prefix_Cache_Scorer_Max_Blocks_To_Match\": prefix_cacher_scorer_max_blocks_to_match,\n",
+    "        \"System_Prompt_Length\": report.scenario.load.args[\"data\"][\"shared_prefix\"][\"system_prompt_len\"],\n",
+    "        \"Question_Length\": report.scenario.load.args[\"data\"][\"shared_prefix\"][\"question_len\"],\n",
+    "        \"Approx_OSL\": report.scenario.load.args[\"data\"][\"shared_prefix\"][\"output_len\"],\n",
+    "        \"Groups\": report.scenario.load.args[\"data\"][\"shared_prefix\"][\"num_groups\"],\n",
+    "        \"Prompts_Per_Group\": report.scenario.load.args[\"data\"][\"shared_prefix\"][\"num_prompts_per_group\"],\n",
+    "        \"QPS\": report.scenario.load.args[\"load\"][\"stages\"][stage][\"rate\"],\n",
+    "    }"
    ]
   },
   {
@@ -311,7 +314,7 @@
     "\n",
     "# Populate the runs DataFrame\n",
     "for sdir in search_dirs:\n",
-    "    info(f'Searching for benchmark report files within {sdir}')\n",
+    "    info(f\"Searching for benchmark report files within {sdir}\")\n",
     "    # Find all benchmark report files in the directory\n",
     "    for br_file in get_benchmark_report_files(sdir):\n",
     "        # info(f'Importing {br_file}')\n",
@@ -355,7 +358,16 @@
     "# Definitions\n",
     "################################################################################\n",
     "\n",
-    "SCENARIO_COLUMNS = ['Model', 'GPU', 'System_Prompt_Length', 'Question_Length', 'Approx_OSL', 'Groups', 'Prompts_Per_Group']\n",
+    "SCENARIO_COLUMNS = [\n",
+    "    \"Model\",\n",
+    "    \"GPU\",\n",
+    "    \"System_Prompt_Length\",\n",
+    "    \"Question_Length\",\n",
+    "    \"Approx_OSL\",\n",
+    "    \"Groups\",\n",
+    "    \"Prompts_Per_Group\",\n",
+    "]\n",
+    "\n",
     "\n",
     "def get_scenarios(runs_df: pandas.core.frame.DataFrame) -> list[tuple[str]]:\n",
     "    \"\"\"Get a list of available scenarios from runs DataFrame.\n",
@@ -385,20 +397,21 @@
     "            spans[jj] = len(str(item))\n",
     "\n",
     "    # Create header, starting with scenario index\n",
-    "    header = f'{Text.BOLD}{Text.BLUE}IDX  {Text.DEFAULT}{Text.BOLD}'\n",
+    "    header = f\"{Text.BOLD}{Text.BLUE}IDX  {Text.DEFAULT}{Text.BOLD}\"\n",
     "    # Add each column name to header\n",
     "    for ii, col in enumerate(SCENARIO_COLUMNS):\n",
     "        header += col + \" \" * (spans[ii] - len(col) + 2)\n",
-    "    header += f'{Text.DEFAULT}'\n",
+    "    header += f\"{Text.DEFAULT}\"\n",
     "    print(header)\n",
     "\n",
     "    # Print details of each scenario\n",
     "    for ii, sc in enumerate(scenarios):\n",
-    "        row = f'{Text.BLUE}{ii}{Text.DEFAULT}' + \" \" * (5 - len(str(ii)))\n",
+    "        row = f\"{Text.BLUE}{ii}{Text.DEFAULT}\" + \" \" * (5 - len(str(ii)))\n",
     "        for jj, val in enumerate(sc):\n",
-    "            row += f'{str(val)}' + \" \" * (spans[jj] - len(str(val)) + 2)\n",
+    "            row += f\"{str(val)}\" + \" \" * (spans[jj] - len(str(val)) + 2)\n",
     "        print(row)\n",
     "\n",
+    "\n",
     "################################################################################\n",
     "# Execute code\n",
     "################################################################################\n",
@@ -426,11 +439,10 @@
     "# Definitions\n",
     "################################################################################\n",
     "\n",
+    "\n",
     "def plot_scenario(\n",
-    "        runs: pandas.core.frame.DataFrame,\n",
-    "        scenarios: list[tuple[str]],\n",
-    "        idx: int,\n",
-    "        print_tables: bool = False) -> None:\n",
+    "    runs: pandas.core.frame.DataFrame, scenarios: list[tuple[str]], idx: int, print_tables: bool = False\n",
+    ") -> None:\n",
     "    \"\"\"\n",
    "    Plot inference scheduler scenario as TTFT vs throughput for different\n",
     "    request rates (in queries per second).\n",
@@ -443,73 +455,105 @@
     "    \"\"\"\n",
     "    # Get parameters of selected scenario\n",
     "    model, gpu, prompt_len, q_len, osl, groups, prompts_per_grp = scenarios[idx]\n",
-    "    \n",
+    "\n",
     "    # Filter on column values\n",
     "    runs_selected = runs[\n",
-    "        (runs['Model'] == model) &\n",
-    "        (runs['GPU'] == gpu) &\n",
-    "        (runs['System_Prompt_Length'] == prompt_len) &\n",
-    "        (runs['Question_Length'] == q_len) &\n",
-    "        (runs['Approx_OSL'] == osl) &\n",
-    "        (runs['Groups'] == groups) &\n",
-    "        (runs['Prompts_Per_Group'] == prompts_per_grp)\n",
-    "    ][[\n",
-    "        'KV_Cache_Scorer_Weight',\n",
-    "        'Queue_Scorer_Weight',\n",
-    "        'Prefix_Cache_Scorer_Weight',\n",
-    "        'Total_Token_Throughput',\n",
-    "        'Mean_TTFT_ms',\n",
-    "        'Mean_TPOT_ms',\n",
-    "        'QPS']].sort_values(by='Mean_TTFT_ms')\n",
-    "    \n",
+    "        (runs[\"Model\"] == model)\n",
+    "        & (runs[\"GPU\"] == gpu)\n",
+    "        & (runs[\"System_Prompt_Length\"] == prompt_len)\n",
+    "        & (runs[\"Question_Length\"] == q_len)\n",
+    "        & (runs[\"Approx_OSL\"] == osl)\n",
+    "        & (runs[\"Groups\"] == groups)\n",
+    "        & (runs[\"Prompts_Per_Group\"] == prompts_per_grp)\n",
+    "    ][\n",
+    "        [\n",
+    "            \"KV_Cache_Scorer_Weight\",\n",
+    "            \"Queue_Scorer_Weight\",\n",
+    "            \"Prefix_Cache_Scorer_Weight\",\n",
+    "            \"Total_Token_Throughput\",\n",
+    "            \"Mean_TTFT_ms\",\n",
+    "            \"Mean_TPOT_ms\",\n",
+    "            \"QPS\",\n",
+    "        ]\n",
+    "    ].sort_values(by=\"Mean_TTFT_ms\")\n",
+    "\n",
     "    # Unique configurations of scorer weights\n",
     "    # NOTE: We are assuming plugin parameters in this analysis!\n",
-    "    config_sets = list(set(runs_selected.set_index(['KV_Cache_Scorer_Weight', 'Queue_Scorer_Weight', 'Prefix_Cache_Scorer_Weight']).index))\n",
+    "    config_sets = list(\n",
+    "        set(\n",
+    "            runs_selected.set_index(\n",
+    "                [\"KV_Cache_Scorer_Weight\", \"Queue_Scorer_Weight\", \"Prefix_Cache_Scorer_Weight\"]\n",
+    "            ).index\n",
+    "        )\n",
+    "    )\n",
     "    config_sets.sort()\n",
     "    # Convert the list of sets to a list of dicts, to make code following clearer\n",
     "    configs = []\n",
     "    for conf in config_sets:\n",
-    "        configs.append({\n",
-    "            'kv': conf[0],\n",
-    "            'queue': conf[1],\n",
-    "            'prefix': conf[2],\n",
-    "        })\n",
-    "    \n",
+    "        configs.append(\n",
+    "            {\n",
+    "                \"kv\": conf[0],\n",
+    "                \"queue\": conf[1],\n",
+    "                \"prefix\": conf[2],\n",
+    "            }\n",
+    "        )\n",
+    "\n",
     "    # Plot performance results\n",
-    "    colors = ['#FF0000', '#FFAA00', '#DDDD00', '#00DD00', '#00FFFF', '#0000FF',\n",
-    "              '#FF00FF', '#666666', '#000000',\n",
-    "              '#990000', '#777700', '#007700', '#009999', '#000099']\n",
-    "    \n",
+    "    colors = [\n",
+    "        \"#FF0000\",\n",
+    "        \"#FFAA00\",\n",
+    "        \"#DDDD00\",\n",
+    "        \"#00DD00\",\n",
+    "        \"#00FFFF\",\n",
+    "        \"#0000FF\",\n",
+    "        \"#FF00FF\",\n",
+    "        \"#666666\",\n",
+    "        \"#000000\",\n",
+    "        \"#990000\",\n",
+    "        \"#777700\",\n",
+    "        \"#007700\",\n",
+    "        \"#009999\",\n",
+    "        \"#000099\",\n",
+    "    ]\n",
+    "\n",
     "    # Plot TTFT vs throughput across rates for each configuration\n",
     "    for ii, conf in enumerate(configs):\n",
-    "        \n",
     "        # Make a DataFrame for specific configuration\n",
     "        conf_df = runs_selected[\n",
-    "            (runs_selected['KV_Cache_Scorer_Weight'] == conf['kv']) &\n",
-    "            (runs_selected['Queue_Scorer_Weight'] == conf['queue']) &\n",
-    "            (runs_selected['Prefix_Cache_Scorer_Weight'] == conf['prefix'])\n",
-    "        ].sort_values(by='QPS')\n",
-    "        \n",
+    "            (runs_selected[\"KV_Cache_Scorer_Weight\"] == conf[\"kv\"])\n",
+    "            & (runs_selected[\"Queue_Scorer_Weight\"] == conf[\"queue\"])\n",
+    "            & (runs_selected[\"Prefix_Cache_Scorer_Weight\"] == conf[\"prefix\"])\n",
+    "        ].sort_values(by=\"QPS\")\n",
+    "\n",
     "        # Print table\n",
     "        if print_tables:\n",
     "            display(conf_df)\n",
-    "        \n",
+    "\n",
     "        # Plot throughputs for configuration\n",
-    "        plt.plot(conf_df.Total_Token_Throughput, conf_df.Mean_TTFT_ms,\n",
-    "                 label=f'KV:{conf['kv']} Queue:{conf['queue']} Prefix:{conf['prefix']}',\n",
-    "                 marker='o', markersize=4,\n",
-    "                 color=colors[ii%len(colors)]\n",
-    "                 )\n",
+    "        plt.plot(\n",
+    "            conf_df.Total_Token_Throughput,\n",
+    "            conf_df.Mean_TTFT_ms,\n",
+    "            label=f\"KV:{conf['kv']} Queue:{conf['queue']} Prefix:{conf['prefix']}\",\n",
+    "            marker=\"o\",\n",
+    "            markersize=4,\n",
+    "            color=colors[ii % len(colors)],\n",
+    "        )\n",
     "        for jj, val in enumerate(conf_df.QPS):\n",
-    "            plt.text(list(conf_df.Total_Token_Throughput)[jj],\n",
-    "                     list(conf_df.Mean_TTFT_ms)[jj]+runs_selected['Mean_TTFT_ms'].max()*0.02,\n",
-    "                     str(val), ha='center', color=colors[ii%len(colors)])\n",
-    "    \n",
-    "    plt.title(f'GPU: {gpu}\\nModel: {model}\\nPrompt Len: {prompt_len} Query Len: {q_len} OSL: {osl}\\nGroups: {groups} Prompts per Group: {prompts_per_grp}')\n",
-    "    plt.xlabel('Total Throughput (Tok/s)', fontsize='16')\n",
-    "    plt.ylabel('Mean TTFT (ms)', fontsize='16')\n",
-    "    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n",
-    "    plt.grid(True, linewidth=1, ls='--', color='gray')\n",
+    "            plt.text(\n",
+    "                list(conf_df.Total_Token_Throughput)[jj],\n",
+    "                list(conf_df.Mean_TTFT_ms)[jj] + runs_selected[\"Mean_TTFT_ms\"].max() * 0.02,\n",
+    "                str(val),\n",
+    "                ha=\"center\",\n",
+    "                color=colors[ii % len(colors)],\n",
+    "            )\n",
+    "\n",
+    "    plt.title(\n",
+    "        f\"GPU: {gpu}\\nModel: {model}\\nPrompt Len: {prompt_len} Query Len: {q_len} OSL: {osl}\\nGroups: {groups} Prompts per Group: {prompts_per_grp}\"\n",
+    "    )\n",
+    "    plt.xlabel(\"Total Throughput (Tok/s)\", fontsize=\"16\")\n",
+    "    plt.ylabel(\"Mean TTFT (ms)\", fontsize=\"16\")\n",
+    "    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)\n",
+    "    plt.grid(True, linewidth=1, ls=\"--\", color=\"gray\")\n",
     "    plt.axis([0, None, 0, None])\n",
     "    plt.show()"
    ]
   }
diff --git a/analysis/analysis_pd.ipynb b/llm_d_benchmark/analysis/analysis_pd.ipynb
similarity index 95%
rename from analysis/analysis_pd.ipynb
rename to llm_d_benchmark/analysis/analysis_pd.ipynb
index 6f2a5e03..e36797e2 100644
--- a/analysis/analysis_pd.ipynb
+++ b/llm_d_benchmark/analysis/analysis_pd.ipynb
@@ -45,17 +44,16 @@
     "import sys\n",
     "from pathlib import Path\n",
     "\n",
-    "import matplotlib\n",
+    "# sys.path.insert(0, '../workload/report/')\n",
+    "import convert\n",
     "import matplotlib.pyplot as plt\n",
     "import pandas\n",
-    "\n",
-    "#sys.path.insert(0, '../workload/report/')\n",
-    "import convert\n",
     "import schema\n",
     "\n",
     "\n",
     "class Text:\n",
     "    \"\"\"ANSI SGR control codes for text formatting\"\"\"\n",
+    "\n",
     "    DEFAULT = \"\\x1b[0m\"\n",
     "    BOLD = \"\\x1b[1m\"\n",
     "    BOLD_OFF = \"\\x1b[22m\"\n",
@@ -87,7 +86,7 @@
     "    Args:\n",
     "        mesg (str): Information message.\n",
     "    \"\"\"\n",
-    "    sys.stderr.write(f'{Text.GREEN}{mesg}\\n{Text.DEFAULT}')\n",
+    "    sys.stderr.write(f\"{Text.GREEN}{mesg}\\n{Text.DEFAULT}\")\n",
     "\n",
     "\n",
     "def warn(mesg: str) -> None:\n",
@@ -96,7 +95,7 @@
     "    Args:\n",
     "        mesg (str): Warming message.\n",
     "    \"\"\"\n",
-    "    sys.stderr.write(f'{Text.YELLOW}{mesg}\\n{Text.DEFAULT}')\n",
+    "    sys.stderr.write(f\"{Text.YELLOW}{mesg}\\n{Text.DEFAULT}\")\n",
     "\n",
     "\n",
     "def error(mesg: str, err_code: int = 1) -> None:\n",
@@ -106,7 +105,7 @@
     "        mesg (str): Error message.\n",
     "        err_code (int): Error code.\n",
     "    \"\"\"\n",
-    "    sys.stderr.write(f'{Text.RED}{mesg}\\n{Text.DEFAULT}')\n",
+    "    sys.stderr.write(f\"{Text.RED}{mesg}\\n{Text.DEFAULT}\")\n",
     "    sys.exit(err_code)\n",
     "\n",
     "\n",
@@ -117,7 +116,7 @@
     "        dir (str): Directory to check existence of.\n",
     "    \"\"\"\n",
     "    if not os.path.isdir(dir):\n",
-    "        error(f'Invalid path: {dir}')\n",
+    "        error(f\"Invalid path: {dir}\")\n",
     "\n",
     "\n",
     "def check_file(file: str) -> None:\n",
@@ -127,7 +126,7 @@
     "        file (str): File to check existence of.\n",
     "    \"\"\"\n",
     "    if not os.path.isfile(file):\n",
-    "        error(f'Invalid file: {file}')\n",
+    "        error(f\"Invalid file: {file}\")\n",
     "\n",
     "\n",
     "def get_benchmark_report_files(source_dir: str) -> list[str]:\n",
@@ -135,7 +134,7 @@
     "\n",
     "    Args:\n",
     "        source_dir (str): Directory to recursively search for results files.\n",
-    "    \n",
+    "\n",
     "    Returns:\n",
     "        list: List of paths to benchmark report files.\n",
     "    \"\"\"\n",
@@ -153,44 +152,46 @@
     "    Returns:\n",
     "        DataFrame: Empty DataFrame for benchmark runs.\n",
     "    \"\"\"\n",
-    "    return pandas.DataFrame(columns=[\n",
-    "        'Name',\n",
-    "        'Directory',\n",
-    "        'Model',\n",
-    "        'GPU',\n",
-    "        'DP',\n",
-    "        'TP',\n",
-    "        'PP',\n",
-    "        'EP',\n",
-    "        'Replicas',\n",
-    "        'P_DP',\n",
-    "        'P_TP',\n",
-    "        'P_PP',\n",
-    "        'P_EP',\n",
-    "        'P_Replicas',\n",
-    "        'D_DP',\n",
-    "        'D_TP',\n",
-    "        'D_PP',\n",
-    "        'D_EP',\n",
-    "        'D_Replicas',\n",
-    "        'Concurrency',\n",
-    "        'ISL',\n",
-    "        'OSL',\n",
-    "        'Backend',\n",
-    "        'Duration',\n",
-    "        'Completed',\n",
-    "        'Request_Throughput',\n",
-    "        'Output_Token_Throughput',\n",
-    "        'Total_Token_Throughput',\n",
-    "        'Mean_TTFT_ms',\n",
-    "        'Mean_TPOT_ms',\n",
-    "        'Mean_ITL_ms',\n",
-    "        'Mean_E2EL_ms',\n",
-    "        'Is_PD',\n",
-    "        'Num_GPUs',\n",
-    "        'Thpt_per_GPU',\n",
-    "        'Thpt_per_User',\n",
-    "    ])\n",
+    "    return pandas.DataFrame(\n",
+    "        columns=[\n",
+    "            \"Name\",\n",
+    "            \"Directory\",\n",
+    "            \"Model\",\n",
+    "            \"GPU\",\n",
+    "            \"DP\",\n",
+    "            \"TP\",\n",
+    "            \"PP\",\n",
+    "            \"EP\",\n",
+    "            \"Replicas\",\n",
+    "            \"P_DP\",\n",
+    "            \"P_TP\",\n",
+    "            \"P_PP\",\n",
+    "            \"P_EP\",\n",
+    "            \"P_Replicas\",\n",
+    "            \"D_DP\",\n",
+    "            \"D_TP\",\n",
+    "            \"D_PP\",\n",
+    "            \"D_EP\",\n",
+    "            \"D_Replicas\",\n",
+    "            \"Concurrency\",\n",
+    "            \"ISL\",\n",
+    "            \"OSL\",\n",
+    "            \"Backend\",\n",
+    "            \"Duration\",\n",
+    "            \"Completed\",\n",
+    "            \"Request_Throughput\",\n",
+    "            \"Output_Token_Throughput\",\n",
+    "            \"Total_Token_Throughput\",\n",
+    "            \"Mean_TTFT_ms\",\n",
+    "            \"Mean_TPOT_ms\",\n",
+    "            \"Mean_ITL_ms\",\n",
+    "            \"Mean_E2EL_ms\",\n",
+    "            \"Is_PD\",\n",
+    "            \"Num_GPUs\",\n",
+    "            \"Thpt_per_GPU\",\n",
+    "            \"Thpt_per_User\",\n",
+    "        ]\n",
+    "    )\n",
     "\n",
     "\n",
     "def _get_replicas_and_parallelism(report: schema.BenchmarkReport) -> dict[str, int | None]:\n",
@@ -205,49 +206,49 @@
     "        of None.\n",
     "    \"\"\"\n",
     "    rp = {}\n",
-    "    rp['replicas'] = report.scenario.host.type.count(schema.HostType.REPLICA)\n",
-    "    rp['p_replicas'] = report.scenario.host.type.count(schema.HostType.PREFILL)\n",
-    "    rp['d_replicas'] = report.scenario.host.type.count(schema.HostType.DECODE)\n",
-    "    if rp['replicas'] == 0:\n",
-    "        rp['replicas'] = None\n",
-    "    if rp['p_replicas'] == 0:\n",
-    "        rp['p_replicas'] = None\n",
-    "    if rp['d_replicas'] == 0:\n",
-    "        rp['d_replicas'] = None\n",
-    "    rp['tp'] = None\n",
-    "    rp['dp'] = None\n",
-    "    rp['pp'] = None\n",
-    "    rp['ep'] = None\n",
-    "    rp['p_tp'] = None\n",
-    "    rp['p_dp'] = None\n",
-    "    rp['p_pp'] = None\n",
-    "    rp['p_ep'] = None\n",
-    "    rp['d_tp'] = None\n",
-    "    rp['d_dp'] = None\n",
-    "    rp['d_pp'] = None\n",
-    "    rp['d_ep'] = None\n",
-    "    if rp['replicas']:\n",
+    "    rp[\"replicas\"] = report.scenario.host.type.count(schema.HostType.REPLICA)\n",
+    "    rp[\"p_replicas\"] = report.scenario.host.type.count(schema.HostType.PREFILL)\n",
+    "    rp[\"d_replicas\"] = report.scenario.host.type.count(schema.HostType.DECODE)\n",
+    "    if rp[\"replicas\"] == 0:\n",
+    "        rp[\"replicas\"] = None\n",
+    "    if rp[\"p_replicas\"] == 0:\n",
+    "        rp[\"p_replicas\"] = None\n",
+    "    if rp[\"d_replicas\"] == 0:\n",
+    "        rp[\"d_replicas\"] = None\n",
+    "    rp[\"tp\"] = None\n",
+    "    rp[\"dp\"] = None\n",
+    "    rp[\"pp\"] = None\n",
+    "    rp[\"ep\"] = None\n",
+    "    rp[\"p_tp\"] = None\n",
+    "    rp[\"p_dp\"] = None\n",
+    "    rp[\"p_pp\"] = None\n",
+    "    rp[\"p_ep\"] = None\n",
+    "    rp[\"d_tp\"] = None\n",
+    "    rp[\"d_dp\"] = None\n",
+    "    rp[\"d_pp\"] = None\n",
+    "    rp[\"d_ep\"] = None\n",
+    "    if rp[\"replicas\"]:\n",
     "        # We have a standalone setup\n",
-    "        rp['is_pd'] = False\n",
-    "        rp['tp'] = report.scenario.host.accelerator[0].parallelism.tp\n",
-    "        rp['dp'] = report.scenario.host.accelerator[0].parallelism.dp\n",
-    "        rp['pp'] = report.scenario.host.accelerator[0].parallelism.pp\n",
-    "        rp['ep'] = report.scenario.host.accelerator[0].parallelism.ep\n",
+    "        rp[\"is_pd\"] = False\n",
+    "        rp[\"tp\"] = report.scenario.host.accelerator[0].parallelism.tp\n",
+    "        rp[\"dp\"] = report.scenario.host.accelerator[0].parallelism.dp\n",
+    "        rp[\"pp\"] = report.scenario.host.accelerator[0].parallelism.pp\n",
+    "        rp[\"ep\"] = report.scenario.host.accelerator[0].parallelism.ep\n",
     "        return rp\n",
     "    # We have a P/D setup\n",
-    "    rp['is_pd'] = True\n",
+    "    rp[\"is_pd\"] = True\n",
     "    for ii, accel in enumerate(report.scenario.host.accelerator):\n",
-    "        if report.scenario.host.type[ii] is schema.HostType.PREFILL and not rp['p_tp']:\n",
-    "            rp['p_tp'] = accel.parallelism.tp\n",
-    "            rp['p_dp'] = accel.parallelism.dp\n",
-    "            rp['p_pp'] = accel.parallelism.pp\n",
-    "            rp['p_ep'] = accel.parallelism.ep\n",
-    "        if report.scenario.host.type[ii] is schema.HostType.DECODE and not rp['d_tp']:\n",
-    "            rp['d_tp'] = accel.parallelism.tp\n",
-    "            rp['d_dp'] = accel.parallelism.dp\n",
-    "            rp['d_pp'] = accel.parallelism.pp\n",
-    "            rp['d_ep'] = accel.parallelism.ep\n",
-    "        if rp['p_tp'] and rp['d_tp']:\n",
+    "        if report.scenario.host.type[ii] is schema.HostType.PREFILL and not rp[\"p_tp\"]:\n",
+    "            rp[\"p_tp\"] = accel.parallelism.tp\n",
+    "            rp[\"p_dp\"] = accel.parallelism.dp\n",
+    "            rp[\"p_pp\"] = accel.parallelism.pp\n",
+    "            rp[\"p_ep\"] = accel.parallelism.ep\n",
+    "        if report.scenario.host.type[ii] is schema.HostType.DECODE and not rp[\"d_tp\"]:\n",
+    "            rp[\"d_tp\"] = accel.parallelism.tp\n",
+    "            rp[\"d_dp\"] = accel.parallelism.dp\n",
+    "            rp[\"d_pp\"] = accel.parallelism.pp\n",
+    "            rp[\"d_ep\"] = accel.parallelism.ep\n",
+    "        if rp[\"p_tp\"] and rp[\"d_tp\"]:\n",
     "            break\n",
     "    return rp\n",
     "\n",
@@ -262,17 +263,15 @@
     "        str: Name of benchmark run, providing replica and parallelism details.\n",
     "    \"\"\"\n",
     "    rp = _get_replicas_and_parallelism(report)\n",
-    "    if rp['replicas']:\n",
+    "    if rp[\"replicas\"]:\n",
     "        # We have a standalone setup\n",
-    "        return f'{rp['replicas']}R TP{rp['tp']}'\n",
+    "        return f\"{rp['replicas']}R TP{rp['tp']}\"\n",
     "    # We have a P/D setup\n",
     "    # TODO we currently assume the only type of parallelism is TP\n",
-    "    return f'{rp['p_replicas']}P TP{rp['p_tp']}, {rp['d_replicas']}D TP{rp['d_tp']}'\n",
+    "    return f\"{rp['p_replicas']}P TP{rp['p_tp']}, {rp['d_replicas']}D TP{rp['d_tp']}\"\n",
     "\n",
     "\n",
-    "def add_benchmark_report_to_df(\n",
-    "        runs_df: pandas.core.frame.DataFrame,\n",
-    "        br_file: str) -> None:\n",
+    "def add_benchmark_report_to_df(runs_df: pandas.core.frame.DataFrame, br_file: str) -> None:\n",
     "    \"\"\"Load a results file and add it to the DataFrame of benchmark runs.\n",
     "\n",
     "    Args:\n",
@@ -285,67 +284,74 @@
     "    # TODO getting concurrency is speciffic to each harness, will need\n",
     "    # a way to capture this universally in the report so we don't have to do\n",
     "    # extractions like this\n",
-    "    if report.scenario.load.args and 'max_concurrency' in report.scenario.load.args:\n",
+    "    if report.scenario.load.args and \"max_concurrency\" in report.scenario.load.args:\n",
     "        # vLLM Benchmark\n",
-    "        concurrency = report.scenario.load.args['max_concurrency']\n",
-    "    elif report.scenario.load.args and 'profile' in report.scenario.load.args \\\n",
-    "            and 'measured_concurrencies' in report.scenario.load.args['profile']:\n",
+    "        concurrency = report.scenario.load.args[\"max_concurrency\"]\n",
+    "    elif (\n",
+    "        report.scenario.load.args\n",
+    "        and \"profile\" in report.scenario.load.args\n",
+    "        and \"measured_concurrencies\" in report.scenario.load.args[\"profile\"]\n",
+    "    ):\n",
     "        # GuideLLM\n",
-    "        concurrency = report.scenario.load.args['profile']['measured_concurrencies'][0]\n",
+    "        concurrency = report.scenario.load.args[\"profile\"][\"measured_concurrencies\"][0]\n",
     "    else:\n",
     "        warn('\"Concurrency\" is not defined, setting to 1, \"Thpt_per_User\" and Pareto plots will also be invalid.')\n",
     "        concurrency = 1\n",
     "\n",
     "    # Calculated columns\n",
-    "    if rp['is_pd']:\n",
-    "        num_gpus = rp['p_tp']*rp['p_replicas'] + rp['d_tp']*rp['d_replicas']\n",
+    "    if rp[\"is_pd\"]:\n",
+    "        num_gpus = rp[\"p_tp\"] * rp[\"p_replicas\"] + rp[\"d_tp\"] * rp[\"d_replicas\"]\n",
     "    else:\n",
-    "        num_gpus = rp['tp']*rp['replicas']\n",
-    "    thpt_per_gpu = report.metrics.throughput.output_tokens_per_sec/num_gpus\n",
-    "    thpt_per_user = report.metrics.throughput.output_tokens_per_sec/concurrency\n",
+    "        num_gpus = rp[\"tp\"] * rp[\"replicas\"]\n",
+    "    thpt_per_gpu = report.metrics.throughput.output_tokens_per_sec / num_gpus\n",
+    "    thpt_per_user = report.metrics.throughput.output_tokens_per_sec / concurrency\n",
     "\n",
     "    # Add row to DataFrame\n",
     "    runs_df.loc[len(runs_df)] = {\n",
-    "        'Name': _make_name(report),\n",
+    "        \"Name\": _make_name(report),\n",
     "        # We want the base directory for the sweep, which is two levels up\n",
-    "        'Directory': os.path.abspath(br_file).rsplit(os.sep, 2)[0],\n",
-    "        'Model': report.scenario.model.name,\n",
+    "        \"Directory\": os.path.abspath(br_file).rsplit(os.sep, 2)[0],\n",
+    "        \"Model\": report.scenario.model.name,\n",
     "        # Assume heterogeneous across P and D\n",
-    "        'GPU': report.scenario.host.accelerator[0].model,\n",
-    "        'DP': rp['dp'],\n",
-    "        'TP': rp['tp'],\n",
-    "        'PP': rp['pp'],\n",
-    "        'EP': rp['ep'],\n",
-    "        'Replicas': rp['replicas'],\n",
-    "        'P_DP': rp['p_dp'],\n",
-    "        'P_TP': rp['p_tp'],\n",
-    "        'P_PP': rp['p_pp'],\n",
-    "        'P_EP': rp['p_ep'],\n",
-    "        'P_Replicas': rp['p_replicas'],\n",
-    "        'D_DP': rp['d_dp'],\n",
-    "        'D_TP': rp['d_tp'],\n",
-    "        'D_PP': rp['d_pp'],\n",
-    "        'D_EP': rp['d_ep'],\n",
-    "        'D_Replicas': rp['d_replicas'],\n",
-    "        'Concurrency': concurrency,\n",
+    "        \"GPU\": report.scenario.host.accelerator[0].model,\n",
+    "        \"DP\": rp[\"dp\"],\n",
+    "        \"TP\": rp[\"tp\"],\n",
+    "        \"PP\": rp[\"pp\"],\n",
+    "        \"EP\": rp[\"ep\"],\n",
+    "        \"Replicas\": rp[\"replicas\"],\n",
+    "        \"P_DP\": rp[\"p_dp\"],\n",
+    "        \"P_TP\": rp[\"p_tp\"],\n",
+    "        \"P_PP\": rp[\"p_pp\"],\n",
+    "        \"P_EP\": rp[\"p_ep\"],\n",
+    "        \"P_Replicas\": rp[\"p_replicas\"],\n",
+    "        \"D_DP\": rp[\"d_dp\"],\n",
+    "        \"D_TP\": rp[\"d_tp\"],\n",
+    "        \"D_PP\": rp[\"d_pp\"],\n",
+    "        \"D_EP\": rp[\"d_ep\"],\n",
+    "        \"D_Replicas\": rp[\"d_replicas\"],\n",
+    "        \"Concurrency\": concurrency,\n",
     "        # TODO this may need to be configurable...\n",
     "        # We need to group by ISL/OSL exactly, so round and convert to int.\n",
     "        # Round ISL to nearest 100's\n",
-    "        'ISL': int(round(report.metrics.requests.input_length.mean, -2)),\n",
-    "        'OSL': int(round(report.metrics.requests.output_length.mean, -2)),\n",
-    "        'Duration': report.metrics.time.duration,\n",
-    "        'Completed': report.metrics.requests.total,\n",
-    "        'Request_Throughput': report.metrics.throughput.requests_per_sec,\n",
-    "        'Output_Token_Throughput': report.metrics.throughput.output_tokens_per_sec,\n",
-    "        'Total_Token_Throughput': report.metrics.throughput.total_tokens_per_sec,\n",
-    "        'Mean_TTFT_ms': report.metrics.latency.time_to_first_token.mean * (1000 if report.metrics.latency.time_to_first_token.units == schema.Units.S else 1),\n",
-    "        'Mean_TPOT_ms': report.metrics.latency.time_per_output_token.mean * (1000 if report.metrics.latency.time_per_output_token.units == schema.Units.S_PER_TOKEN else 1),\n",
-    "        'Mean_ITL_ms': report.metrics.latency.inter_token_latency.mean * (1000 if report.metrics.latency.inter_token_latency.units == schema.Units.S_PER_TOKEN else 1),\n",
-    "        'Mean_E2EL_ms': report.metrics.latency.request_latency.mean * (1000 if report.metrics.latency.request_latency.units == schema.Units.S else 1),\n",
-    "        'Is_PD': rp['is_pd'],\n",
-    "        'Num_GPUs': num_gpus,\n",
-    "        'Thpt_per_GPU': thpt_per_gpu,\n",
-    "        'Thpt_per_User': thpt_per_user,\n",
+    "        \"ISL\": int(round(report.metrics.requests.input_length.mean, -2)),\n",
+    "        \"OSL\": int(round(report.metrics.requests.output_length.mean, -2)),\n",
+    "        \"Duration\": report.metrics.time.duration,\n",
+    "        \"Completed\": report.metrics.requests.total,\n",
+    "        \"Request_Throughput\": report.metrics.throughput.requests_per_sec,\n",
+    "        \"Output_Token_Throughput\": report.metrics.throughput.output_tokens_per_sec,\n",
+    "        \"Total_Token_Throughput\": report.metrics.throughput.total_tokens_per_sec,\n",
+    "        \"Mean_TTFT_ms\": report.metrics.latency.time_to_first_token.mean\n",
+    "        * (1000 if report.metrics.latency.time_to_first_token.units == schema.Units.S else 1),\n",
+    "        \"Mean_TPOT_ms\": report.metrics.latency.time_per_output_token.mean\n",
+    "        * (1000 if report.metrics.latency.time_per_output_token.units == schema.Units.S_PER_TOKEN else 1),\n",
+    "        \"Mean_ITL_ms\": report.metrics.latency.inter_token_latency.mean\n",
+    "        * (1000 if report.metrics.latency.inter_token_latency.units == schema.Units.S_PER_TOKEN else 1),\n",
+    "        \"Mean_E2EL_ms\": report.metrics.latency.request_latency.mean\n",
+    "        * (1000 if report.metrics.latency.request_latency.units == schema.Units.S else 1),\n",
+    "        \"Is_PD\": rp[\"is_pd\"],\n",
+    "        \"Num_GPUs\": num_gpus,\n",
+    "        \"Thpt_per_GPU\": thpt_per_gpu,\n",
+    "        \"Thpt_per_User\": thpt_per_user,\n",
     "    }\n",
     "\n",
     "\n",
@@ -360,7 +366,7 @@
     "        list[tuple[str]]: List of scenarios, consisting of unique groups of\n",
     "            model, GPU type, ISL, and OSL.\n",
     "    \"\"\"\n",
-    "    columns = ['Model', 'GPU', 'ISL', 'OSL']\n",
+    "    columns = [\"Model\", \"GPU\", \"ISL\", \"OSL\"]\n",
     "    return list(set(runs_df.set_index(columns).index))\n",
     "\n",
     "\n",
@@ -370,7 +376,7 @@
     "    Args:\n",
     "        scenarios (list[tuple[str]]): Scenario groups to print.\n",
     "    \"\"\"\n",
-    "    columns = ['Model', 'GPU', 'ISL', 'OSL']\n",
+    "    columns = [\"Model\", \"GPU\", \"ISL\", \"OSL\"]\n",
     "    # Get maximum text length for each column, including header\n",
     "    spans = list(map(len, columns))\n",
     "    for sc in scenarios:\n",
@@ -379,18 +385,18 @@
     "            spans[jj] = len(str(item))\n",
     "\n",
     "    # Create header, starting with scenario index\n",
-    "    header = f'{Text.BOLD}{Text.BLUE}IDX  {Text.DEFAULT}{Text.BOLD}'\n",
+    "    header = f\"{Text.BOLD}{Text.BLUE}IDX  {Text.DEFAULT}{Text.BOLD}\"\n",
" # Add each column name to header\n", " for ii, col in enumerate(columns):\n", " header += col + \" \" * (spans[ii] - len(col) + 2)\n", - " header += f'{Text.DEFAULT}'\n", + " header += f\"{Text.DEFAULT}\"\n", " print(header)\n", "\n", " # Print details of each scenario\n", " for ii, sc in enumerate(scenarios):\n", - " row = f'{Text.BLUE}{ii}{Text.DEFAULT}' + \" \" * (5 - len(str(ii)))\n", + " row = f\"{Text.BLUE}{ii}{Text.DEFAULT}\" + \" \" * (5 - len(str(ii)))\n", " for jj, val in enumerate(sc):\n", - " row += f'{str(val)}' + \" \" * (spans[jj] - len(str(val)) + 2)\n", + " row += f\"{str(val)}\" + \" \" * (spans[jj] - len(str(val)) + 2)\n", " print(row)" ] }, @@ -427,10 +433,10 @@ "\n", "# Populate the runs DataFrame\n", "for sdir in search_dirs:\n", - " info(f'Searching for benchmark report files within {sdir}')\n", + " info(f\"Searching for benchmark report files within {sdir}\")\n", " # Find all benchmark report files in the directory\n", " for br_file in get_benchmark_report_files(sdir):\n", - " #info(f'Importing {br_file}')\n", + " # info(f'Importing {br_file}')\n", " # Import the results and add to the runs DataFrame\n", " add_benchmark_report_to_df(runs, br_file)" ] @@ -476,7 +482,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "id": "955501bc-de64-435a-819b-dc04435827ce", "metadata": {}, "outputs": [ @@ -1514,208 +1520,247 @@ "model, gpu, isl, osl = scenarios[idx]\n", "\n", "# Filter on column values\n", - "pd_runs_selected = runs[\n", - " (runs['Model'] == model) &\n", - " (runs['GPU'] == gpu) &\n", - " (runs['ISL'] == isl) &\n", - " (runs['OSL'] == osl) &\n", - " (runs['Is_PD'] == True) ][[\n", - " 'Model',\n", - " 'GPU',\n", - " 'P_TP',\n", - " 'P_Replicas',\n", - " 'D_TP',\n", - " 'D_Replicas',\n", - " 'Concurrency',\n", - " 'ISL',\n", - " 'OSL',\n", - " 'Output_Token_Throughput',\n", - " 'Thpt_per_GPU',\n", - " 'Thpt_per_User',\n", - " 'Directory']].drop('Model', axis=1).drop('GPU', axis=1).drop('ISL', axis=1).drop('OSL', axis=1)#.sort_values(by='Output_Token_Throughput')\n", - "\n", - "sa_runs_selected = runs[\n", - " (runs['Model'] == model) &\n", - " (runs['GPU'] == gpu) &\n", - " (runs['ISL'] == isl) &\n", - " (runs['OSL'] == osl) &\n", - " (runs['Is_PD'] == False) ][[\n", - " 'Model',\n", - " 'GPU',\n", - " 'TP',\n", - " 'Replicas',\n", - " 'Concurrency',\n", - " 'ISL',\n", - " 'OSL',\n", - " 'Output_Token_Throughput',\n", - " 'Thpt_per_GPU',\n", - " 'Thpt_per_User',\n", - " 'Directory']].drop('Model', axis=1).drop('GPU', axis=1).drop('ISL', axis=1).drop('OSL', axis=1)#.sort_values(by='Output_Token_Throughput')\n", + "pd_runs_selected = (\n", + " runs[\n", + " (runs[\"Model\"] == model) & (runs[\"GPU\"] == gpu) & (runs[\"ISL\"] == isl) & (runs[\"OSL\"] == osl) & runs[\"Is_PD\"]\n", + " ][\n", + " [\n", + " \"Model\",\n", + " \"GPU\",\n", + " \"P_TP\",\n", + " \"P_Replicas\",\n", + " \"D_TP\",\n", + " \"D_Replicas\",\n", + " \"Concurrency\",\n", + " \"ISL\",\n", + " \"OSL\",\n", + " \"Output_Token_Throughput\",\n", + " \"Thpt_per_GPU\",\n", + " \"Thpt_per_User\",\n", + " \"Directory\",\n", + " ]\n", + " ]\n", + " .drop(\"Model\", axis=1)\n", + " .drop(\"GPU\", axis=1)\n", + " .drop(\"ISL\", axis=1)\n", + " .drop(\"OSL\", axis=1)\n", + ") # .sort_values(by='Output_Token_Throughput')\n", + "\n", + "\n", + "sa_runs_selected = (\n", + " runs[\n", + " (runs[\"Model\"] == model) & (runs[\"GPU\"] == gpu) & (runs[\"ISL\"] == isl) & (runs[\"OSL\"] == osl) & ~runs[\"Is_PD\"]\n", + " ][\n", + " [\n", + " \"Model\",\n", + " \"GPU\",\n", + " 
\"TP\",\n", + " \"Replicas\",\n", + " \"Concurrency\",\n", + " \"ISL\",\n", + " \"OSL\",\n", + " \"Output_Token_Throughput\",\n", + " \"Thpt_per_GPU\",\n", + " \"Thpt_per_User\",\n", + " \"Directory\",\n", + " ]\n", + " ]\n", + " .drop(\"Model\", axis=1)\n", + " .drop(\"GPU\", axis=1)\n", + " .drop(\"ISL\", axis=1)\n", + " .drop(\"OSL\", axis=1)\n", + ") # .sort_values(by='Output_Token_Throughput')\n", "\n", "# Plot performance results\n", - "colors = ['#FF0000', '#FFAA00', '#DDDD00', '#00DD00', '#00FFFF', '#0000FF',\n", - " '#FF00FF', '#666666', '#000000',\n", - " '#990000', '#777700', '#007700', '#009999', '#000099']\n", + "colors = [\n", + " \"#FF0000\",\n", + " \"#FFAA00\",\n", + " \"#DDDD00\",\n", + " \"#00DD00\",\n", + " \"#00FFFF\",\n", + " \"#0000FF\",\n", + " \"#FF00FF\",\n", + " \"#606060\",\n", + " \"#00FF00\",\n", + "]\n", "\n", - "# Unique configurations of replicas and TP, described as a tuple\n", - "# Tuple format is (rep, tp, p_rep, p_tp, d_rep, d_tp, dir, is_pd)\n", - "config_sets = []\n", - "if seg_by_dir:\n", - " configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP', 'Directory']).index))\n", - " configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP', 'Directory']).index))\n", - " if show_pd:\n", - " for conf in configs_pd:\n", - " config_sets.append((\n", - " 0, # Replicas\n", - " 0, # TP\n", - " conf[0], # P replicas\n", - " conf[1], # P TP\n", - " conf[2], # D replicas\n", - " conf[3], # D TP\n", - " conf[4], # Directory\n", - " True, # Is PD\n", - " ))\n", - " if show_sa:\n", - " for conf in configs_sa:\n", - " config_sets.append((\n", - " conf[0], # Replicas\n", - " conf[1], # TP\n", - " 0, # P replicas\n", - " 0, # P TP\n", - " 0, # D replicas\n", - " 0, # D TP\n", - " conf[2], # Directory\n", - " False # Is PD\n", - " ))\n", - "else:\n", - " pd_runs_selected = pd_runs_selected.drop('Directory', axis=1)\n", - " sa_runs_selected = sa_runs_selected.drop('Directory', axis=1)\n", - " configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP']).index))\n", - " configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP']).index))\n", - " if show_pd:\n", - " for conf in configs_pd:\n", - " config_sets.append((\n", - " 0, # Replicas\n", - " 0, # TP\n", - " conf[0], # P replicas\n", - " conf[1], # P TP\n", - " conf[2], # D replicas\n", - " conf[3], # D TP\n", - " 0, # Directory\n", - " True, # Is PD\n", - " ))\n", - " if show_sa:\n", - " for conf in configs_sa:\n", - " config_sets.append((\n", - " conf[0], # Replicas\n", - " conf[1], # TP\n", - " 0, # P replicas\n", - " 0, # P TP\n", - " 0, # D replicas\n", - " 0, # D TP\n", - " 0, # Directory\n", - " False # Is PD\n", - " ))\n", "\n", - "# Sort so prinouts/plots are organized\n", - "config_sets.sort()\n", + "def plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " plot_title,\n", + " metric,\n", + " metric_label,\n", + " log_scale,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + "):\n", + " plt.figure(figsize=(12, 5))\n", "\n", - "# Convert the list of sets to a list of dicts, to make code following clearer\n", - "configs = []\n", - "for conf in config_sets:\n", - " configs.append({\n", - " 'rep': conf[0],\n", - " 'tp': conf[1],\n", - " 'p_rep': conf[2],\n", - " 'p_tp': conf[3],\n", - " 'd_rep': conf[4],\n", - " 'd_tp': conf[5],\n", - " 'dir': conf[6],\n", - " 'is_pd': conf[7],\n", - " })\n", + " color_index = 0\n", "\n", - "if not configs:\n", " if show_pd:\n", - " print('No P/D configurations for 
this scenario!')\n", - " if show_sa:\n", - " print('No standalone configurations for this scenario!')\n", + " # Get unique configuration combinations\n", + " configs = (\n", + " pd_runs_selected[[\"P_TP\", \"P_Replicas\", \"D_TP\", \"D_Replicas\"]].drop_duplicates().reset_index(drop=True)\n", + " )\n", "\n", - "# Sweep through configurations\n", - "for ii, conf in enumerate(configs):\n", - " is_pd = 'P_TP' in conf\n", - " # Make a DataFrame for specific configuration\n", - " if conf['is_pd']:\n", - " # This configuration is PD\n", - " if seg_by_dir:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp']) &\n", - " (pd_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - "\n", - " print(pd_runs_selected.iloc[0]['Directory'])\n", - " else:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp'])\n", - " ].sort_values(by='Concurrency')\n", - "\n", - " \n", - " # Print table\n", - " display(conf_df)\n", - " \n", - " # Plot throughputs for configuration\n", - " plt.plot(conf_df.Thpt_per_User, conf_df.Thpt_per_GPU,\n", - " label=f'{conf['p_rep']}P-TP{conf['p_tp']} {conf['d_rep']}D-TP{conf['d_tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", + " for conf_index, conf in configs.iterrows():\n", + " p_tp = conf[\"P_TP\"]\n", + " p_r = conf[\"P_Replicas\"]\n", + " d_tp = conf[\"D_TP\"]\n", + " d_r = conf[\"D_Replicas\"]\n", + "\n", + " if not seg_by_dir:\n", + " selection = pd_runs_selected[\n", + " (pd_runs_selected[\"P_TP\"] == p_tp)\n", + " & (pd_runs_selected[\"P_Replicas\"] == p_r)\n", + " & (pd_runs_selected[\"D_TP\"] == d_tp)\n", + " & (pd_runs_selected[\"D_Replicas\"] == d_r)\n", + " ]\n", + " x_data = selection[\"Concurrency\"]\n", + " y_data = selection[metric]\n", + " plt.plot(\n", + " x_data,\n", + " y_data,\n", + " marker=\"o\",\n", + " linestyle=\"-\",\n", + " label=f\"PD (P={p_r}x{p_tp}, D={d_r}x{d_tp})\",\n", + " color=colors[color_index],\n", + " )\n", + " color_index += 1\n", + "\n", + " if seg_by_dir:\n", + " # Get the ordered list of unique directories in the selection\n", + " # directories = sorted(pd_runs_selected[\"Directory\"].unique())\n", + " directories = (\n", + " pd_runs_selected[\n", + " (pd_runs_selected[\"P_TP\"] == p_tp)\n", + " & (pd_runs_selected[\"P_Replicas\"] == p_r)\n", + " & (pd_runs_selected[\"D_TP\"] == d_tp)\n", + " & (pd_runs_selected[\"D_Replicas\"] == d_r)\n", + " ][\"Directory\"]\n", + " .unique()\n", + " .tolist()\n", " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Thpt_per_User)[jj],\n", - " list(conf_df.Thpt_per_GPU)[jj]+pd_runs_selected['Thpt_per_GPU'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", - " else:\n", - " # This configuration is standalone\n", - " if seg_by_dir:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp']) &\n", - " (sa_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", "\n", - " 
print(sa_runs_selected.iloc[0]['Directory'])\n", - " else:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp'])\n", - " ].sort_values(by='Concurrency')\n", + " # Get the traces corresponding to each of the directories\n", + " for dir in directories:\n", + " selection = pd_runs_selected[\n", + " (pd_runs_selected[\"P_TP\"] == p_tp)\n", + " & (pd_runs_selected[\"P_Replicas\"] == p_r)\n", + " & (pd_runs_selected[\"D_TP\"] == d_tp)\n", + " & (pd_runs_selected[\"D_Replicas\"] == d_r)\n", + " & (pd_runs_selected[\"Directory\"] == dir)\n", + " ]\n", + " x_data = selection[\"Concurrency\"]\n", + " y_data = selection[metric]\n", + " plt.plot(\n", + " x_data,\n", + " y_data,\n", + " marker=\"o\",\n", + " linestyle=\"-\",\n", + " label=f\"PD (P={p_r}x{p_tp}, D={d_r}x{d_tp}): {dir}\",\n", + " color=colors[color_index],\n", + " )\n", + " color_index += 1\n", "\n", - " # Print table\n", - " display(conf_df)\n", - " \n", - " # Plot throughputs for configuration\n", - " plt.plot(conf_df.Thpt_per_User, conf_df.Thpt_per_GPU,\n", - " label=f'Replicas: {conf['rep']} TP{conf['tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", + " if show_sa:\n", + " # Get unique configuration combinations\n", + " configs = sa_runs_selected[[\"TP\", \"Replicas\"]].drop_duplicates().reset_index(drop=True)\n", + "\n", + " for conf_index, conf in configs.iterrows():\n", + " tp = conf[\"TP\"]\n", + " r = conf[\"Replicas\"]\n", + "\n", + " if not seg_by_dir:\n", + " selection = sa_runs_selected[(sa_runs_selected[\"TP\"] == tp) & (sa_runs_selected[\"Replicas\"] == r)]\n", + " x_data = selection[\"Concurrency\"]\n", + " y_data = selection[metric]\n", + " plt.plot(\n", + " x_data,\n", + " y_data,\n", + " marker=\"x\",\n", + " linestyle=\"--\",\n", + " label=f\"SA ({r}x{tp})\",\n", + " color=colors[color_index],\n", + " )\n", + " color_index += 1\n", + "\n", + " if seg_by_dir:\n", + " # Get the ordered list of unique directories in the selection\n", + " # directories = sorted(sa_runs_selected[\"Directory\"].unique())\n", + " directories = (\n", + " sa_runs_selected[(sa_runs_selected[\"TP\"] == tp) & (sa_runs_selected[\"Replicas\"] == r)][\"Directory\"]\n", + " .unique()\n", + " .tolist()\n", " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Thpt_per_User)[jj],\n", - " list(conf_df.Thpt_per_GPU)[jj]+sa_runs_selected['Thpt_per_GPU'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", "\n", - "if configs:\n", - " plt.title(f'GPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", - " plt.xlabel('Tok/s/User', fontsize='16')\n", - " plt.ylabel('Tok/s/GPU', fontsize='16')\n", - " plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", - " plt.grid(True, linewidth=1, ls='--', color='gray')\n", - " plt.axis([0, None, 0, None])\n", - " plt.show()\n" + " # Get the traces corresponding to each of the directories\n", + " for dir in directories:\n", + " selection = sa_runs_selected[\n", + " (sa_runs_selected[\"TP\"] == tp)\n", + " & (sa_runs_selected[\"Replicas\"] == r)\n", + " & (sa_runs_selected[\"Directory\"] == dir)\n", + " ]\n", + " x_data = selection[\"Concurrency\"]\n", + " y_data = selection[metric]\n", + " plt.plot(\n", + " x_data,\n", + " y_data,\n", + " marker=\"x\",\n", + " linestyle=\"--\",\n", + " label=f\"SA ({r}x{tp}): {dir}\",\n", + " color=colors[color_index],\n", + " )\n", + " color_index += 1\n", + "\n", + " plt.xlabel(\"Concurrency\")\n", 
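+        "    # y-axis carries the selected metric; title, legend, grid, and the optional log scale are applied below\n",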
+ " plt.ylabel(metric_label)\n", + " plt.title(plot_title)\n", + " # plt.suptitle(f\"scenario_id = {idx}; {plot_title}; {model}; {gpu}\", size=16)\n", + " plt.legend()\n", + " plt.grid(True)\n", + " if log_scale:\n", + " plt.yscale(\"log\")\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"Output Token Throughput\",\n", + " \"Output_Token_Throughput\",\n", + " \"Output token throughput [tokens/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"Throughput per GPU\",\n", + " \"Thpt_per_GPU\",\n", + " \"Throughput per GPU [tokens/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"Throughput per User\",\n", + " \"Thpt_per_User\",\n", + " \"Throughput per user [tokens/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")" ] }, { @@ -1746,6 +1791,7 @@ "source": [ "import numpy as np\n", "\n", + "\n", "ttft_max = np.inf\n", "tpot_max = np.inf\n", "thpt_min = 0\n", @@ -1772,19 +1818,15 @@ "model, gpu, isl, osl = scenarios[idx]\n", "\n", "# Filter on column values\n", - "runs_selected = runs[\n", - " (runs['Model'] == model) &\n", - " (runs['GPU'] == gpu) &\n", - " (runs['ISL'] == isl) &\n", - " (runs['OSL'] == osl)\n", - "]\n", + "runs_selected = runs[(runs[\"Model\"] == model) & (runs[\"GPU\"] == gpu) & (runs[\"ISL\"] == isl) & (runs[\"OSL\"] == osl)]\n", "\n", "runs_filtered = runs_selected[\n", - " (runs_selected.Mean_TTFT_ms <= ttft_max) &\n", - " (runs_selected.Mean_TPOT_ms <= tpot_max) &\n", - " (runs_selected.Total_Token_Throughput >= thpt_min)\n", + " (runs_selected.Mean_TTFT_ms <= ttft_max)\n", + " & (runs_selected.Mean_TPOT_ms <= tpot_max)\n", + " & (runs_selected.Total_Token_Throughput >= thpt_min)\n", "]\n", "\n", + "\n", "def get_pareto_front(df: pandas.DataFrame) -> set[int]:\n", " \"\"\"Get indices of rows on Pareto front.\n", "\n", @@ -1805,6 +1847,7 @@ " pareto_set.remove(jj)\n", " return pareto_set\n", "\n", + "\n", "pareto_set = get_pareto_front(runs_filtered)\n", "\n", "# Runs that meet scenario selection, but fail SLOs\n", @@ -1815,30 +1858,39 @@ "runs_pareto_front = runs_filtered[runs_filtered.index.isin(pareto_set)]\n", "\n", "\n", - "plt.plot(runs_pareto_front.Thpt_per_User, runs_pareto_front.Thpt_per_GPU,\n", - " marker='o', markersize=4,\n", - " color='#FF00FF',\n", - " linestyle='',\n", - " label='Pareto front'\n", - " )\n", - "plt.plot(runs_filtered_not_front.Thpt_per_User, runs_filtered_not_front.Thpt_per_GPU,\n", - " marker='o', markersize=4,\n", - " color='#000000',\n", - " linestyle='',\n", - " label='Meets SLOs but non-optimal'\n", - " )\n", - "plt.plot(runs_fails_slo.Thpt_per_User, runs_fails_slo.Thpt_per_GPU,\n", - " marker='o', markersize=4,\n", - " color='#CCCCCC',\n", - " linestyle='',\n", - " label='Fails SLOs'\n", - " )\n", - "plt.title(f'GPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", - "plt.xlabel('Tok/s/User', fontsize='16')\n", - "plt.ylabel('Tok/s/GPU', fontsize='16')\n", - "plt.grid(True, linewidth=1, ls='--', color='gray')\n", + "plt.plot(\n", + " runs_pareto_front.Thpt_per_User,\n", + " runs_pareto_front.Thpt_per_GPU,\n", + " marker=\"o\",\n", + " markersize=4,\n", + " color=\"#FF00FF\",\n", + " linestyle=\"\",\n", + " label=\"Pareto front\",\n", + ")\n", + "plt.plot(\n", + " 
runs_filtered_not_front.Thpt_per_User,\n", + " runs_filtered_not_front.Thpt_per_GPU,\n", + " marker=\"o\",\n", + " markersize=4,\n", + " color=\"#000000\",\n", + " linestyle=\"\",\n", + " label=\"Meets SLOs but non-optimal\",\n", + ")\n", + "plt.plot(\n", + " runs_fails_slo.Thpt_per_User,\n", + " runs_fails_slo.Thpt_per_GPU,\n", + " marker=\"o\",\n", + " markersize=4,\n", + " color=\"#CCCCCC\",\n", + " linestyle=\"\",\n", + " label=\"Fails SLOs\",\n", + ")\n", + "plt.title(f\"GPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}\")\n", + "plt.xlabel(\"Tok/s/User\", fontsize=\"16\")\n", + "plt.ylabel(\"Tok/s/GPU\", fontsize=\"16\")\n", + "plt.grid(True, linewidth=1, ls=\"--\", color=\"gray\")\n", "plt.axis([0, None, 0, None])\n", - "plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", + "plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)\n", "plt.show()" ] }, @@ -2106,7 +2158,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "id": "27b95498-4d35-466e-8ee3-67c4f15fca01", "metadata": {}, "outputs": [ @@ -2152,10 +2204,6 @@ } ], "source": [ - "################################################################################\n", - "# User inputs\n", - "################################################################################\n", - "\n", "# Select scenario\n", "idx = 0\n", "\n", @@ -2177,382 +2225,371 @@ "\n", "# Filter on column values\n", "pd_runs_selected = runs[\n", - " (runs['Model'] == model) &\n", - " (runs['GPU'] == gpu) &\n", - " (runs['ISL'] == isl) &\n", - " (runs['OSL'] == osl) &\n", - " (runs['Is_PD'] == True) ]\n", + " (runs[\"Model\"] == model) & (runs[\"GPU\"] == gpu) & (runs[\"ISL\"] == isl) & (runs[\"OSL\"] == osl) & runs[\"Is_PD\"]\n", + "]\n", "\n", "sa_runs_selected = runs[\n", - " (runs['Model'] == model) &\n", - " (runs['GPU'] == gpu) &\n", - " (runs['ISL'] == isl) &\n", - " (runs['OSL'] == osl) &\n", - " (runs['Is_PD'] == False) ]\n", + " (runs[\"Model\"] == model) & (runs[\"GPU\"] == gpu) & (runs[\"ISL\"] == isl) & (runs[\"OSL\"] == osl) & ~runs[\"Is_PD\"]\n", + "]\n", "\n", "# Plot performance results\n", - "colors = ['#FF0000', '#FFAA00', '#DDDD00', '#00DD00', '#00FFFF', '#0000FF',\n", - " '#FF00FF', '#666666', '#000000',\n", - " '#990000', '#777700', '#007700', '#009999', '#000099']\n", + "colors = [\n", + " \"#FF0000\",\n", + " \"#FFAA00\",\n", + " \"#DDDD00\",\n", + " \"#00DD00\",\n", + " \"#00FFFF\",\n", + " \"#0000FF\",\n", + " \"#FF00FF\",\n", + " \"#666666\",\n", + " \"#000000\",\n", + " \"#990000\",\n", + " \"#777700\",\n", + " \"#007700\",\n", + " \"#009999\",\n", + " \"#000099\",\n", + " \"#990099\",\n", + " \"#007777\",\n", + " \"#770077\",\n", + "]\n", "\n", - "# Unique configurations of replicas and TP, described as a tuple\n", - "# Tuple format is (rep, tp, p_rep, p_tp, d_rep, d_tp, dir, is_pd)\n", - "config_sets = []\n", - "if seg_by_dir:\n", - " configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP', 'Directory']).index))\n", - " configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP', 'Directory']).index))\n", - " if show_pd:\n", - " for conf in configs_pd:\n", - " config_sets.append((\n", - " 0, # Replicas\n", - " 0, # TP\n", - " conf[0], # P replicas\n", - " conf[1], # P TP\n", - " conf[2], # D replicas\n", - " conf[3], # D TP\n", - " conf[4], # Directory\n", - " True, # Is PD\n", - " ))\n", - " if show_sa:\n", - " for conf in configs_sa:\n", - " config_sets.append((\n", - " conf[0], # Replicas\n", - " conf[1], # TP\n", - " 0, # P 
replicas\n", - " 0, # P TP\n", - " 0, # D replicas\n", - " 0, # D TP\n", - " conf[2], # Directory\n", - " False # Is PD\n", - " ))\n", - "else:\n", - " pd_runs_selected = pd_runs_selected.drop('Directory', axis=1)\n", - " sa_runs_selected = sa_runs_selected.drop('Directory', axis=1)\n", - " configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP']).index))\n", - " configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP']).index))\n", - " if show_pd:\n", - " for conf in configs_pd:\n", - " config_sets.append((\n", - " 0, # Replicas\n", - " 0, # TP\n", - " conf[0], # P replicas\n", - " conf[1], # P TP\n", - " conf[2], # D replicas\n", - " conf[3], # D TP\n", - " 0, # Directory\n", - " True, # Is PD\n", - " ))\n", - " if show_sa:\n", - " for conf in configs_sa:\n", - " config_sets.append((\n", - " conf[0], # Replicas\n", - " conf[1], # TP\n", - " 0, # P replicas\n", - " 0, # P TP\n", - " 0, # D replicas\n", - " 0, # D TP\n", - " 0, # Directory\n", - " False # Is PD\n", - " ))\n", "\n", - "# Sort so prinouts/plots are organized\n", - "config_sets.sort()\n", + "def plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " plot_title,\n", + " metric,\n", + " metric_label,\n", + " log_scale,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + "):\n", + " plt.figure(figsize=(14, 6))\n", "\n", - "# Convert the list of sets to a list of dicts, to make code following clearer\n", - "configs = []\n", - "for conf in config_sets:\n", - " configs.append({\n", - " 'rep': conf[0],\n", - " 'tp': conf[1],\n", - " 'p_rep': conf[2],\n", - " 'p_tp': conf[3],\n", - " 'd_rep': conf[4],\n", - " 'd_tp': conf[5],\n", - " 'dir': conf[6],\n", - " 'is_pd': conf[7],\n", - " })\n", + " color_index = 0\n", "\n", - "if not configs:\n", " if show_pd:\n", - " print('No P/D configurations for this scenario!')\n", - " if show_sa:\n", - " print('No standalone configurations for this scenario!')\n", - "\n", - "################################################################################\n", + " # Get unique configuration combinations\n", + " configs = (\n", + " pd_runs_selected[[\"P_TP\", \"P_Replicas\", \"D_TP\", \"D_Replicas\"]].drop_duplicates().reset_index(drop=True)\n", + " )\n", "\n", - "# Plot total throughput vs TTFT\n", - "# Sweep through configurations\n", - "for ii, conf in enumerate(configs):\n", - " is_pd = 'P_TP' in conf\n", - " # Make a DataFrame for specific configuration\n", - " if conf['is_pd']:\n", - " # This configuration is PD\n", - " if seg_by_dir:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp']) &\n", - " (pd_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - " else:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp'])\n", - " ].sort_values(by='Concurrency')\n", - " \n", - " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TTFT_ms, conf_df.Total_Token_Throughput,\n", - " label=f'{conf['p_rep']}P-TP{conf['p_tp']} {conf['d_rep']}D-TP{conf['d_tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", + " for conf_index, 
conf in configs.iterrows():\n", + " p_tp = conf[\"P_TP\"]\n", + " p_r = conf[\"P_Replicas\"]\n", + " d_tp = conf[\"D_TP\"]\n", + " d_r = conf[\"D_Replicas\"]\n", + "\n", + " if not seg_by_dir:\n", + " selection = pd_runs_selected[\n", + " (pd_runs_selected[\"P_TP\"] == p_tp)\n", + " & (pd_runs_selected[\"P_Replicas\"] == p_r)\n", + " & (pd_runs_selected[\"D_TP\"] == d_tp)\n", + " & (pd_runs_selected[\"D_Replicas\"] == d_r)\n", + " ]\n", + " x_data = selection[\"Concurrency\"]\n", + " y_data = selection[metric]\n", + " plt.plot(\n", + " x_data,\n", + " y_data,\n", + " marker=\"o\",\n", + " linestyle=\"-\",\n", + " label=f\"PD (P={p_r}x{p_tp}, D={d_r}x{d_tp})\",\n", + " color=colors[color_index],\n", + " )\n", + " color_index += 1\n", + "\n", + " if seg_by_dir:\n", + " # Get the ordered list of unique directories in the selection\n", + " # directories = sorted(pd_runs_selected[\"Directory\"].unique())\n", + " directories = (\n", + " pd_runs_selected[\n", + " (pd_runs_selected[\"P_TP\"] == p_tp)\n", + " & (pd_runs_selected[\"P_Replicas\"] == p_r)\n", + " & (pd_runs_selected[\"D_TP\"] == d_tp)\n", + " & (pd_runs_selected[\"D_Replicas\"] == d_r)\n", + " ][\"Directory\"]\n", + " .unique()\n", + " .tolist()\n", " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TTFT_ms)[jj],\n", - " list(conf_df.Total_Token_Throughput)[jj]+pd_runs_selected['Thpt_per_GPU'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", - " else:\n", - " # This configuration is standalone\n", - " if seg_by_dir:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp']) &\n", - " (sa_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - " else:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp'])\n", - " ].sort_values(by='Concurrency')\n", "\n", - " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TTFT_ms, conf_df.Total_Token_Throughput,\n", - " label=f'Replicas: {conf['rep']} TP{conf['tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", + " # Get the traces corresponding to each of the directories\n", + " for dir in directories:\n", + " selection = pd_runs_selected[\n", + " (pd_runs_selected[\"P_TP\"] == p_tp)\n", + " & (pd_runs_selected[\"P_Replicas\"] == p_r)\n", + " & (pd_runs_selected[\"D_TP\"] == d_tp)\n", + " & (pd_runs_selected[\"D_Replicas\"] == d_r)\n", + " & (pd_runs_selected[\"Directory\"] == dir)\n", + " ]\n", + " x_data = selection[\"Concurrency\"]\n", + " y_data = selection[metric]\n", + " plt.plot(\n", + " x_data,\n", + " y_data,\n", + " marker=\"o\",\n", + " linestyle=\"-\",\n", + " label=f\"PD (P={p_r}x{p_tp}, D={d_r}x{d_tp}): {dir}\",\n", + " color=colors[color_index],\n", + " )\n", + " color_index += 1\n", + "\n", + " if show_sa:\n", + " # Get unique configuration combinations\n", + " configs = sa_runs_selected[[\"TP\", \"Replicas\"]].drop_duplicates().reset_index(drop=True)\n", + "\n", + " for conf_index, conf in configs.iterrows():\n", + " tp = conf[\"TP\"]\n", + " r = conf[\"Replicas\"]\n", + "\n", + " if not seg_by_dir:\n", + " selection = sa_runs_selected[(sa_runs_selected[\"TP\"] == tp) & (sa_runs_selected[\"Replicas\"] == r)]\n", + " x_data = selection[\"Concurrency\"]\n", + " y_data = selection[metric]\n", + " plt.plot(\n", + " x_data,\n", + " y_data,\n", + " 
marker=\"x\",\n", + " linestyle=\"--\",\n", + " label=f\"SA ({r}x{tp})\",\n", + " color=colors[color_index],\n", + " )\n", + " color_index += 1\n", + "\n", + " if seg_by_dir:\n", + " # Get the ordered list of unique directories in the selection\n", + " # directories = sorted(sa_runs_selected[\"Directory\"].unique())\n", + " directories = (\n", + " sa_runs_selected[(sa_runs_selected[\"TP\"] == tp) & (sa_runs_selected[\"Replicas\"] == r)][\"Directory\"]\n", + " .unique()\n", + " .tolist()\n", " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TTFT_ms)[jj],\n", - " list(conf_df.Total_Token_Throughput)[jj]+sa_runs_selected['Total_Token_Throughput'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", "\n", - "if configs:\n", - " plt.title(f'Throughput vs Latency\\nGPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", - " plt.xlabel('Mean TTFT (ms)', fontsize='16')\n", - " plt.ylabel('Total Throughput (Tok/s)', fontsize='16')\n", - " plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", - " plt.grid(True, linewidth=1, ls='--', color='gray')\n", - " plt.axis([None, None, 0, None])\n", + " # Get the traces corresponding to each of the directories\n", + " for dir in directories:\n", + " selection = sa_runs_selected[\n", + " (sa_runs_selected[\"TP\"] == tp)\n", + " & (sa_runs_selected[\"Replicas\"] == r)\n", + " & (sa_runs_selected[\"Directory\"] == dir)\n", + " ]\n", + " x_data = selection[\"Concurrency\"]\n", + " y_data = selection[metric]\n", + " plt.plot(\n", + " x_data,\n", + " y_data,\n", + " marker=\"x\",\n", + " linestyle=\"--\",\n", + " label=f\"SA ({r}x{tp}): {dir}\",\n", + " color=colors[color_index],\n", + " )\n", + " color_index += 1\n", + "\n", + " plt.xlabel(\"Concurrency\")\n", + " plt.ylabel(metric_label)\n", + " plt.title(plot_title)\n", + " # plt.suptitle(f\"scenario_id = {idx}; {plot_title}; {model}; {gpu}\", size=16)\n", + " plt.legend()\n", + " plt.grid(True)\n", + " if log_scale:\n", + " plt.yscale(\"log\")\n", + " plt.tight_layout()\n", " plt.show()\n", "\n", - "################################################################################\n", "\n", - "# Plot output throughput vs TTFT\n", - "# Sweep through configurations\n", - "for ii, conf in enumerate(configs):\n", - " is_pd = 'P_TP' in conf\n", - " # Make a DataFrame for specific configuration\n", - " if conf['is_pd']:\n", - " # This configuration is PD\n", - " if seg_by_dir:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp']) &\n", - " (pd_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - " else:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp'])\n", - " ].sort_values(by='Concurrency')\n", - " \n", - " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TTFT_ms, conf_df.Output_Token_Throughput,\n", - " label=f'{conf['p_rep']}P-TP{conf['p_tp']} {conf['d_rep']}D-TP{conf['d_tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", - " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " 
plt.text(list(conf_df.Mean_TTFT_ms)[jj],\n", - " list(conf_df.Output_Token_Throughput)[jj]+pd_runs_selected['Thpt_per_GPU'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", - " else:\n", - " # This configuration is standalone\n", - " if seg_by_dir:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp']) &\n", - " (sa_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - " else:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp'])\n", - " ].sort_values(by='Concurrency')\n", + "################################################################################\n", + "# QPS plots\n", + "################################################################################\n", "\n", - " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TTFT_ms, conf_df.Output_Token_Throughput,\n", - " label=f'Replicas: {conf['rep']} TP{conf['tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", - " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TTFT_ms)[jj],\n", - " list(conf_df.Output_Token_Throughput)[jj]+sa_runs_selected['Output_Token_Throughput'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"QPS\",\n", + " \"QPS\",\n", + " \"Queries per Second [queries/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", "\n", - "if configs:\n", - " plt.title(f'Throughput vs Latency\\nGPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", - " plt.xlabel('Mean TTFT (ms)', fontsize='16')\n", - " plt.ylabel('Output Throughput (Tok/s)', fontsize='16')\n", - " plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", - " plt.grid(True, linewidth=1, ls='--', color='gray')\n", - " plt.axis([None, None, 0, None])\n", - " plt.show()\n", "\n", "################################################################################\n", + "# TTFT plots\n", + "################################################################################\n", "\n", - "# Plot total throughput vs TPOT\n", - "# Sweep through configurations\n", - "for ii, conf in enumerate(configs):\n", - " is_pd = 'P_TP' in conf\n", - " # Make a DataFrame for specific configuration\n", - " if conf['is_pd']:\n", - " # This configuration is PD\n", - " if seg_by_dir:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp']) &\n", - " (pd_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - " else:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp'])\n", - " ].sort_values(by='Concurrency')\n", - " \n", - " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TPOT_ms, conf_df.Total_Token_Throughput,\n", - " label=f'{conf['p_rep']}P-TP{conf['p_tp']} {conf['d_rep']}D-TP{conf['d_tp']}',\n", - " marker='o', 
markersize=4,\n", - " color=colors[ii%len(colors)]\n", - " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TPOT_ms)[jj],\n", - " list(conf_df.Total_Token_Throughput)[jj]+pd_runs_selected['Thpt_per_GPU'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", - " else:\n", - " # This configuration is standalone\n", - " if seg_by_dir:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp']) &\n", - " (sa_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - " else:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp'])\n", - " ].sort_values(by='Concurrency')\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"TTFT avg\",\n", + " \"TTFT_avg\",\n", + " \"TTFT avg [ms]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"TTFT p99\",\n", + " \"TTFT_p99\",\n", + " \"TTFT p99 [ms]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"TTFT stdev\",\n", + " \"TTFT_stdev\",\n", + " \"TTFT stdev [ms]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", "\n", - " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TPOT_ms, conf_df.Total_Token_Throughput,\n", - " label=f'Replicas: {conf['rep']} TP{conf['tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", - " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TPOT_ms)[jj],\n", - " list(conf_df.Total_Token_Throughput)[jj]+sa_runs_selected['Total_Token_Throughput'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", "\n", - "if configs:\n", - " plt.title(f'Throughput vs Latency\\nGPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", - " plt.xlabel('Mean TPOT (ms)', fontsize='16')\n", - " plt.ylabel('Total Throughput (Tok/s)', fontsize='16')\n", - " plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", - " plt.grid(True, linewidth=1, ls='--', color='gray')\n", - " plt.axis([None, None, 0, None])\n", - " plt.show()\n", + "################################################################################\n", + "# TPOT plots\n", + "################################################################################\n", + "\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"TPOT avg\",\n", + " \"TPOT_avg\",\n", + " \"TPOT avg [ms/token]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"TPOT p99\",\n", + " \"TPOT_p99\",\n", + " \"TPOT p99 [ms/token]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"TPOT stdev\",\n", + " \"TPOT_stdev\",\n", + " \"TPOT stdev [ms/token]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "\n", "\n", "################################################################################\n", + "# ITL plots\n", + 
"################################################################################\n", "\n", - "# Plot output throughput vs TPOT\n", - "# Sweep through configurations\n", - "for ii, conf in enumerate(configs):\n", - " is_pd = 'P_TP' in conf\n", - " # Make a DataFrame for specific configuration\n", - " if conf['is_pd']:\n", - " # This configuration is PD\n", - " if seg_by_dir:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp']) &\n", - " (pd_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - " else:\n", - " conf_df = pd_runs_selected[\n", - " (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n", - " (pd_runs_selected['P_TP'] == conf['p_tp']) &\n", - " (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n", - " (pd_runs_selected['D_TP'] == conf['d_tp'])\n", - " ].sort_values(by='Concurrency')\n", - " \n", - " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TPOT_ms, conf_df.Output_Token_Throughput,\n", - " label=f'{conf['p_rep']}P-TP{conf['p_tp']} {conf['d_rep']}D-TP{conf['d_tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", - " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TPOT_ms)[jj],\n", - " list(conf_df.Output_Token_Throughput)[jj]+pd_runs_selected['Thpt_per_GPU'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", - " else:\n", - " # This configuration is standalone\n", - " if seg_by_dir:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp']) &\n", - " (sa_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", - " else:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp'])\n", - " ].sort_values(by='Concurrency')\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"ITL avg\",\n", + " \"ITL_avg\",\n", + " \"ITL avg [ms]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"ITL p99\",\n", + " \"ITL_p99\",\n", + " \"ITL p99 [ms]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"ITL stdev\",\n", + " \"ITL_stdev\",\n", + " \"ITL stdev [ms]\",\n", + " False,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", "\n", - " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TPOT_ms, conf_df.Output_Token_Throughput,\n", - " label=f'Replicas: {conf['rep']} TP{conf['tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", - " )\n", - " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TPOT_ms)[jj],\n", - " list(conf_df.Output_Token_Throughput)[jj]+sa_runs_selected['Output_Token_Throughput'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", "\n", - "if configs:\n", - " plt.title(f'Throughput vs Latency\\nGPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", - " plt.xlabel('Mean TPOT (ms)', fontsize='16')\n", - " 
plt.ylabel('Output Throughput (Tok/s)', fontsize='16')\n", - " plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", - " plt.grid(True, linewidth=1, ls='--', color='gray')\n", - " plt.axis([None, None, 0, None])\n", - " plt.show()" + "################################################################################\n", + "# Throughput plots\n", + "################################################################################\n", + "\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"Input Token Throughput\",\n", + " \"Input_Token_Throughput\",\n", + " \"Input token throughput [tokens/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"Output Token Throughput\",\n", + " \"Output_Token_Throughput\",\n", + " \"Output token throughput [tokens/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"Total Token Throughput\",\n", + " \"Total_Token_Throughput\",\n", + " \"Total token throughput [tokens/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"Throughput per GPU\",\n", + " \"Thpt_per_GPU\",\n", + " \"Throughput per GPU [tokens/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")\n", + "plot_metric(\n", + " pd_runs_selected,\n", + " sa_runs_selected,\n", + " \"Throughput per User\",\n", + " \"Thpt_per_User\",\n", + " \"Throughput per user [tokens/s]\",\n", + " True,\n", + " show_pd,\n", + " show_sa,\n", + " seg_by_dir,\n", + ")" ] }, { @@ -2565,7 +2602,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "id": "a7b7ad4c-e28c-4e02-a9e4-751cd3b8580b", "metadata": {}, "outputs": [ @@ -2581,10 +2618,6 @@ } ], "source": [ - "################################################################################\n", - "# User inputs\n", - "################################################################################\n", - "\n", "# Select scenario\n", "idx = 0\n", "\n", @@ -2606,83 +2639,109 @@ "\n", "# Filter on column values\n", "pd_runs_selected = runs[\n", - " (runs['Model'] == model) &\n", - " (runs['GPU'] == gpu) &\n", - " (runs['ISL'] == isl) &\n", - " (runs['OSL'] == osl) &\n", - " (runs['Is_PD'] == True) ]\n", + " (runs[\"Model\"] == model) & (runs[\"GPU\"] == gpu) & (runs[\"ISL\"] == isl) & (runs[\"OSL\"] == osl) & runs[\"Is_PD\"]\n", + "]\n", "\n", "sa_runs_selected = runs[\n", - " (runs['Model'] == model) &\n", - " (runs['GPU'] == gpu) &\n", - " (runs['ISL'] == isl) &\n", - " (runs['OSL'] == osl) &\n", - " (runs['Is_PD'] == False) ]\n", + " (runs[\"Model\"] == model) & (runs[\"GPU\"] == gpu) & (runs[\"ISL\"] == isl) & (runs[\"OSL\"] == osl) & ~runs[\"Is_PD\"]\n", + "]\n", "\n", "# Plot performance results\n", - "colors = ['#FF0000', '#FFAA00', '#DDDD00', '#00DD00', '#00FFFF', '#0000FF',\n", - " '#FF00FF', '#666666', '#000000',\n", - " '#990000', '#777700', '#007700', '#009999', '#000099']\n", + "colors = [\n", + " \"#FF0000\",\n", + " \"#FFAA00\",\n", + " \"#DDDD00\",\n", + " \"#00DD00\",\n", + " \"#00FFFF\",\n", + " \"#0000FF\",\n", + " \"#FF00FF\",\n", + " \"#606060\",\n", + "]\n", "\n", - "# Unique configurations of replicas and TP, described as a tuple\n", - "# Tuple format is (rep, tp, p_rep, p_tp, d_rep, d_tp, dir, is_pd)\n", + "# Build list of config 
sets present in the DataFrames, maintaining order\n",
         "config_sets = []\n",
-        "if seg_by_dir:\n",
-        "    configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP', 'Directory']).index))\n",
-        "    configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP', 'Directory']).index))\n",
-        "    if show_pd:\n",
-        "        for conf in configs_pd:\n",
-        "            config_sets.append((\n",
-        "                0,  # Replicas\n",
-        "                0,  # TP\n",
-        "                conf[0],  # P replicas\n",
-        "                conf[1],  # P TP\n",
-        "                conf[2],  # D replicas\n",
-        "                conf[3],  # D TP\n",
-        "                conf[4],  # Directory\n",
-        "                True,  # Is PD\n",
-        "            ))\n",
-        "    if show_sa:\n",
-        "        for conf in configs_sa:\n",
-        "            config_sets.append((\n",
-        "                conf[0],  # Replicas\n",
-        "                conf[1],  # TP\n",
-        "                0,  # P replicas\n",
-        "                0,  # P TP\n",
-        "                0,  # D replicas\n",
-        "                0,  # D TP\n",
-        "                conf[2],  # Directory\n",
-        "                False  # Is PD\n",
-        "            ))\n",
-        "else:\n",
-        "    pd_runs_selected = pd_runs_selected.drop('Directory', axis=1)\n",
-        "    sa_runs_selected = sa_runs_selected.drop('Directory', axis=1)\n",
-        "    configs_pd = list(set(pd_runs_selected.set_index(['P_Replicas', 'P_TP', 'D_Replicas', 'D_TP']).index))\n",
-        "    configs_sa = list(set(sa_runs_selected.set_index(['Replicas', 'TP']).index))\n",
-        "    if show_pd:\n",
-        "        for conf in configs_pd:\n",
-        "            config_sets.append((\n",
-        "                0,  # Replicas\n",
-        "                0,  # TP\n",
-        "                conf[0],  # P replicas\n",
-        "                conf[1],  # P TP\n",
-        "                conf[2],  # D replicas\n",
-        "                conf[3],  # D TP\n",
-        "                0,  # Directory\n",
-        "                True,  # Is PD\n",
-        "            ))\n",
+        "if show_pd:\n",
+        "    configs_pd = (\n",
+        "        pd_runs_selected[[\"P_Replicas\", \"P_TP\", \"D_Replicas\", \"D_TP\"]].drop_duplicates().reset_index(drop=True)\n",
+        "    )\n",
+        "    if seg_by_dir:\n",
+        "        configs_pd = (\n",
+        "            pd_runs_selected[[\"P_Replicas\", \"P_TP\", \"D_Replicas\", \"D_TP\", \"Directory\"]]\n",
+        "            .drop_duplicates()\n",
+        "            .reset_index(drop=True)\n",
+        "        )\n",
+        "if show_sa:\n",
+        "    configs_sa = sa_runs_selected[[\"Replicas\", \"TP\"]].drop_duplicates().reset_index(drop=True)\n",
+        "    if seg_by_dir:\n",
+        "        configs_sa = sa_runs_selected[[\"Replicas\", \"TP\", \"Directory\"]].drop_duplicates().reset_index(drop=True)\n",
+        "\n",
+        "# Tuple format is (rep, tp, p_rep, p_tp, d_rep, d_tp, dir, is_pd), matching the dict conversion below\n",
+        "if show_pd and seg_by_dir:\n",
+        "    for conf in configs_pd.values.tolist():\n",
+        "        config_sets.append(\n",
+        "            (\n",
+        "                0,  # Replicas\n",
+        "                0,  # TP\n",
+        "                conf[0],  # P replicas\n",
+        "                conf[1],  # P TP\n",
+        "                conf[2],  # D replicas\n",
+        "                conf[3],  # D TP\n",
+        "                conf[4],  # Directory\n",
+        "                True,  # Is PD\n",
+        "            )\n",
+        "        )\n",
+        "elif show_pd:\n",
+        "    for conf in configs_pd.values.tolist():\n",
+        "        config_sets.append(\n",
+        "            (\n",
+        "                0,  # Replicas\n",
+        "                0,  # TP\n",
+        "                conf[0],  # P replicas\n",
+        "                conf[1],  # P TP\n",
+        "                conf[2],  # D replicas\n",
+        "                conf[3],  # D TP\n",
+        "                0,  # Directory\n",
+        "                True,  # Is PD\n",
+        "            )\n",
+        "        )\n",
+        "\n",
+        "if show_sa and seg_by_dir:\n",
+        "    for conf in configs_sa.values.tolist():\n",
+        "        config_sets.append(\n",
+        "            (\n",
+        "                conf[0],  # Replicas\n",
+        "                conf[1],  # TP\n",
+        "                0,  # P replicas\n",
+        "                0,  # P TP\n",
+        "                0,  # D replicas\n",
+        "                0,  # D TP\n",
+        "                conf[2],  # Directory\n",
+        "                False,  # Is PD\n",
+        "            )\n",
+        "        )\n",
+        "elif show_sa:\n",
+        "    for conf in configs_sa.values.tolist():\n",
+        "        config_sets.append(\n",
+        "            (\n",
+        "                conf[0],  # Replicas\n",
+        "                conf[1],  # TP\n",
+        "                0,  # P replicas\n",
+        "                0,  # P TP\n",
+        "                0,  # D replicas\n",
+        "                0,  # D TP\n",
+        "                0,  # Directory\n",
+        "                False,  # Is PD\n",
+        "            )\n",
+        "        )\n",
-        "    if show_sa:\n",
-        "        for conf in configs_sa:\n",
-        "            config_sets.append((\n",
-        "                conf[0],  # Replicas\n",
-        "                conf[1],  # TP\n",
-        "                0,  # P replicas\n",
-        "                0,  # P TP\n",
-        "                0,  # D replicas\n",
-        "                0,  # D TP\n",
-        "                0,  # Directory\n",
-        "                False  # Is PD\n",
-        "            ))\n",
         "\n",
         "# Sort so printouts/plots are organized\n",
@@ -2690,86 +2749,109 @@
         "# Convert the list of sets to a list of dicts, to make code following clearer\n",
         "configs = []\n",
         "for conf in config_sets:\n",
-        "    configs.append({\n",
-        "        'rep': conf[0],\n",
-        "        'tp': conf[1],\n",
-        "        'p_rep': conf[2],\n",
-        "        'p_tp': conf[3],\n",
-        "        'd_rep': conf[4],\n",
-        "        'd_tp': conf[5],\n",
-        "        'dir': conf[6],\n",
-        "        'is_pd': conf[7],\n",
-        "    })\n",
+        "    configs.append(\n",
+        "        {\n",
+        "            \"rep\": conf[0],\n",
+        "            \"tp\": conf[1],\n",
+        "            \"p_rep\": conf[2],\n",
+        "            \"p_tp\": conf[3],\n",
+        "            \"d_rep\": conf[4],\n",
+        "            \"d_tp\": conf[5],\n",
+        "            \"dir\": conf[6],\n",
+        "            \"is_pd\": conf[7],\n",
+        "        }\n",
+        "    )\n",
         "\n",
         "if not configs:\n",
         "    if show_pd:\n",
-        "        print('No P/D configurations for this scenario!')\n",
+        "        print(\"No P/D configurations for this scenario!\")\n",
         "    if show_sa:\n",
-        "        print('No standalone configurations for this scenario!')\n",
+        "        print(\"No standalone configurations for this scenario!\")\n",
         "\n",
         "# Sweep through configurations\n",
         "for ii, conf in enumerate(configs):\n",
-        "    is_pd = 'P_TP' in conf\n",
+        "    is_pd = \"P_TP\" in conf\n",
         "    # Make a DataFrame for specific configuration\n",
-        "    if conf['is_pd']:\n",
+        "    if conf[\"is_pd\"]:\n",
         "        # This configuration is PD\n",
         "        if seg_by_dir:\n",
-        "            conf_df = pd_runs_selected[\n",
-        "                (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n",
-        "                (pd_runs_selected['P_TP'] == conf['p_tp']) &\n",
-        "                (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n",
-        "                (pd_runs_selected['D_TP'] == conf['d_tp']) &\n",
-        "                (pd_runs_selected['Directory'] == conf['dir'])\n",
-        "            ].drop('Directory', axis=1).sort_values(by='Concurrency')\n",
+        "            conf_df = (\n",
+        "                pd_runs_selected[\n",
+        "                    (pd_runs_selected[\"P_Replicas\"] == conf[\"p_rep\"])\n",
+        "                    & (pd_runs_selected[\"P_TP\"] == conf[\"p_tp\"])\n",
+        "                    & (pd_runs_selected[\"D_Replicas\"] == conf[\"d_rep\"])\n",
+        "                    & (pd_runs_selected[\"D_TP\"] == conf[\"d_tp\"])\n",
+        "                    & (pd_runs_selected[\"Directory\"] == conf[\"dir\"])\n",
+        "                ]\n",
+        "                .drop(\"Directory\", axis=1)\n",
+        "                .sort_values(by=\"Concurrency\")\n",
+        "            )\n",
         "        else:\n",
         "            conf_df = pd_runs_selected[\n",
-        "                (pd_runs_selected['P_Replicas'] == conf['p_rep']) &\n",
-        "                (pd_runs_selected['P_TP'] == conf['p_tp']) &\n",
-        "                (pd_runs_selected['D_Replicas'] == conf['d_rep']) &\n",
-        "                (pd_runs_selected['D_TP'] == conf['d_tp'])\n",
-        "            ].sort_values(by='Concurrency')\n",
-        "    \n",
+        "                (pd_runs_selected[\"P_Replicas\"] == conf[\"p_rep\"])\n",
+        "                & (pd_runs_selected[\"P_TP\"] == conf[\"p_tp\"])\n",
+        "                & (pd_runs_selected[\"D_Replicas\"] == conf[\"d_rep\"])\n",
+        "                & (pd_runs_selected[\"D_TP\"] == conf[\"d_tp\"])\n",
+        "            ].sort_values(by=\"Concurrency\")\n",
         "\n",
         "        # Plot throughputs for configuration\n",
-        "        plt.semilogx(conf_df.Mean_TTFT_ms, conf_df.Mean_TPOT_ms,\n",
-        "                 label=f'{conf['p_rep']}P-TP{conf['p_tp']} {conf['d_rep']}D-TP{conf['d_tp']}',\n",
-        "                 marker='o', markersize=4,\n",
-        "                 color=colors[ii%len(colors)]\n",
-        "                 )\n",
+        "        plt.semilogx(\n",
+        "            conf_df.Mean_TTFT_ms,\n",
+        "            conf_df.Mean_TPOT_ms,\n",
+        "            label=f\"{conf['p_rep']}P-TP{conf['p_tp']} 
{conf['d_rep']}D-TP{conf['d_tp']}\",\n", + " marker=\"o\",\n", + " markersize=4,\n", + " color=colors[ii % len(colors)],\n", + " )\n", " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TTFT_ms)[jj],\n", - " list(conf_df.Mean_TPOT_ms)[jj]+pd_runs_selected['Mean_TPOT_ms'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", + " plt.text(\n", + " list(conf_df.Mean_TTFT_ms)[jj],\n", + " list(conf_df.Mean_TPOT_ms)[jj] + pd_runs_selected[\"Mean_TPOT_ms\"].max() * 0.02,\n", + " str(val),\n", + " ha=\"center\",\n", + " color=colors[ii % len(colors)],\n", + " )\n", " else:\n", " # This configuration is standalone\n", " if seg_by_dir:\n", - " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp']) &\n", - " (sa_runs_selected['Directory'] == conf['dir'])\n", - " ].drop('Directory', axis=1).sort_values(by='Concurrency')\n", + " conf_df = (\n", + " sa_runs_selected[\n", + " (sa_runs_selected[\"Replicas\"] == conf[\"rep\"])\n", + " & (sa_runs_selected[\"TP\"] == conf[\"tp\"])\n", + " & (sa_runs_selected[\"Directory\"] == conf[\"dir\"])\n", + " ]\n", + " .drop(\"Directory\", axis=1)\n", + " .sort_values(by=\"Concurrency\")\n", + " )\n", " else:\n", " conf_df = sa_runs_selected[\n", - " (sa_runs_selected['Replicas'] == conf['rep']) &\n", - " (sa_runs_selected['TP'] == conf['tp'])\n", - " ].sort_values(by='Concurrency')\n", + " (sa_runs_selected[\"Replicas\"] == conf[\"rep\"]) & (sa_runs_selected[\"TP\"] == conf[\"tp\"])\n", + " ].sort_values(by=\"Concurrency\")\n", "\n", " # Plot throughputs for configuration\n", - " plt.semilogx(conf_df.Mean_TTFT_ms, conf_df.Mean_TPOT_ms,\n", - " label=f'Replicas: {conf['rep']} TP{conf['tp']}',\n", - " marker='o', markersize=4,\n", - " color=colors[ii%len(colors)]\n", - " )\n", + " plt.semilogx(\n", + " conf_df.Mean_TTFT_ms,\n", + " conf_df.Mean_TPOT_ms,\n", + " label=f\"Replicas: {conf['rep']} TP{conf['tp']}\",\n", + " marker=\"o\",\n", + " markersize=4,\n", + " color=colors[ii % len(colors)],\n", + " )\n", " for jj, val in enumerate(conf_df.Concurrency):\n", - " plt.text(list(conf_df.Mean_TTFT_ms)[jj],\n", - " list(conf_df.Mean_TPOT_ms)[jj]+sa_runs_selected['Mean_TPOT_ms'].max()*0.02,\n", - " str(val), ha='center', color=colors[ii%len(colors)])\n", + " plt.text(\n", + " list(conf_df.Mean_TTFT_ms)[jj],\n", + " list(conf_df.Mean_TPOT_ms)[jj] + sa_runs_selected[\"Mean_TPOT_ms\"].max() * 0.02,\n", + " str(val),\n", + " ha=\"center\",\n", + " color=colors[ii % len(colors)],\n", + " )\n", "\n", "if configs:\n", - " plt.title(f'TPOT vs TTFT\\nGPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}')\n", - " plt.xlabel('Mean TTFT (ms)', fontsize='16')\n", - " plt.ylabel('Mean TPOT (ms)', fontsize='16')\n", - " plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)\n", - " plt.grid(True, linewidth=1, ls='--', color='gray')\n", + " plt.title(f\"TPOT vs TTFT\\nGPU: {gpu}\\nModel: {model}\\nISL: {isl} OSL: {osl}\")\n", + " plt.xlabel(\"Mean TTFT (ms)\", fontsize=\"16\")\n", + " plt.ylabel(\"Mean TPOT (ms)\", fontsize=\"16\")\n", + " plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)\n", + " plt.grid(True, linewidth=1, ls=\"--\", color=\"gray\")\n", " plt.axis([None, None, 0, None])\n", " plt.show()" ] diff --git a/analysis/convert.py b/llm_d_benchmark/analysis/convert.py similarity index 100% rename from analysis/convert.py rename to llm_d_benchmark/analysis/convert.py diff --git a/analysis/fmperf-analyze_results.py 
b/llm_d_benchmark/analysis/fmperf-analyze_results.py similarity index 57% rename from analysis/fmperf-analyze_results.py rename to llm_d_benchmark/analysis/fmperf-analyze_results.py index 79503116..a683c283 100755 --- a/analysis/fmperf-analyze_results.py +++ b/llm_d_benchmark/analysis/fmperf-analyze_results.py @@ -1,21 +1,21 @@ #!/usr/bin/env python3 -import pandas as pd -import matplotlib.pyplot as plt -import seaborn as sns +import argparse import glob +import logging import os -import argparse import shutil -import logging + +import matplotlib.pyplot as plt +import pandas as pd +import seaborn as sns + # Configure logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s' -) +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) + def load_and_combine_csvs(directory): """Load all CSV files from the directory and combine them.""" all_data = [] @@ -34,12 +34,12 @@ def load_and_combine_csvs(directory): for csv_file in csv_files: try: # Extract QPS from filename - qps = float(os.path.basename(csv_file).split('_')[-1].replace('.csv', '').replace('qps','')) + qps = float(os.path.basename(csv_file).split("_")[-1].replace(".csv", "").replace("qps", "")) df = pd.read_csv(csv_file) - df['qps'] = qps + df["qps"] = qps # Add model name from parent directory model_name = os.path.basename(os.path.dirname(csv_file)) - df['model'] = model_name + df["model"] = model_name all_data.append(df) logger.info(f"Loaded data from: {csv_file}") except Exception as e: @@ -52,11 +52,12 @@ def load_and_combine_csvs(directory): return pd.concat(all_data, ignore_index=True) + def create_plots_readme(plots_dir): """Create a README.md file describing the plots.""" script_dir = os.path.dirname(os.path.abspath(__file__)) - template_path = os.path.join(script_dir, 'readme-analyze-template.md') - readme_path = os.path.join(plots_dir, 'README.md') + template_path = os.path.join(script_dir, "readme-analyze-template.md") + readme_path = os.path.join(plots_dir, "README.md") if os.path.exists(template_path): shutil.copyfile(template_path, readme_path) @@ -105,108 +106,117 @@ def create_plots_readme(plots_dir): - Helps understand the input/output token ratio - Useful for capacity planning """ - with open(readme_path, 'w') as f: + with open(readme_path, "w") as f: f.write(readme_content) logger.info(f"Created README.md at: {readme_path}") + # --- Chart Prettification Settings --- def set_pretty_plot_style(): sns.set_theme(style="whitegrid", palette="Set2", font_scale=1.2) - plt.rcParams['axes.titlesize'] = 16 - plt.rcParams['axes.titleweight'] = 'bold' - plt.rcParams['axes.labelsize'] = 14 - plt.rcParams['axes.labelweight'] = 'normal' - plt.rcParams['legend.fontsize'] = 12 - plt.rcParams['xtick.labelsize'] = 12 - plt.rcParams['ytick.labelsize'] = 12 - plt.rcParams['figure.figsize'] = [15, 10] - plt.rcParams['savefig.dpi'] = 150 - plt.rcParams['savefig.transparent'] = True + plt.rcParams["axes.titlesize"] = 16 + plt.rcParams["axes.titleweight"] = "bold" + plt.rcParams["axes.labelsize"] = 14 + plt.rcParams["axes.labelweight"] = "normal" + plt.rcParams["legend.fontsize"] = 12 + plt.rcParams["xtick.labelsize"] = 12 + plt.rcParams["ytick.labelsize"] = 12 + plt.rcParams["figure.figsize"] = [15, 10] + plt.rcParams["savefig.dpi"] = 150 + plt.rcParams["savefig.transparent"] = True + def analyze_latency(df, plots_dir): set_pretty_plot_style() fig, axes = plt.subplots(2, 2, figsize=(18, 12)) # Plot 1: Time to First Token 
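# --- illustration: parsing QPS out of result file names ------------------------
# load_and_combine_csvs() above recovers the request rate from the file name:
# the last "_"-separated token with the ".csv" suffix and an optional "qps"
# marker stripped. Equivalent standalone helper (the example file names are
# assumptions based on that parsing logic):
import os

def qps_from_filename(path: str) -> float:
    token = os.path.basename(path).split("_")[-1]
    return float(token.replace(".csv", "").replace("qps", ""))

assert qps_from_filename("/results/llama_2.0qps.csv") == 2.0
assert qps_from_filename("run_0.5.csv") == 0.5
# ------------------------------------------------------------------------------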
(TTFT) vs QPS - sns.boxplot(x='qps', y='ttft', data=df, ax=axes[0, 0]) - axes[0, 0].set_title('Time to First Token vs QPS') - axes[0, 0].set_xlabel('Queries per Second') - axes[0, 0].set_ylabel('TTFT (seconds)') + sns.boxplot(x="qps", y="ttft", data=df, ax=axes[0, 0]) + axes[0, 0].set_title("Time to First Token vs QPS") + axes[0, 0].set_xlabel("Queries per Second") + axes[0, 0].set_ylabel("TTFT (seconds)") # Plot 2: Generation Time vs QPS - sns.boxplot(x='qps', y='generation_time', data=df, ax=axes[0, 1]) - axes[0, 1].set_title('Generation Time vs QPS') - axes[0, 1].set_xlabel('Queries per Second') - axes[0, 1].set_ylabel('Generation Time (seconds)') + sns.boxplot(x="qps", y="generation_time", data=df, ax=axes[0, 1]) + axes[0, 1].set_title("Generation Time vs QPS") + axes[0, 1].set_xlabel("Queries per Second") + axes[0, 1].set_ylabel("Generation Time (seconds)") # Plot 3: Total Time (TTFT + Generation) vs QPS - df['total_time'] = df['ttft'] + df['generation_time'] - sns.boxplot(x='qps', y='total_time', data=df, ax=axes[1, 0]) - axes[1, 0].set_title('Total Time vs QPS') - axes[1, 0].set_xlabel('Queries per Second') - axes[1, 0].set_ylabel('Total Time (seconds)') + df["total_time"] = df["ttft"] + df["generation_time"] + sns.boxplot(x="qps", y="total_time", data=df, ax=axes[1, 0]) + axes[1, 0].set_title("Total Time vs QPS") + axes[1, 0].set_xlabel("Queries per Second") + axes[1, 0].set_ylabel("Total Time (seconds)") # Plot 4: Token Generation Rate vs QPS - df['tokens_per_second'] = df['generation_tokens'] / df['generation_time'] - sns.boxplot(x='qps', y='tokens_per_second', data=df, ax=axes[1, 1]) - axes[1, 1].set_title('Token Generation Rate vs QPS') - axes[1, 1].set_xlabel('Queries per Second') - axes[1, 1].set_ylabel('Tokens per Second') + df["tokens_per_second"] = df["generation_tokens"] / df["generation_time"] + sns.boxplot(x="qps", y="tokens_per_second", data=df, ax=axes[1, 1]) + axes[1, 1].set_title("Token Generation Rate vs QPS") + axes[1, 1].set_xlabel("Queries per Second") + axes[1, 1].set_ylabel("Tokens per Second") for ax in axes.flat: sns.despine(ax=ax) plt.tight_layout(pad=2) - plt.savefig(os.path.join(plots_dir, 'latency_analysis.png')) + plt.savefig(os.path.join(plots_dir, "latency_analysis.png")) plt.close() + def analyze_throughput(df, plots_dir): set_pretty_plot_style() fig, axes = plt.subplots(1, 2, figsize=(18, 5)) # Calculate throughput metrics - throughput_data = df.groupby('qps').agg({ - 'prompt_tokens': 'mean', - 'generation_tokens': 'mean', - 'generation_time': 'mean' - }).reset_index() - throughput_data['tokens_per_second'] = ( - throughput_data['prompt_tokens'] + throughput_data['generation_tokens'] - ) / throughput_data['generation_time'] + throughput_data = ( + df.groupby("qps") + .agg({"prompt_tokens": "mean", "generation_tokens": "mean", "generation_time": "mean"}) + .reset_index() + ) + throughput_data["tokens_per_second"] = ( + throughput_data["prompt_tokens"] + throughput_data["generation_tokens"] + ) / throughput_data["generation_time"] # Plot throughput - sns.barplot(x='qps', y='tokens_per_second', data=throughput_data, ax=axes[0]) - axes[0].set_title('Throughput (Tokens/Second) vs QPS') - axes[0].set_xlabel('Queries per Second') - axes[0].set_ylabel('Tokens per Second') + sns.barplot(x="qps", y="tokens_per_second", data=throughput_data, ax=axes[0]) + axes[0].set_title("Throughput (Tokens/Second) vs QPS") + axes[0].set_xlabel("Queries per Second") + axes[0].set_ylabel("Tokens per Second") # Plot token counts throughput_data_melted = pd.melt( 
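# --- illustration: per-QPS throughput aggregation ------------------------------
# analyze_throughput() here reduces the per-request frame to one row per QPS
# level, then derives tokens/s from the mean token counts and the mean
# generation time. The same reduction on a toy frame (numbers are synthetic):
import pandas as pd

df = pd.DataFrame(
    {
        "qps": [0.5, 0.5, 1.0, 1.0],
        "prompt_tokens": [100, 120, 100, 110],
        "generation_tokens": [200, 180, 210, 190],
        "generation_time": [2.0, 1.8, 2.5, 2.4],
    }
)
per_qps = (
    df.groupby("qps")
    .agg({"prompt_tokens": "mean", "generation_tokens": "mean", "generation_time": "mean"})
    .reset_index()
)
per_qps["tokens_per_second"] = (per_qps["prompt_tokens"] + per_qps["generation_tokens"]) / per_qps["generation_time"]
print(per_qps)
# ------------------------------------------------------------------------------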
throughput_data,
-        id_vars=['qps'],
-        value_vars=['prompt_tokens', 'generation_tokens'],
-        var_name='Token Type',
-        value_name='Count'
+        id_vars=["qps"],
+        value_vars=["prompt_tokens", "generation_tokens"],
+        var_name="Token Type",
+        value_name="Count",
     )
-    sns.barplot(x='qps', y='Count', hue='Token Type', data=throughput_data_melted, ax=axes[1])
-    axes[1].set_title('Token Counts vs QPS')
-    axes[1].set_xlabel('Queries per Second')
-    axes[1].set_ylabel('Number of Tokens')
-    axes[1].legend(title='Token Type', loc='upper right')
+    sns.barplot(x="qps", y="Count", hue="Token Type", data=throughput_data_melted, ax=axes[1])
+    axes[1].set_title("Token Counts vs QPS")
+    axes[1].set_xlabel("Queries per Second")
+    axes[1].set_ylabel("Number of Tokens")
+    axes[1].legend(title="Token Type", loc="upper right")
 
     for ax in axes.flat:
         sns.despine(ax=ax)
     plt.tight_layout(pad=2)
-    plt.savefig(os.path.join(plots_dir, 'throughput_analysis.png'))
+    plt.savefig(os.path.join(plots_dir, "throughput_analysis.png"))
     plt.close()
 
+
 def print_statistics(df, data_dir):
     """Print key statistics about the benchmark results."""
-    sep="=" * 50
-
-    qps_stats = df.groupby('qps').agg({
-        'ttft': ['mean', 'std', 'min', 'max'],
-        'generation_time': ['mean', 'std', 'min', 'max'],
-        'prompt_tokens': 'mean',
-        'generation_tokens': 'mean'
-    }).round(4)
+    sep = "=" * 50
+
+    qps_stats = (
+        df.groupby("qps")
+        .agg(
+            {
+                "ttft": ["mean", "std", "min", "max"],
+                "generation_time": ["mean", "std", "min", "max"],
+                "prompt_tokens": "mean",
+                "generation_tokens": "mean",
+            }
+        )
+        .round(4)
+    )
 
-    token_stats = df.agg({
-        'prompt_tokens': ['mean', 'std', 'min', 'max'],
-        'generation_tokens': ['mean', 'std', 'min', 'max']
-    }).round(4)
+    token_stats = df.agg(
+        {"prompt_tokens": ["mean", "std", "min", "max"], "generation_tokens": ["mean", "std", "min", "max"]}
+    ).round(4)
 
-    _msg=f"\nBenchmark Statistics:\
+    _msg = f"\nBenchmark Statistics:\
 \n{sep}\
 \nnOverall Statistics:\
 \nTotal number of requests: {len(df)}\
@@ -219,39 +229,45 @@ def print_statistics(df, data_dir):
 
     print(_msg)
 
-    with open(f"{data_dir}/stats.txt", 'w') as fp :
+    with open(f"{data_dir}/stats.txt", "w") as fp:
         fp.write(_msg)
 
+
 def main():
     # Parse command line arguments
    env_vars = os.environ
-    if 'LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY' in env_vars and 'LLMDBENCH_RUN_EXPERIMENT_LAUNCHER' in env_vars :
-        if env_vars['LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY'] == "1" and env_vars['LLMDBENCH_RUN_EXPERIMENT_LAUNCHER'] == "1" :
-            logger.info(f"\nEnviroment variable \"LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY\" is set to \"1\", and this is a pod. Will skip execution")
+    if "LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY" in env_vars and "LLMDBENCH_RUN_EXPERIMENT_LAUNCHER" in env_vars:
+        if (
+            env_vars["LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY"] == "1"
+            and env_vars["LLMDBENCH_RUN_EXPERIMENT_LAUNCHER"] == "1"
+        ):
+            logger.info(
+                '\nEnvironment variable "LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY" is set to "1", and this is a pod. 
Will skip execution'
+            )
             exit(0)
 
     default_dir = "/tmp/"
-    if 'LLMDBENCH_CONTROL_WORK_DIR' in env_vars:
+    if "LLMDBENCH_CONTROL_WORK_DIR" in env_vars:
         default_dir = f"{env_vars['LLMDBENCH_CONTROL_WORK_DIR']}"
 
-    if os.path.exists(f"{default_dir}/results") :
+    if os.path.exists(f"{default_dir}/results"):
         default_dir = f"{default_dir}/results"
 
-    parser = argparse.ArgumentParser(description='Analyze benchmark results from CSV files.')
-    parser.add_argument('--results-dir',
-                       default=default_dir,
-                       help=f'Directory containing the CSV files (default: {default_dir}')
+    parser = argparse.ArgumentParser(description="Analyze benchmark results from CSV files.")
+    parser.add_argument(
+        "--results-dir", default=default_dir, help=f"Directory containing the CSV files (default: {default_dir})"
+    )
     args = parser.parse_args()
 
     # Set style
     sns.set_style("whitegrid")
-    plt.rcParams['figure.figsize'] = [12, 8]
+    plt.rcParams["figure.figsize"] = [12, 8]
 
     # Create plots directory
-    plots_dir = f"{args.results_dir.replace('/results','')}/analysis/plots"
-    data_dir = f"{args.results_dir.replace('/results','')}/analysis/data"
+    plots_dir = f"{args.results_dir.replace('/results', '')}/analysis/plots"
+    data_dir = f"{args.results_dir.replace('/results', '')}/analysis/data"
     os.makedirs(plots_dir, exist_ok=True)
     os.makedirs(data_dir, exist_ok=True)
@@ -278,5 +294,6 @@ def main():
     logger.info(f"- {os.path.join(plots_dir, 'throughput_analysis.png')}")
     logger.info(f"- {os.path.join(plots_dir, 'README.md')}")
 
+
 if __name__ == "__main__":
     main()
diff --git a/analysis/guidellm-analyze_results.sh b/llm_d_benchmark/analysis/guidellm-analyze_results.sh
similarity index 100%
rename from analysis/guidellm-analyze_results.sh
rename to llm_d_benchmark/analysis/guidellm-analyze_results.sh
diff --git a/analysis/inference-perf-analyze_results.sh b/llm_d_benchmark/analysis/inference-perf-analyze_results.sh
similarity index 100%
rename from analysis/inference-perf-analyze_results.sh
rename to llm_d_benchmark/analysis/inference-perf-analyze_results.sh
diff --git a/analysis/nop-analyze_results.py b/llm_d_benchmark/analysis/nop-analyze_results.py
similarity index 93%
rename from analysis/nop-analyze_results.py
rename to llm_d_benchmark/analysis/nop-analyze_results.py
index 56dab471..1dcd6d1a 100755
--- a/analysis/nop-analyze_results.py
+++ b/llm_d_benchmark/analysis/nop-analyze_results.py
@@ -4,16 +4,17 @@
 Benchmark 'nop' analysis
 """
 
-from datetime import datetime
 import io
-import os
 import logging
+import os
+from datetime import datetime
 from typing import Any
+
 import pandas as pd
 import yaml
-
 from schema import BenchmarkReport
+
 # Configure logging
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.DEBUG)
@@ -122,11 +123,7 @@ def write_benchmark_reports(file: io.TextIOWrapper, benchmark_report: BenchmarkR
     write_benchmark_scenario(file, benchmark_report)
     file.write("\n")
 
-    time_iso = (
-        datetime.fromtimestamp(benchmark_report.metrics.time.start)
-        .astimezone()
-        .isoformat()
-    )
+    time_iso = datetime.fromtimestamp(benchmark_report.metrics.time.start).astimezone().isoformat()
     duration = benchmark_report.metrics.time.duration
     metrics_metadata = benchmark_report.metrics.metadata
@@ -148,9 +145,7 @@
     if load_cached_compiled_graph is not None or compile_graph is not None:
         file.write("    Compiled Graph\n")
         if load_cached_compiled_graph is not None:
-            file.write(
-                f"        Load from Cache(secs)  :  {load_cached_compiled_graph['value']:7.3f}\n"
-            )
+            file.write(f" 
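# --- illustration: results-directory fallback chain ----------------------------
# main() above (fmperf-analyze_results.py) resolves its default results
# directory: LLMDBENCH_CONTROL_WORK_DIR when set, otherwise /tmp/, preferring a
# results/ subdirectory when one exists. A standalone sketch of that behavior,
# inferred from the hunk above:
import os

def resolve_results_dir(env: dict) -> str:
    base = env.get("LLMDBENCH_CONTROL_WORK_DIR", "/tmp/")
    candidate = f"{base}/results"
    return candidate if os.path.exists(candidate) else base

print(resolve_results_dir(dict(os.environ)))
# ------------------------------------------------------------------------------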
Load from Cache(secs) : {load_cached_compiled_graph['value']:7.3f}\n") if compile_graph is not None: file.write(f" Compile(secs) : {compile_graph['value']:7.3f}\n") file.write(" Sleep\n") @@ -197,13 +192,9 @@ def main(): logger.addHandler(console_handler) # read possible existent universal yaml file - benchmark_report_filepath = os.path.join( - requests_dir, "benchmark_report", "result.yaml" - ) + benchmark_report_filepath = os.path.join(requests_dir, "benchmark_report", "result.yaml") if not os.path.isfile(benchmark_report_filepath): - logger.info( - "no benchmark reports file found on path: %s", benchmark_report_filepath - ) + logger.info("no benchmark reports file found on path: %s", benchmark_report_filepath) return benchmark_report = None diff --git a/analysis/schema.py b/llm_d_benchmark/analysis/schema.py similarity index 100% rename from analysis/schema.py rename to llm_d_benchmark/analysis/schema.py diff --git a/analysis/to_be_incorporated/README.md b/llm_d_benchmark/analysis/to_be_incorporated/README.md similarity index 100% rename from analysis/to_be_incorporated/README.md rename to llm_d_benchmark/analysis/to_be_incorporated/README.md diff --git a/llm_d_benchmark/analysis/to_be_incorporated/plot_benchmark_metrics.py b/llm_d_benchmark/analysis/to_be_incorporated/plot_benchmark_metrics.py new file mode 100644 index 00000000..36ab198b --- /dev/null +++ b/llm_d_benchmark/analysis/to_be_incorporated/plot_benchmark_metrics.py @@ -0,0 +1,244 @@ +import argparse +import glob +import os +import re + +import matplotlib.pyplot as plt +import pandas as pd + + +# Define method types and their display names +METHOD_TYPES = { + "vllm": "vLLM v1", + "llm-d": "LLM-d", + "vllm-prod": "vLLM + LMCache", + "lmcache": "vLLM Production Stack + LMCache", + "lmcache-0310": "vLLM Production Stack + LMCache (03-10-2025)", + "vllm-70b": "vLLM v1", + "baseline-llm-d-70b": "llm-d w/o KVCache offloading", + "lmcache-llm-d-70b": "llm-d w KVCache offloading", + "lmcache-indexing-llm-d-70b": "llm-d w KVCache offloading + KVCache indexing", + "lmcache-vllm-70b": "Production Stack(vLLM v1) + LMCache", + "vllm-70b-2replicas": "vLLM v1 (2 replicas) + Round Robin", + "llm-d-70b-2replicas": "llm-d (2 replicas)" + "\n" + "KVCache (score=2) & Load (score=1) aware routing", + "vllm-standalone-llama-3-70b-2replicas-H100": "vLLM v1 (2 replicas) + Round Robin (H100)", + "llm-d-70b-2replicas-H100": "llm-d (2 replicas)" + "\n" + "Prefix (score=2) & Load (score=1) aware routing (H100)", + "llm-d-70b-2replicas-H100-no-router": "llm-d (2 replicas)" + "\n" + "Round Robin (H100)", + "vllm-llama4-tp4": "vLLM v1 (TP=4)", + "llm-d-llama4-tp4": "llm-d (TP=4)", + "lmcache-llm-d-llama4-tp4": "llm-d w KVCache offloading (TP=4)", +} + +# Define benchmark types and their titles +BENCHMARK_TYPES = { + "sharegpt": "ShareGPT", + "long_input": "Long Input Short Output", + "short_input": "Short Input Short Output", +} + +# Define QPS ranges for each benchmark type +BENCHMARK_QPS_RANGES = { + # 'sharegpt': (0, 1.4), + "sharegpt": (0, 100.0), + "long_input": (0, 1.2), + "short_input": (0, 10.0), +} + +# Define y-axis ranges for each metric +BENCHMARK_Y_RANGES = { + "itl": (0, 0.1), # Inter-token Latency in seconds + "ttft": (0, 1.0), # Time to First Token in seconds + "throughput": (5000, 30000), # Throughput in tokens per second +} + + +def extract_qps(filename): + # Try to extract QPS value from filename + # Pattern 1: LMBench_sharegpt_output_0.5.csv -> 0.5 + # Pattern 2: LMBench_short_input_qps0.5.csv -> 0.5 + match = 
re.search(r"(?:output_|qps)(\d+\.?\d*)\.csv", filename) + if match: + return float(match.group(1)) + return None + + +def calculate_itl(df): + # Calculate ITL (Inter-token Latency) as generation_time / generation_tokens + return df["generation_time"] / df["generation_tokens"] + + +def calculate_throughput(df): + # Calculate total tokens (input + output) + total_tokens = df["prompt_tokens"].sum() + df["generation_tokens"].sum() + + # Calculate total time (latest finish time - earliest launch time) + total_time = df["finish_time"].max() - df["launch_time"].min() + + # Calculate throughput (tokens per second) + return total_tokens / total_time + + +def process_csv_files(benchmark_type, method, benchmark_dir): + # Get all CSV files matching the pattern + data_dir = os.path.join(benchmark_dir, method) + pattern = f"LMBench_{benchmark_type}_*.csv" + csv_files = glob.glob(os.path.join(data_dir, pattern)) + + if not csv_files: + print(f"No CSV files found for {benchmark_type} in {data_dir}") + return None + + # Store results + results = {"qps": [], "itl": [], "ttft": [], "throughput": []} + + # Process each file + for file in sorted(csv_files): + qps = extract_qps(file) + if qps is None: + print(f"Could not extract QPS from filename: {file}") + continue + + try: + # Read CSV file + df = pd.read_csv(file) + + # Calculate metrics + itl = calculate_itl(df).mean() + ttft = df["ttft"].mean() + throughput = calculate_throughput(df) + + results["qps"].append(qps) + results["itl"].append(itl) + results["ttft"].append(ttft) + results["throughput"].append(throughput) + + print(f"Processed {file}:") + print(f" QPS={qps}") + print(f" Avg ITL={itl:.4f}s") + print(f" Avg TTFT={ttft:.4f}s") + print(f" Throughput={throughput:.2f} tokens/s") + except Exception as e: + print(f"Error processing {file}: {str(e)}") + continue + + if not results["qps"]: + print(f"No valid data found for {benchmark_type}") + return None + + # Sort all metrics by QPS + sorted_indices = sorted(range(len(results["qps"])), key=lambda i: results["qps"][i]) + for key in results: + results[key] = [results[key][i] for i in sorted_indices] + + return results + + +def plot_metrics(results_dict, benchmark_type, title, benchmark_dir, model_name): + if not results_dict: + return + + # Create figure with three subplots + fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 6)) + + # Add main title with model name + fig.suptitle(f"{title} - {model_name}", fontsize=20, y=1.02) + + # Define colors for different methods + colors = ["bo-", "ro-", "go-", "mo-", "co-", "yo-"] + + # Get QPS range for this benchmark type + qps_min, qps_max = BENCHMARK_QPS_RANGES[benchmark_type] + + # Plot ITL + for i, (method, results) in enumerate(results_dict.items()): + if results: + ax1.plot( + results["qps"], + results["itl"], + colors[i % len(colors)], + linewidth=2, + markersize=8, + label=METHOD_TYPES[method], + ) + ax1.set_xlabel("QPS") + ax1.set_ylabel("Average Inter-token Latency (s)") + ax1.set_title("Average Inter-token Latency vs QPS") + ax1.set_xlim(qps_min, qps_max) + ax1.set_ylim(BENCHMARK_Y_RANGES["itl"]) + ax1.grid(True) + ax1.legend() + + # Plot TTFT + for i, (method, results) in enumerate(results_dict.items()): + if results: + ax2.plot( + results["qps"], + results["ttft"], + colors[i % len(colors)], + linewidth=2, + markersize=8, + label=METHOD_TYPES[method], + ) + ax2.set_xlabel("QPS") + ax2.set_ylabel("Average Time to First Token (s)") + ax2.set_title("Average Time to First Token vs QPS") + ax2.set_xlim(qps_min, qps_max) + 
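# --- illustration: the ITL approximation ---------------------------------------
# calculate_itl() above approximates inter-token latency per request as
# generation_time / generation_tokens; the reported metric is the mean of that
# ratio across requests. Worked example with two synthetic rows:
import pandas as pd

df = pd.DataFrame({"generation_time": [2.0, 3.0], "generation_tokens": [100, 120]})
itl = df["generation_time"] / df["generation_tokens"]  # seconds per generated token
print(itl.mean())  # (0.020 + 0.025) / 2 = 0.0225 s/token
# ------------------------------------------------------------------------------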
ax2.set_ylim(BENCHMARK_Y_RANGES["ttft"]) + ax2.grid(True) + ax2.legend() + + # Plot Throughput + for i, (method, results) in enumerate(results_dict.items()): + if results: + ax3.plot( + results["qps"], + results["throughput"], + colors[i % len(colors)], + linewidth=2, + markersize=8, + label=METHOD_TYPES[method], + ) + ax3.set_xlabel("QPS") + ax3.set_ylabel("Throughput (tokens/s)") + ax3.set_title("Throughput vs QPS") + ax3.set_xlim(qps_min, qps_max) + ax3.set_ylim(BENCHMARK_Y_RANGES["throughput"]) + ax3.grid(True) + ax3.legend() + + # Adjust layout and save + plt.tight_layout() + output_file = os.path.join(os.path.dirname(__file__), f"benchmark_metrics_{benchmark_type}.png") + plt.savefig(output_file, bbox_inches="tight") + plt.close() + print(f"Plot for {title} saved to {output_file}") + + +def main(): + # Set up argument parser + parser = argparse.ArgumentParser(description="Plot benchmark metrics from CSV files") + parser.add_argument( + "--benchmark-dir", + default=os.path.join(os.path.dirname(__file__), "..", "data", "k8s", "lmbenchmark"), + help="Path to the benchmark directory containing the method subdirectories", + ) + parser.add_argument( + "--model-name", + default="Llama-3.1-8B-Instruct", + help="Name of the model being benchmarked (default: Llama-3.1-8B-Instruct)", + ) + args = parser.parse_args() + + # Process and plot each benchmark type + for benchmark_type, title in BENCHMARK_TYPES.items(): + print(f"\nProcessing {title} benchmark for {args.model_name}...") + results_dict = {} + for method in METHOD_TYPES.keys(): + results = process_csv_files(benchmark_type, method, args.benchmark_dir) + if results: + results_dict[method] = results + plot_metrics(results_dict, benchmark_type, title, args.benchmark_dir, args.model_name) + + +if __name__ == "__main__": + main() diff --git a/analysis/to_be_incorporated/plot_itl_vs_qps.py b/llm_d_benchmark/analysis/to_be_incorporated/plot_itl_vs_qps.py similarity index 72% rename from analysis/to_be_incorporated/plot_itl_vs_qps.py rename to llm_d_benchmark/analysis/to_be_incorporated/plot_itl_vs_qps.py index 7e388406..f554d9e4 100644 --- a/analysis/to_be_incorporated/plot_itl_vs_qps.py +++ b/llm_d_benchmark/analysis/to_be_incorporated/plot_itl_vs_qps.py @@ -1,76 +1,81 @@ -import pandas as pd -import matplotlib.pyplot as plt import glob import os import re +import matplotlib.pyplot as plt +import pandas as pd + + def extract_qps(filename): # Extract QPS value from filename (e.g., LMBench_sharegpt_output_0.5.csv -> 0.5) - match = re.search(r'output_(\d+\.?\d*)\.csv', filename) + match = re.search(r"output_(\d+\.?\d*)\.csv", filename) if match: return float(match.group(1)) return None + def calculate_itl(df): # Calculate ITL (Inter-token Latency) as generation_time / generation_tokens - return df['generation_time'] / df['generation_tokens'] + return df["generation_time"] / df["generation_tokens"] + def main(): # Get all CSV files matching the pattern - data_dir = os.path.join(os.path.dirname(__file__), '..', 'data', 'k8s', 'lmbenchmark') - csv_files = glob.glob(os.path.join(data_dir, 'LMBench_sharegpt_output_*.csv')) - + data_dir = os.path.join(os.path.dirname(__file__), "..", "data", "k8s", "lmbenchmark") + csv_files = glob.glob(os.path.join(data_dir, "LMBench_sharegpt_output_*.csv")) + if not csv_files: print(f"No CSV files found in {data_dir}") return - + # Store results qps_values = [] avg_itl_values = [] - + # Process each file for file in sorted(csv_files): qps = extract_qps(file) if qps is None: print(f"Could not extract QPS from 
filename: {file}") continue - + try: # Read CSV file df = pd.read_csv(file) - + # Calculate ITL itl = calculate_itl(df) avg_itl = itl.mean() - + qps_values.append(qps) avg_itl_values.append(avg_itl) print(f"Processed {file}: QPS={qps}, Avg ITL={avg_itl:.4f}s") except Exception as e: print(f"Error processing {file}: {str(e)}") continue - + if not qps_values: print("No valid data found in any CSV files") return - + # Sort QPS and ITL values sorted_pairs = sorted(zip(qps_values, avg_itl_values)) qps_values, avg_itl_values = zip(*sorted_pairs) - + # Create the plot plt.figure(figsize=(10, 6)) - plt.plot(qps_values, avg_itl_values, 'bo-', linewidth=2, markersize=8) - plt.xlabel('QPS') - plt.ylabel('Average Inter-token Latency (s)') - plt.title('Average Inter-token Latency vs QPS') + plt.plot(qps_values, avg_itl_values, "bo-", linewidth=2, markersize=8) + plt.xlabel("QPS") + plt.ylabel("Average Inter-token Latency (s)") + plt.title("Average Inter-token Latency vs QPS") plt.grid(True) - + # Save the plot - output_file = os.path.join(os.path.dirname(__file__), 'itl_vs_qps.png') + output_file = os.path.join(os.path.dirname(__file__), "itl_vs_qps.png") plt.savefig(output_file) plt.close() print(f"Plot saved to {output_file}") + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/analysis/to_be_incorporated/plot_pd_results.py b/llm_d_benchmark/analysis/to_be_incorporated/plot_pd_results.py similarity index 56% rename from analysis/to_be_incorporated/plot_pd_results.py rename to llm_d_benchmark/analysis/to_be_incorporated/plot_pd_results.py index eab2a3ed..0aa4fec4 100644 --- a/analysis/to_be_incorporated/plot_pd_results.py +++ b/llm_d_benchmark/analysis/to_be_incorporated/plot_pd_results.py @@ -1,123 +1,117 @@ -import json import glob -import numpy as np -import matplotlib.pyplot as plt +import json import os +import matplotlib.pyplot as plt +import numpy as np + + def load_and_average_metrics(directory): """Load all JSON files in a directory and calculate average metrics.""" json_files = glob.glob(os.path.join(directory, "*.json")) print(f"Found {len(json_files)} JSON files in {directory}") - - metrics = { - 'mean_ttft_ms': [], - 'p95_ttft_ms': [], - 'mean_itl_ms': [], - 'p95_itl_ms': [] - } - + + metrics = {"mean_ttft_ms": [], "p95_ttft_ms": [], "mean_itl_ms": [], "p95_itl_ms": []} + for file in json_files: - with open(file, 'r') as f: + with open(file, "r") as f: data = json.load(f) for metric in metrics.keys(): metrics[metric].append(data[metric]) - + # Calculate averages averages = {k: np.mean(v) for k, v in metrics.items()} print(f"Averages for {directory}:", averages) return averages + def plot_comparison(llm_d_metrics, vllm_metrics, title_prefix, output_path): """Create a plot with two subplots comparing TTFT and ITL metrics.""" print(f"\nPlotting comparison for {title_prefix}") print("llm-d metrics:", llm_d_metrics) print("vllm metrics:", vllm_metrics) - + fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6)) - + # Data for plotting metrics = ["Mean", "P95"] x = np.arange(len(metrics)) bar_width = 0.35 - + # TTFT data reshaping - llm_d_ttft = [llm_d_metrics['mean_ttft_ms'], llm_d_metrics['p95_ttft_ms']] - vllm_ttft = [vllm_metrics['mean_ttft_ms'], vllm_metrics['p95_ttft_ms']] - + llm_d_ttft = [llm_d_metrics["mean_ttft_ms"], llm_d_metrics["p95_ttft_ms"]] + vllm_ttft = [vllm_metrics["mean_ttft_ms"], vllm_metrics["p95_ttft_ms"]] + # ITL data reshaping - llm_d_itl = [llm_d_metrics['mean_itl_ms'], llm_d_metrics['p95_itl_ms']] - vllm_itl = 
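# --- illustration: averaging metrics across run files --------------------------
# load_and_average_metrics() above reads mean/p95 TTFT and ITL from each run's
# JSON and averages every metric across runs with np.mean. The same reduction
# over in-memory dicts (keys match the code; the values are invented):
import numpy as np

runs = [
    {"mean_ttft_ms": 120.0, "p95_ttft_ms": 300.0, "mean_itl_ms": 20.0, "p95_itl_ms": 45.0},
    {"mean_ttft_ms": 140.0, "p95_ttft_ms": 320.0, "mean_itl_ms": 22.0, "p95_itl_ms": 47.0},
]
averages = {k: float(np.mean([r[k] for r in runs])) for k in runs[0]}
print(averages["mean_ttft_ms"])  # 130.0
# ------------------------------------------------------------------------------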
[vllm_metrics['mean_itl_ms'], vllm_metrics['p95_itl_ms']] - + llm_d_itl = [llm_d_metrics["mean_itl_ms"], llm_d_metrics["p95_itl_ms"]] + vllm_itl = [vllm_metrics["mean_itl_ms"], vllm_metrics["p95_itl_ms"]] + # TTFT subplot - ax1.bar(x - bar_width/2, llm_d_ttft, bar_width, label='llm-d', color='skyblue', alpha=0.8) - ax1.bar(x + bar_width/2, vllm_ttft, bar_width, label='vLLM v1', color='lightcoral', alpha=0.8) - + ax1.bar(x - bar_width / 2, llm_d_ttft, bar_width, label="llm-d", color="skyblue", alpha=0.8) + ax1.bar(x + bar_width / 2, vllm_ttft, bar_width, label="vLLM v1", color="lightcoral", alpha=0.8) + # Add value labels for i, v in enumerate(llm_d_ttft): - ax1.text(i - bar_width/2, v, f'{v:.1f}', ha='center', va='bottom') + ax1.text(i - bar_width / 2, v, f"{v:.1f}", ha="center", va="bottom") for i, v in enumerate(vllm_ttft): - ax1.text(i + bar_width/2, v, f'{v:.1f}', ha='center', va='bottom') - - ax1.set_xlabel('Metric') - ax1.set_ylabel('Time (ms)') - ax1.set_title(f'{title_prefix} - TTFT') + ax1.text(i + bar_width / 2, v, f"{v:.1f}", ha="center", va="bottom") + + ax1.set_xlabel("Metric") + ax1.set_ylabel("Time (ms)") + ax1.set_title(f"{title_prefix} - TTFT") ax1.set_xticks(x) ax1.set_xticklabels(metrics) ax1.legend() - ax1.grid(True, axis='y', linestyle='--', alpha=0.7) - + ax1.grid(True, axis="y", linestyle="--", alpha=0.7) + # ITL subplot - ax2.bar(x - bar_width/2, llm_d_itl, bar_width, label='llm-d', color='skyblue', alpha=0.8) - ax2.bar(x + bar_width/2, vllm_itl, bar_width, label='vLLM v1', color='lightcoral', alpha=0.8) - + ax2.bar(x - bar_width / 2, llm_d_itl, bar_width, label="llm-d", color="skyblue", alpha=0.8) + ax2.bar(x + bar_width / 2, vllm_itl, bar_width, label="vLLM v1", color="lightcoral", alpha=0.8) + # Add value labels for i, v in enumerate(llm_d_itl): - ax2.text(i - bar_width/2, v, f'{v:.1f}', ha='center', va='bottom') + ax2.text(i - bar_width / 2, v, f"{v:.1f}", ha="center", va="bottom") for i, v in enumerate(vllm_itl): - ax2.text(i + bar_width/2, v, f'{v:.1f}', ha='center', va='bottom') - - ax2.set_xlabel('Metric') - ax2.set_ylabel('Time (ms)') - ax2.set_title(f'{title_prefix} - ITL') + ax2.text(i + bar_width / 2, v, f"{v:.1f}", ha="center", va="bottom") + + ax2.set_xlabel("Metric") + ax2.set_ylabel("Time (ms)") + ax2.set_title(f"{title_prefix} - ITL") ax2.set_xticks(x) ax2.set_xticklabels(metrics) ax2.legend() - ax2.grid(True, axis='y', linestyle='--', alpha=0.7) - + ax2.grid(True, axis="y", linestyle="--", alpha=0.7) + plt.tight_layout() print(f"Saving plot to {output_path}") - plt.savefig(output_path, dpi=300, bbox_inches='tight') + plt.savefig(output_path, dpi=300, bbox_inches="tight") plt.close() + def main(): script_dir = os.path.dirname(os.path.abspath(__file__)) base_dir = os.path.join(script_dir, "../../collected/data/openshift/exp-7/H100") output_dir = os.path.join(script_dir, "../../") # or wherever you want the plots os.makedirs(output_dir, exist_ok=True) - + # Load metrics for all setups llm_d_1p1d = load_and_average_metrics(os.path.join(base_dir, "llm-d-1p1d")) vllm_2replicas = load_and_average_metrics(os.path.join(base_dir, "vllm-2replicas")) llm_d_2p1d = load_and_average_metrics(os.path.join(base_dir, "llm-d-2p1d")) vllm_3replicas = load_and_average_metrics(os.path.join(base_dir, "vllm-3replicas")) - + # Plot 1P1D vs 2 Replicas comparison plot_comparison( - llm_d_1p1d, - vllm_2replicas, - "1P1D vs 2 Replicas", - os.path.join(output_dir, 'comparison_1p1d_vs_2replicas.png') + llm_d_1p1d, vllm_2replicas, "1P1D vs 2 Replicas", os.path.join(output_dir, 
"comparison_1p1d_vs_2replicas.png") ) - + # Plot 2P1D vs 3 Replicas comparison plot_comparison( - llm_d_2p1d, - vllm_3replicas, - "2P1D vs 3 Replicas", - os.path.join(output_dir, 'comparison_2p1d_vs_3replicas.png') + llm_d_2p1d, vllm_3replicas, "2P1D vs 3 Replicas", os.path.join(output_dir, "comparison_2p1d_vs_3replicas.png") ) - + print(f"Plots have been saved to {output_dir}") + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/analysis/to_be_incorporated/plot_throughput_vs_qps.py b/llm_d_benchmark/analysis/to_be_incorporated/plot_throughput_vs_qps.py similarity index 71% rename from analysis/to_be_incorporated/plot_throughput_vs_qps.py rename to llm_d_benchmark/analysis/to_be_incorporated/plot_throughput_vs_qps.py index 7a95d668..7ad3c287 100644 --- a/analysis/to_be_incorporated/plot_throughput_vs_qps.py +++ b/llm_d_benchmark/analysis/to_be_incorporated/plot_throughput_vs_qps.py @@ -1,81 +1,86 @@ -import pandas as pd -import matplotlib.pyplot as plt import glob import os import re +import matplotlib.pyplot as plt +import pandas as pd + + def extract_qps(filename): # Extract QPS value from filename (e.g., LMBench_sharegpt_output_0.5.csv -> 0.5) - match = re.search(r'output_(\d+\.?\d*)\.csv', filename) + match = re.search(r"output_(\d+\.?\d*)\.csv", filename) if match: return float(match.group(1)) return None + def calculate_throughput(df): # Calculate total tokens (input + output) - total_tokens = df['prompt_tokens'].sum() + df['generation_tokens'].sum() - + total_tokens = df["prompt_tokens"].sum() + df["generation_tokens"].sum() + # Calculate total time (latest finish time - earliest launch time) - total_time = df['finish_time'].max() - df['launch_time'].min() - + total_time = df["finish_time"].max() - df["launch_time"].min() + # Calculate throughput (tokens per second) return total_tokens / total_time + def main(): # Get all CSV files matching the pattern - data_dir = os.path.join(os.path.dirname(__file__), '..', 'data', 'k8s', 'lmbenchmark') - csv_files = glob.glob(os.path.join(data_dir, 'LMBench_sharegpt_output_*.csv')) - + data_dir = os.path.join(os.path.dirname(__file__), "..", "data", "k8s", "lmbenchmark") + csv_files = glob.glob(os.path.join(data_dir, "LMBench_sharegpt_output_*.csv")) + if not csv_files: print(f"No CSV files found in {data_dir}") return - + # Store results qps_values = [] throughput_values = [] - + # Process each file for file in sorted(csv_files): qps = extract_qps(file) if qps is None: print(f"Could not extract QPS from filename: {file}") continue - + try: # Read CSV file df = pd.read_csv(file) - + # Calculate throughput throughput = calculate_throughput(df) - + qps_values.append(qps) throughput_values.append(throughput) print(f"Processed {file}: QPS={qps}, Throughput={throughput:.2f} tokens/s") except Exception as e: print(f"Error processing {file}: {str(e)}") continue - + if not qps_values: print("No valid data found in any CSV files") return - + # Sort QPS and throughput values sorted_pairs = sorted(zip(qps_values, throughput_values)) qps_values, throughput_values = zip(*sorted_pairs) - + # Create the plot plt.figure(figsize=(10, 6)) - plt.plot(qps_values, throughput_values, 'go-', linewidth=2, markersize=8) - plt.xlabel('QPS') - plt.ylabel('Throughput (tokens/s)') - plt.title('Throughput vs QPS') + plt.plot(qps_values, throughput_values, "go-", linewidth=2, markersize=8) + plt.xlabel("QPS") + plt.ylabel("Throughput (tokens/s)") + plt.title("Throughput vs QPS") plt.grid(True) - + # Save the plot - output_file = 
os.path.join(os.path.dirname(__file__), 'throughput_vs_qps.png') + output_file = os.path.join(os.path.dirname(__file__), "throughput_vs_qps.png") plt.savefig(output_file) plt.close() print(f"Plot saved to {output_file}") + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/analysis/to_be_incorporated/plot_ttft_vs_qps.py b/llm_d_benchmark/analysis/to_be_incorporated/plot_ttft_vs_qps.py similarity index 70% rename from analysis/to_be_incorporated/plot_ttft_vs_qps.py rename to llm_d_benchmark/analysis/to_be_incorporated/plot_ttft_vs_qps.py index 170f7df7..cf5b7e90 100644 --- a/analysis/to_be_incorporated/plot_ttft_vs_qps.py +++ b/llm_d_benchmark/analysis/to_be_incorporated/plot_ttft_vs_qps.py @@ -1,71 +1,75 @@ -import pandas as pd -import matplotlib.pyplot as plt import glob import os import re +import matplotlib.pyplot as plt +import pandas as pd + + def extract_qps(filename): # Extract QPS value from filename (e.g., LMBench_sharegpt_output_0.5.csv -> 0.5) - match = re.search(r'output_(\d+\.?\d*)\.csv', filename) + match = re.search(r"output_(\d+\.?\d*)\.csv", filename) if match: return float(match.group(1)) return None + def main(): # Get all CSV files matching the pattern - data_dir = os.path.join(os.path.dirname(__file__), '..', 'data', 'k8s', 'lmbenchmark') - csv_files = glob.glob(os.path.join(data_dir, 'LMBench_sharegpt_output_*.csv')) - + data_dir = os.path.join(os.path.dirname(__file__), "..", "data", "k8s", "lmbenchmark") + csv_files = glob.glob(os.path.join(data_dir, "LMBench_sharegpt_output_*.csv")) + if not csv_files: print(f"No CSV files found in {data_dir}") return - + # Store results qps_values = [] avg_ttft_values = [] - + # Process each file for file in sorted(csv_files): qps = extract_qps(file) if qps is None: print(f"Could not extract QPS from filename: {file}") continue - + try: # Read CSV file df = pd.read_csv(file) - + # Calculate average TTFT - avg_ttft = df['ttft'].mean() - + avg_ttft = df["ttft"].mean() + qps_values.append(qps) avg_ttft_values.append(avg_ttft) print(f"Processed {file}: QPS={qps}, Avg TTFT={avg_ttft:.4f}s") except Exception as e: print(f"Error processing {file}: {str(e)}") continue - + if not qps_values: print("No valid data found in any CSV files") return - + # Sort QPS and TTFT values sorted_pairs = sorted(zip(qps_values, avg_ttft_values)) qps_values, avg_ttft_values = zip(*sorted_pairs) - + # Create the plot plt.figure(figsize=(10, 6)) - plt.plot(qps_values, avg_ttft_values, 'ro-', linewidth=2, markersize=8) - plt.xlabel('QPS') - plt.ylabel('Average Time to First Token (s)') - plt.title('Average Time to First Token vs QPS') + plt.plot(qps_values, avg_ttft_values, "ro-", linewidth=2, markersize=8) + plt.xlabel("QPS") + plt.ylabel("Average Time to First Token (s)") + plt.title("Average Time to First Token vs QPS") plt.grid(True) - + # Save the plot - output_file = os.path.join(os.path.dirname(__file__), 'ttft_vs_qps.png') + output_file = os.path.join(os.path.dirname(__file__), "ttft_vs_qps.png") plt.savefig(output_file) plt.close() print(f"Plot saved to {output_file}") + if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/analysis/to_be_incorporated/requirements.txt b/llm_d_benchmark/analysis/to_be_incorporated/requirements.txt similarity index 100% rename from analysis/to_be_incorporated/requirements.txt rename to llm_d_benchmark/analysis/to_be_incorporated/requirements.txt diff --git a/analysis/vllm-benchmark-analyze_results.sh 
b/llm_d_benchmark/analysis/vllm-benchmark-analyze_results.sh similarity index 100% rename from analysis/vllm-benchmark-analyze_results.sh rename to llm_d_benchmark/analysis/vllm-benchmark-analyze_results.sh diff --git a/config_explorer/Home.py b/llm_d_benchmark/config_explorer/Home.py similarity index 68% rename from config_explorer/Home.py rename to llm_d_benchmark/config_explorer/Home.py index 195e38a3..aa827ce6 100644 --- a/config_explorer/Home.py +++ b/llm_d_benchmark/config_explorer/Home.py @@ -2,18 +2,43 @@ Main Page """ -from matplotlib import pyplot as plt -import streamlit as st -import db -import util -from src.config_explorer.capacity_planner import * from decimal import Decimal +import streamlit as st +from matplotlib import pyplot as plt + +from llm_d_benchmark.config_explorer import db, util +from llm_d_benchmark.config_explorer.capacity_planner import ( + AttentionType, + KVCacheDetail, + allocatable_kv_cache_memory, + available_gpu_memory, + experts_per_ep_group, + find_possible_tp, + get_ep_size, + get_model_config_from_hf, + get_model_info_from_hf, + get_num_experts, + get_text_config, + gpus_required, + inference_dtype, + is_moe, + kv_cache_req, + max_concurrent_requests, + max_context_len, + model_memory_req, + parameter_memory_req, + per_gpu_model_memory_required, + precision_to_byte, +) + + def update_gpu_spec(): """ Update user selected GPU spec in session state """ - st.session_state['scenario'].gpu_spec = st.session_state['gpu_spec'][st.session_state['selected_gpu_spec']] + st.session_state["scenario"].gpu_spec = st.session_state["gpu_spec"][st.session_state["selected_gpu_spec"]] + @st.dialog("Register a new accelerator") def register_new_accelerator(): @@ -25,13 +50,10 @@ def register_new_accelerator(): if st.button("Register", use_container_width=True): if acc_name: - - db.gpu_specs[acc_name] = { - "name": acc_name, - "memory": acc_mem - } + db.gpu_specs[acc_name] = {"name": acc_name, "memory": acc_mem} st.rerun() + def model_specification(): """ Get model inputs like model name, precision @@ -44,11 +66,12 @@ def model_specification(): with st.container(border=True): st.write("**Model Specification**") - selected_model = st.text_input("Model (Hugging Face format)", - value=user_scenario.get_model_name(), - key=util.SELECTED_MODEL_KEY, - on_change=util.on_update_model_name, - ) + selected_model = st.text_input( + "Model (Hugging Face format)", + value=user_scenario.get_model_name(), + key=util.SELECTED_MODEL_KEY, + on_change=util.on_update_model_name, + ) hf_token = None if selected_model and selected_model != "": @@ -83,7 +106,9 @@ def model_specification(): try: model_gpu_memory_req = round(model_memory_req(model_info), 2) except Exception as e: - st.warning(f"Cannot retrieve relevant information about the model, {e}. The Capacity Planner only has partial information and functionality.") + st.warning( + f"Cannot retrieve relevant information about the model, {e}. The Capacity Planner only has partial information and functionality." + ) return None # Display first precision @@ -91,7 +116,9 @@ def model_specification(): col1.info(f"Size of model in memory: ~{model_gpu_memory_req} GB") with col2.expander("See how model size is calculated below"): - st.write("""Below shows how model memory is estimated. The number of parameters and precision are fetched from Hugging Face. Common data types include `BF16` (floating point 16-bit) and `F8_E4M3` (floating point 8-bit, 4 for exponents and 3 for mantissa). 
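# --- illustration: model weight memory estimate --------------------------------
# The "how model size is calculated" expander here sums, per data type, the
# parameter count times bytes per element (BF16 -> 2 bytes, F8_E4M3 -> 1 byte,
# and so on). A hedged sketch of that arithmetic; the parameter counts below
# are illustrative, not values fetched from Hugging Face:
BYTES_PER_DTYPE = {"BF16": 2, "F16": 2, "F8_E4M3": 1, "F32": 4}

def model_memory_gb(param_counts: dict) -> float:
    total_bytes = sum(count * BYTES_PER_DTYPE[dtype] for dtype, count in param_counts.items())
    return total_bytes / 1024**3

print(round(model_memory_gb({"BF16": 8_030_000_000}), 2))  # ~14.96 GB
# ------------------------------------------------------------------------------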
The total is then summed.""") + st.write( + """Below shows how model memory is estimated. The number of parameters and precision are fetched from Hugging Face. Common data types include `BF16` (floating point 16-bit) and `F8_E4M3` (floating point 8-bit, 4 for exponents and 3 for mantissa). The total is then summed.""" + ) data_types = [] bytes_list = [] @@ -118,11 +145,14 @@ def model_specification(): } st.dataframe(data, hide_index=True) - st.write("In addition, vLLM [profiles memory](https://github.com/vllm-project/vllm/blob/dcf2f3ec067711ff69e5ab7478fca6ffb4f11daf/vllm/worker/worker.py#L229) by doing a forward pass with `--max-model-len` with dummy data to estimate the non-torch and torch activation peak memory consumption. This means the estimation of the model memory is actually an underestimation. Estimating intermediate memory footprint is currently work in progress.") + st.write( + "In addition, vLLM [profiles memory](https://github.com/vllm-project/vllm/blob/dcf2f3ec067711ff69e5ab7478fca6ffb4f11daf/vllm/worker/worker.py#L229) by doing a forward pass with `--max-model-len` with dummy data to estimate the non-torch and torch activation peak memory consumption. This means the estimation of the model memory is actually an underestimation. Estimating intermediate memory footprint is currently work in progress." + ) else: return None + def parallelism_specification(): """ Parallelism configuration @@ -141,31 +171,34 @@ def parallelism_specification(): # Display some useful info col1, col2 = st.columns(2) possible_tp_sizes = find_possible_tp(model_config) - tp_size = col1.selectbox("Tensor parallel size (shard model weights across GPUs)", - options=possible_tp_sizes, - index=possible_tp_sizes.index(user_scenario.tp_size), - key=util.SELECTED_TP_SIZE_KEY, - help=f"Must be divisible by the number of attention heads (`{model_config.num_attention_heads}` for this model)", - on_change=util.on_update_parallelism, - args=[util.SELECTED_TP_SIZE_KEY, "tp_size"] - ) - pp_size = col2.number_input("Pipeline parallel size (shard layers across GPUs)", - min_value=1, - max_value=model_config.num_hidden_layers, - key=util.SELECTED_PP_SIZE_KEY, - value=user_scenario.pp_size, - help=f"This number is capped by the number of hidden layers (`{model_config.num_hidden_layers}` for this model). \ + tp_size = col1.selectbox( + "Tensor parallel size (shard model weights across GPUs)", + options=possible_tp_sizes, + index=possible_tp_sizes.index(user_scenario.tp_size), + key=util.SELECTED_TP_SIZE_KEY, + help=f"Must be divisible by the number of attention heads (`{model_config.num_attention_heads}` for this model)", + on_change=util.on_update_parallelism, + args=[util.SELECTED_TP_SIZE_KEY, "tp_size"], + ) + pp_size = col2.number_input( + "Pipeline parallel size (shard layers across GPUs)", + min_value=1, + max_value=model_config.num_hidden_layers, + key=util.SELECTED_PP_SIZE_KEY, + value=user_scenario.pp_size, + help=f"This number is capped by the number of hidden layers (`{model_config.num_hidden_layers}` for this model). 
\ Also, vLLM handles uneven splits, see the [documentation](https://docs.vllm.ai/en/latest/api/vllm/distributed/index.html#vllm.distributed.get_pp_indices)", - on_change=util.on_update_parallelism, - args=[util.SELECTED_PP_SIZE_KEY, "pp_size"] - ) - dp_size = col1.number_input("Data parallel size (replicas of model)", - min_value=1, - key=util.SELECTED_DP_SIZE_KEY, - value=user_scenario.dp_size, - on_change=util.on_update_parallelism, - args=[util.SELECTED_DP_SIZE_KEY, "dp_size"] - ) + on_change=util.on_update_parallelism, + args=[util.SELECTED_PP_SIZE_KEY, "pp_size"], + ) + dp_size = col1.number_input( + "Data parallel size (replicas of model)", + min_value=1, + key=util.SELECTED_DP_SIZE_KEY, + value=user_scenario.dp_size, + on_change=util.on_update_parallelism, + args=[util.SELECTED_DP_SIZE_KEY, "dp_size"], + ) # Enable EP is_moe_model = is_moe(model_config) @@ -177,14 +210,15 @@ def parallelism_specification(): Tensor parallelism splits expert weights across GPUs. Expert parallelism splits incoming token's hidden state across GPUs. In vLLM, enabling data parallelism on MoE models essentially achieves the latter purpose. """ - enable_ep = col2.toggle("Enable expert parallelism", - value=user_scenario.enable_ep, - disabled=not is_moe_model, - help=help, - key=util.SELECTED_ENABLE_EP_KEY, - on_change=util.update_scenario, - args=[util.SELECTED_ENABLE_EP_KEY, "enable_ep"] - ) + enable_ep = col2.toggle( + "Enable expert parallelism", + value=user_scenario.enable_ep, + disabled=not is_moe_model, + help=help, + key=util.SELECTED_ENABLE_EP_KEY, + on_change=util.update_scenario, + args=[util.SELECTED_ENABLE_EP_KEY, "enable_ep"], + ) if enable_ep: total_experts = get_num_experts(model_config) ep_size = get_ep_size(tp_size, dp_size) @@ -196,11 +230,14 @@ def parallelism_specification(): `EP size = (TP x DP) = {ep_size}`, meaning each group will get `{total_experts} / {ep_size} = {experts_per_ep_str}` experts per group. """) if experts_per_ep < 1: - col2.warning("Since some EP groups will get 0 expert, this is an under-utilization of GPU resources. We recommend decreasing TP or DP for better use of your accelerators.") - - if not Decimal(experts_per_ep) % 1 == 0: - col2.caption("The total number of experts is not divisible by EP size you selected. However, vLLM handles uneven split of experts (see this [PR](https://github.com/vllm-project/vllm/pull/21497)), so some EP groups will have fewer experts than others.") + col2.warning( + "Since some EP groups will get 0 expert, this is an under-utilization of GPU resources. We recommend decreasing TP or DP for better use of your accelerators." + ) + if Decimal(experts_per_ep) % 1 != 0: + col2.caption( + "The total number of experts is not divisible by EP size you selected. However, vLLM handles uneven split of experts (see this [PR](https://github.com/vllm-project/vllm/pull/21497)), so some EP groups will have fewer experts than others." + ) st.info(f"GPUs required (`TP x PP x DP`): `{gpus_required(tp_size, pp_size, dp_size)}`") @@ -229,7 +266,9 @@ def workload_specification(): st.warning("Model config not available, cannot estimate KV cache size.") return None - st.caption(f"Estimate KV cache memory requirements for the selected model based on workload. Note that the model uses data type of `{inference_dtype(model_config)}` for KV cache during inference.") + st.caption( + f"Estimate KV cache memory requirements for the selected model based on workload. Note that the model uses data type of `{inference_dtype(model_config)}` for KV cache during inference." 
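# --- illustration: expert-parallel group sizing --------------------------------
# The expert-parallelism note above reduces to ep_size = TP * DP and
# experts_per_group = num_experts / ep_size; groups may split unevenly, and a
# group that would receive zero experts signals GPU under-utilization. The
# arithmetic:
def experts_per_group(num_experts: int, tp: int, dp: int) -> float:
    ep_size = tp * dp
    return num_experts / ep_size

print(experts_per_group(64, 8, 2))  # 4.0 experts per EP group
print(experts_per_group(64, 8, 16))  # 0.5 -> some groups would get no expert
# ------------------------------------------------------------------------------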
+ ) col1, col2 = st.columns(2) @@ -241,21 +280,24 @@ def workload_specification(): value=user_scenario.max_model_len, key=util.SELECTED_MAX_MODEL_LEN_KEY, on_change=util.on_update_max_model_len, - ) - col1.caption("Maximum model length for the model: how many tokens (input + output) the model can process. \ + ) + col1.caption( + "Maximum model length for the model: how many tokens (input + output) the model can process. \ Higher max model length means fewer concurrent requests can be served, \ because for the same GPU memory available for KV cache, \ each request requires more memory allocation. \ -") +" + ) - col2.number_input("Input the max number of concurrent requests to process", + col2.number_input( + "Input the max number of concurrent requests to process", min_value=0, step=1, key=util.SELECTED_CONCURRENCY_KEY, value=user_scenario.concurrency, on_change=util.update_scenario, - args=[util.SELECTED_CONCURRENCY_KEY, "concurrency"] - ) + args=[util.SELECTED_CONCURRENCY_KEY, "concurrency"], + ) try: max_concurrent_requests_num = max_concurrent_requests( @@ -270,7 +312,9 @@ def workload_specification(): ) except Exception: - col2.warning("Model does not have safetensors data available, cannot estimate KV cache memory requirement.") + col2.warning( + "Model does not have safetensors data available, cannot estimate KV cache memory requirement." + ) return None try: @@ -284,7 +328,9 @@ def workload_specification(): col2.warning(f"There is not enough information to estimate KV cache requirement per request: {e}") return None - col2.info(f"Assuming the worst case scenario, such that every request contains `--max-model-len` tokens, each request takes {util.pretty_round(kv_details.per_request_kv_cache_gb)} GB for KV cache, which means the maximum concurrent requests that can be processed is {max_concurrent_requests_num}.") + col2.info( + f"Assuming the worst case scenario, such that every request contains `--max-model-len` tokens, each request takes {util.pretty_round(kv_details.per_request_kv_cache_gb)} GB for KV cache, which means the maximum concurrent requests that can be processed is {max_concurrent_requests_num}." + ) # Display details on how KV cache is estimated with st.expander("See how KV cache is calculated below"): @@ -335,7 +381,6 @@ def workload_specification(): """) - def hardware_specification(): """ Get hardware inputs like name and number of accelerators available @@ -344,9 +389,7 @@ def hardware_specification(): user_scenario = st.session_state[util.USER_SCENARIO_KEY] model_info = user_scenario.model_info model_config = user_scenario.model_config - text_config = user_scenario.text_config - concurrency = user_scenario.concurrency tp = user_scenario.tp_size pp = user_scenario.pp_size dp = user_scenario.dp_size @@ -354,7 +397,9 @@ def hardware_specification(): # Hardware with st.container(border=True): st.write("**Hardware Specification**") - st.caption("Identify suitable accelerators for serving the model based on parallelism optimization and workload.") + st.caption( + "Identify suitable accelerators for serving the model based on parallelism optimization and workload." 
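# --- illustration: worst-case concurrency bound --------------------------------
# The bound computed above is the KV cache memory left after loading weights,
# divided by the KV cache a single request needs when it consumes all of
# --max-model-len. A sketch of that division (the GB figures are invented):
import math

def max_concurrent_requests(allocatable_kv_gb: float, per_request_kv_gb: float) -> int:
    return math.floor(allocatable_kv_gb / per_request_kv_gb)

print(max_concurrent_requests(40.0, 1.25))  # 32 requests fit in the worst case
# ------------------------------------------------------------------------------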
+ ) if model_config is None: st.warning("Model config not found.") @@ -366,23 +411,25 @@ def hardware_specification(): if user_scenario.gpu_name in db.gpu_specs.keys(): index = list(db.gpu_specs.keys()).index(user_scenario.gpu_name) - col1.number_input("GPU utilization ratio", - key=util.SELECTED_GPU_MEMORY_UTIL_KEY, - value=user_scenario.gpu_mem_util, - min_value=0.0, - step=0.01, - on_change=util.update_scenario, - args=[util.SELECTED_GPU_MEMORY_UTIL_KEY, "gpu_mem_util"] - ) + col1.number_input( + "GPU utilization ratio", + key=util.SELECTED_GPU_MEMORY_UTIL_KEY, + value=user_scenario.gpu_mem_util, + min_value=0.0, + step=0.01, + on_change=util.update_scenario, + args=[util.SELECTED_GPU_MEMORY_UTIL_KEY, "gpu_mem_util"], + ) # Select GPU type - selected_gpu_name = col1.selectbox("Accelerator", - key=util.SELECTED_GPU_NAME_KEY, - index=index, - options=db.gpu_specs, - on_change=util.update_scenario, - args=[util.SELECTED_GPU_NAME_KEY, "gpu_name"], - ) + selected_gpu_name = col1.selectbox( + "Accelerator", + key=util.SELECTED_GPU_NAME_KEY, + index=index, + options=db.gpu_specs, + on_change=util.update_scenario, + args=[util.SELECTED_GPU_NAME_KEY, "gpu_name"], + ) # Dialog for registering new accelerator data col2.write("\n\nDon't see your accelerator? Register a new one below") @@ -391,7 +438,6 @@ def hardware_specification(): # For the selected GPU, show memory requirements if selected_gpu_name: - # Get info gpu_memory = user_scenario.get_gpu_memory(db.gpu_specs) available_gpu_count = gpus_required(tp, pp, dp) @@ -404,18 +450,21 @@ def hardware_specification(): return None model_size_per_gpu = per_gpu_model_memory_required(model_info, tp, pp) - allocatable_kv_cache = allocatable_kv_cache_memory(model_info, - model_config, - gpu_memory, - user_scenario.gpu_mem_util, - tp, - pp, - dp, - ) - kv_details = KVCacheDetail(model_info, model_config, - user_scenario.max_model_len, - user_scenario.concurrency, - ) + allocatable_kv_cache = allocatable_kv_cache_memory( + model_info, + model_config, + gpu_memory, + user_scenario.gpu_mem_util, + tp, + pp, + dp, + ) + kv_details = KVCacheDetail( + model_info, + model_config, + user_scenario.max_model_len, + user_scenario.concurrency, + ) per_request_kv_cache_memory = kv_details.per_request_kv_cache_gb all_request_kv_cache_memory = kv_details.kv_cache_size_gb @@ -475,11 +524,12 @@ def hardware_specification(): --pipeline-parallel-size {pp} \\ --data-parallel-size {user_scenario.dp_size}""" if user_scenario.enable_ep: - vllm_serve_cmd += f""" \\ + vllm_serve_cmd += """ \\ --enable-expert-parallel """ col2.code(vllm_serve_cmd) + def memory_util_chart(st_context): """ Show memory utilization chart @@ -488,7 +538,6 @@ def memory_util_chart(st_context): user_scenario = st.session_state[util.USER_SCENARIO_KEY] model_info = user_scenario.model_info model_config = user_scenario.model_config - text_config = user_scenario.text_config gpu_memory = user_scenario.get_gpu_memory(db.gpu_specs) gpu_memory_util = user_scenario.gpu_mem_util concurrency = user_scenario.concurrency @@ -510,7 +559,12 @@ def memory_util_chart(st_context): # Display chart iff model and cache size are selected labels = ["Model", "KV Cache", "Free", "Reserved"] - sizes = [util.pretty_round(model_size), util.pretty_round(max_concurrency_kv_cache), util.pretty_round(free), util.pretty_round(reserved)] + sizes = [ + util.pretty_round(model_size), + util.pretty_round(max_concurrency_kv_cache), + util.pretty_round(free), + util.pretty_round(reserved), + ] colors = ["#ff9999", "#66b3ff", "#99ff99", 
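# --- illustration: the memory breakdown behind the donut chart ------------------
# memory_util_chart() here carves total GPU memory into model weights, KV cache
# at the requested concurrency, a reserved slice, and the remainder as free.
# Plausible bookkeeping with invented numbers; treating "Reserved" as the
# fraction excluded by the GPU-utilization ratio is an assumption, not a quote
# of the repo's formula:
total_gb, util, model_gb, kv_gb = 80.0, 0.75, 16.0, 40.0
reserved_gb = total_gb * (1 - util)  # 20.0 GB never handed to vLLM
free_gb = total_gb - reserved_gb - model_gb - kv_gb  # 4.0 GB headroom
print({"Model": model_gb, "KV Cache": kv_gb, "Free": free_gb, "Reserved": reserved_gb})
# ------------------------------------------------------------------------------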
"#808080"] # Create donut chart @@ -518,14 +572,16 @@ def memory_util_chart(st_context): wedges, texts = ax.pie( sizes, colors=colors, - startangle=90, # Start at top - wedgeprops=dict(width=0.4), # <-- Makes it a donut, - labeldistance=1.1, # Push labels outward - pctdistance=0.7, # Adjust percentage position + startangle=90, # Start at top + wedgeprops={"width": 0.4}, # <-- Makes it a donut, + labeldistance=1.1, # Push labels outward + pctdistance=0.7, # Adjust percentage position ) # Add total as text in the center of the donut - ax.text(0, 0, f"Total\n{util.pretty_round(total_memory)} GB", ha="center", va="center", fontsize=12, fontweight="bold") + ax.text( + 0, 0, f"Total\n{util.pretty_round(total_memory)} GB", ha="center", va="center", fontsize=12, fontweight="bold" + ) # Create a custom legend, including the total legend_labels = [f"{labels[i]}: {sizes[i]} GB" for i in range(len(labels))] @@ -536,34 +592,40 @@ def memory_util_chart(st_context): legend_labels, title="Total Storage Breakdown", loc="center left", - bbox_to_anchor=(1, 0, 0.5, 1) + bbox_to_anchor=(1, 0, 0.5, 1), ) # Render in Streamlit - _, col, _ = st_context.columns([.5, 1, .5]) + _, col, _ = st_context.columns([0.5, 1, 0.5]) with col: st.pyplot(fig, bbox_inches="tight") -if __name__ == '__main__': +if __name__ == "__main__": # Set up streamlit config - st.set_page_config(page_title="Configuration Explorer", - page_icon=None, - layout="wide", - initial_sidebar_state="expanded", - menu_items=None) + st.set_page_config( + page_title="Configuration Explorer", + page_icon=None, + layout="wide", + initial_sidebar_state="expanded", + menu_items=None, + ) st.title("Configuration Explorer") - st.caption("This tool helps you find the most cost-effective, optimal configuration for serving models on llm-d based on hardware specification, workload characteristics, and SLO requirements.") + st.caption( + "This tool helps you find the most cost-effective, optimal configuration for serving models on llm-d based on hardware specification, workload characteristics, and SLO requirements." + ) util.init_session_state() # Display Capacity Planner headings st.subheader("Capacity Planner") - st.caption("Determine how many GPUs you need to fit your model and how many requests can be served at once depending on request patterns.") + st.caption( + "Determine how many GPUs you need to fit your model and how many requests can be served at once depending on request patterns." 
+    )

     # Get user inputs and show outputs
     model_specification()
     parallelism_specification()
     workload_specification()
-    hardware_specification()
\ No newline at end of file
+    hardware_specification()
diff --git a/config_explorer/README.md b/llm_d_benchmark/config_explorer/README.md
similarity index 100%
rename from config_explorer/README.md
rename to llm_d_benchmark/config_explorer/README.md
diff --git a/config_explorer/__init__.py b/llm_d_benchmark/config_explorer/__init__.py
similarity index 100%
rename from config_explorer/__init__.py
rename to llm_d_benchmark/config_explorer/__init__.py
diff --git a/config_explorer/src/config_explorer/capacity_planner.py b/llm_d_benchmark/config_explorer/capacity_planner.py
similarity index 79%
rename from config_explorer/src/config_explorer/capacity_planner.py
rename to llm_d_benchmark/config_explorer/capacity_planner.py
index 552545ba..5b3ba748 100644
--- a/config_explorer/src/config_explorer/capacity_planner.py
+++ b/llm_d_benchmark/config_explorer/capacity_planner.py
@@ -2,14 +2,15 @@
 Capacity planner provides functionality to estimate the minimum number of GPUs required for loading model and KV cache
 """
+import math
+import re
 from dataclasses import dataclass
 from enum import StrEnum
-import math
 from functools import reduce
-import re
-from typing import List
+
 from huggingface_hub import HfApi, ModelInfo
-from transformers import AutoConfig, AutoModel
+from transformers import AutoConfig
+

 class AttentionType(StrEnum):
     """
@@ -21,6 +22,7 @@ class AttentionType(StrEnum):
     GQA = "Grouped-query attention"
     MQA = "Multi-query attention"

+
 @dataclass
 class KVCacheDetail:
     # Required inputs from model config
@@ -38,8 +40,8 @@ class KVCacheDetail:
     num_attention_group: int
     per_token_memory_bytes: int
     per_request_kv_cache_bytes: int
-    per_request_kv_cache_gb: float # Single request kv cache
-    kv_cache_size_gb: float # Batch size kv cache
+    per_request_kv_cache_gb: float  # Single request kv cache
+    kv_cache_size_gb: float  # Batch size kv cache

     # Workload inputs
     context_len: int = 1
@@ -49,7 +51,7 @@ class KVCacheDetail:
     kv_lora_rank: int | None = None
     qk_rope_head_dim: int | None = None

-    def __init__(self, model_info: ModelInfo, model_config: AutoConfig, context_len: int=1, batch_size: int=1):
+    def __init__(self, model_info: ModelInfo, model_config: AutoConfig, context_len: int = 1, batch_size: int = 1):
         """
         KVCacheDetail stores information that is relevant to calculating KV cache memory requirement
         """
@@ -61,8 +63,7 @@ def __init__(self, model_info: ModelInfo, model_config: AutoConfig, context_len:
         self.hidden_size = model_config.hidden_size
         self.num_attention_heads = model_config.num_attention_heads
         self.num_key_value_heads = model_config.num_key_value_heads
-        self.head_dimension = getattr(model_config,
-            "head_dim", self.hidden_size / self.num_attention_heads)
+        self.head_dimension = getattr(model_config, "head_dim", self.hidden_size / self.num_attention_heads)

         # Determine attention type
         if use_mla(self.model):
@@ -100,21 +101,30 @@ def set_batch_size(self, batch_size: int):
         self.__recalculate()

     def __recalculate(self):
-        """"
+        """
        Recalculates per token memory, kv cache size in bytes, and in GB
         """
         # Calculate per token memory bytes depending on attention type
         if self.attention_type == AttentionType.MLA:
-            self.per_token_memory_bytes = self.num_hidden_layers * (self.kv_lora_rank + self.qk_rope_head_dim) * self.precision_in_bytes
+            self.per_token_memory_bytes = (
+                self.num_hidden_layers * (self.kv_lora_rank + self.qk_rope_head_dim) * 
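                # Editor's note (illustrative, not part of this patch): sanity-checking the
                # MLA branch with assumed DeepSeek-V3-like values, not values read from this
                # repo: num_hidden_layers=61, kv_lora_rank=512, qk_rope_head_dim=64, and
                # precision_in_bytes=2 gives
                #   61 * (512 + 64) * 2 = 70,272 bytes (~0.07 MiB) of KV cache per token,
                # the per-token figure that the context_len and batch_size multipliers
                # further down scale into per-request and per-batch totals.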
self.precision_in_bytes + ) else: self.num_attention_group = int(self.num_attention_heads / self.num_key_value_heads) - self.per_token_memory_bytes = self.num_hidden_layers * 2 * self.head_dimension * (self.num_key_value_heads / self.num_attention_group) * self.precision_in_bytes + self.per_token_memory_bytes = ( + self.num_hidden_layers + * 2 + * self.head_dimension + * (self.num_key_value_heads / self.num_attention_group) + * self.precision_in_bytes + ) # Calculate kv cache size in bytes and in gb self.per_request_kv_cache_bytes = self.per_token_memory_bytes * self.context_len - self.per_request_kv_cache_gb = self.per_request_kv_cache_bytes / (1024 ** 3) + self.per_request_kv_cache_gb = self.per_request_kv_cache_bytes / (1024**3) self.kv_cache_size_gb = self.per_request_kv_cache_gb * self.batch_size + # Model def get_model_info_from_hf(model_name: str, hf_token: str | None = None) -> ModelInfo: """ @@ -124,7 +134,8 @@ def get_model_info_from_hf(model_name: str, hf_token: str | None = None) -> Mode model_info = api.model_info(model_name) return model_info -def get_model_config_from_hf(model_name: str, hf_token: str=None) -> AutoConfig: + +def get_model_config_from_hf(model_name: str, hf_token: str | None = None) -> AutoConfig: """ Returns LLM model config """ @@ -137,6 +148,7 @@ def get_model_config_from_hf(model_name: str, hf_token: str=None) -> AutoConfig: return model_config + def get_text_config(model_config: AutoConfig) -> dict: """ Returns text config (for LLMs) @@ -150,18 +162,21 @@ def get_text_config(model_config: AutoConfig) -> dict: return model_config + def model_total_params(model_info: ModelInfo) -> int: """ Returns the total parameters of the model """ return model_info.safetensors.total + def max_context_len(model_config: AutoConfig) -> int: """ Returns the max context length accepted by model """ return model_config.max_position_embeddings + def __estimate_vllm_non_torch_memory() -> int: """ Estimate non-torch memory consumption. @@ -170,10 +185,8 @@ def __estimate_vllm_non_torch_memory() -> int: return 1 -def __estimate_vllm_peak_memory(config: AutoConfig, - seq_len: int, - batch_size=1, - include_hidden=True): + +def __estimate_vllm_peak_memory(config: AutoConfig, seq_len: int, batch_size=1, include_hidden=True): """ Estimate peak activation memory for vLLM inference in bytes without running PyTorch. 
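    Editor's note (sketch, not part of this patch): the returned figure reduces to
    total_bytes = kv_bytes + hidden_bytes, i.e. KV tensors for seq_len tokens plus the
    dominant hidden activation, both derived from config; treat it as a rough upper
    bound rather than a measured profile.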
""" @@ -192,6 +205,7 @@ def __estimate_vllm_peak_memory(config: AutoConfig, total_bytes = kv_bytes + hidden_bytes return total_bytes + def precision_to_byte(precision: str) -> int: """ Returns the byte requirement for a parameter for the highest precision of the model @@ -206,7 +220,6 @@ def precision_to_byte(precision: str) -> int: "F8_E5M2": 1, "F8_E4M3": 1, "FP4": 0.5, - # Integers "I64": 8, "INT64": 8, @@ -220,7 +233,6 @@ def precision_to_byte(precision: str) -> int: "U4": 0.5, "I4": 0.5, "INT4": 0.5, - # Boolean "BOOL": 1, # stored as byte per element } @@ -237,13 +249,15 @@ def precision_to_byte(precision: str) -> int: raise ValueError("Unsupported precision type.") + def parameter_memory_req(parameter: int, precision: str) -> float: """ Calculates the memory requirement (in GiB) for the number of parameters for the specified precision """ precision_byte = precision_to_byte(precision) - return parameter * precision_byte / (1024 ** 3) + return parameter * precision_byte / (1024**3) + def model_memory_req(model_info: ModelInfo) -> float: """ @@ -257,6 +271,7 @@ def model_memory_req(model_info: ModelInfo) -> float: return memory + def inference_dtype(model_config: AutoConfig) -> str: """ Returns the inference KV cache data type used @@ -267,6 +282,7 @@ def inference_dtype(model_config: AutoConfig) -> str: return str(model_config.torch_dtype) + def use_mla(model_name: str) -> bool: """ Returns true for models that use MLA attention @@ -280,33 +296,32 @@ def use_mla(model_name: str) -> bool: return any(deepseek in model_name for deepseek in deepseek_mla_models) -def kv_cache_req(model_info: ModelInfo, - model_config: AutoConfig, - context_len: int, - batch_size: int = 1, - ) -> float: + +def kv_cache_req( + model_info: ModelInfo, + model_config: AutoConfig, + context_len: int, + batch_size: int = 1, +) -> float: """ Calculates the KV cache requirement in GiB """ return KVCacheDetail(model_info, model_config, context_len, batch_size).kv_cache_size_gb -def max_concurrent_requests(model_info: ModelInfo, - model_config: AutoConfig, - max_model_len: int, - gpu_memory: int, - gpu_mem_util: float=0.9, - tp: int=1, - pp: int=1, - dp: int=1, - ) -> int: +def max_concurrent_requests( + model_info: ModelInfo, + model_config: AutoConfig, + max_model_len: int, + gpu_memory: int, + gpu_mem_util: float = 0.9, + tp: int = 1, + pp: int = 1, + dp: int = 1, +) -> int: # Find allocatable memory for KV cache - kv_cache_allocatable = allocatable_kv_cache_memory( - model_info, model_config, - gpu_memory, gpu_mem_util, - tp, pp, dp - ) + kv_cache_allocatable = allocatable_kv_cache_memory(model_info, model_config, gpu_memory, gpu_mem_util, tp, pp, dp) # Find kv cache requirement for one request of max-model-len per_request_kv_cache_req = kv_cache_req(model_info, model_config, max_model_len) @@ -314,7 +329,8 @@ def max_concurrent_requests(model_info: ModelInfo, return 0 return max(0, math.floor(kv_cache_allocatable / per_request_kv_cache_req)) -def find_possible_tp(model_config: AutoConfig) -> List[int]: + +def find_possible_tp(model_config: AutoConfig) -> list[int]: """ Finds possible values for tp for the given model """ @@ -323,28 +339,38 @@ def find_possible_tp(model_config: AutoConfig) -> List[int]: num_attention_heads = model_config.num_attention_heads - factors = set(reduce( - list.__add__, - ([i, num_attention_heads // i] for i in range(1, int(num_attention_heads**0.5) + 1) if num_attention_heads % i == 0))) + factors = set( + reduce( + list.__add__, + ( + [i, num_attention_heads // i] + for i in range(1, 
int(num_attention_heads**0.5) + 1) + if num_attention_heads % i == 0 + ), + ) + ) factors = list(factors) factors.sort() return factors -def available_gpu_memory(memory: int, gpu_utilization: float=0.9) -> float: + +def available_gpu_memory(memory: int, gpu_utilization: float = 0.9) -> float: """ Returns the available GPU memory """ return memory * gpu_utilization -def gpus_required(tp: int=1, pp: int=1, dp: int=1) -> int: + +def gpus_required(tp: int = 1, pp: int = 1, dp: int = 1) -> int: """ Determines the number of GPUs required based on parallelism strategies """ return tp * pp * dp + def per_gpu_model_memory_required(model_info: ModelInfo, tp: int = 1, pp: int = 1) -> int: """ Calculates model memory requirement for each GPU @@ -353,14 +379,16 @@ def per_gpu_model_memory_required(model_info: ModelInfo, tp: int = 1, pp: int = model_memory = model_memory_req(model_info) return model_memory / (tp * pp) -def allocatable_kv_cache_memory(model_info: ModelInfo, - model_config: AutoConfig, - gpu_memory: int, - gpu_util: float = 0.9, - tp: int = 1, - pp: int = 1, - dp: int = 1, - ) -> float: + +def allocatable_kv_cache_memory( + model_info: ModelInfo, + model_config: AutoConfig, + gpu_memory: int, + gpu_util: float = 0.9, + tp: int = 1, + pp: int = 1, + dp: int = 1, +) -> float: gpu_count = tp * pp * dp available_memory = available_gpu_memory(gpu_memory, gpu_util) * gpu_count model_size = model_memory_req(model_info) * dp @@ -370,6 +398,7 @@ def allocatable_kv_cache_memory(model_info: ModelInfo, return available_memory - model_size + def is_moe(model_config: AutoConfig) -> bool: """ Returns true if model is MoE @@ -385,6 +414,7 @@ def is_moe(model_config: AutoConfig) -> bool: return True return False + def get_num_experts(model_config: AutoConfig) -> int | None: """ Returns the number of experts or None for non-MoE models @@ -396,16 +426,19 @@ def get_num_experts(model_config: AutoConfig) -> int | None: return model_config.num_experts return None + def get_ep_size(tp_size: int, dp_size: int) -> int: """ Returns EP size """ return tp_size * dp_size -def experts_per_ep_group(model_config: AutoConfig, - tp: int=1, - dp: int=1, - ) -> float: + +def experts_per_ep_group( + model_config: AutoConfig, + tp: int = 1, + dp: int = 1, +) -> float: """ Calculates the number of experts to handle on each GPU """ @@ -414,4 +447,4 @@ def experts_per_ep_group(model_config: AutoConfig, ep_size = get_ep_size(tp, dp) if num_experts is None: return 0 - return num_experts / ep_size \ No newline at end of file + return num_experts / ep_size diff --git a/llm_d_benchmark/config_explorer/db.py b/llm_d_benchmark/config_explorer/db.py new file mode 100644 index 00000000..4592499d --- /dev/null +++ b/llm_d_benchmark/config_explorer/db.py @@ -0,0 +1,20 @@ +""" +Mocks DB storing info about common accelerators used for LLM serving and inference +""" + +gpu_specs = { + # https://www.nvidia.com/content/dam/en-zz/Solutions/Data-Center/a100/pdf/nvidia-a100-datasheet-us-nvidia-1758950-r4-web.pdf + # https://medium.com/@bijit211987/top-nvidia-gpus-for-llm-inference-8a5316184a10 + # https://www.databasemart.com/blog/best-nvidia-gpus-for-llm-inference-2025?srsltid=AfmBOopcvcdN6yzBF24k7_DyRS_csYOmNyDLJK7zq9Rg89weW6AQAx5F + "NVIDIA-H100-80GB-HBM3": {"memory": 80}, + "NVIDIA-A100-40GB": {"memory": 40}, + "NVIDIA-A100-80GB": {"memory": 80}, + "NVIDIA-H100-80GB": {"memory": 80}, + "NVIDIA-L40-40GB": {"memory": 40}, + "NVIDIA-RTX-4090": {"memory": 24}, + "NVIDIA-RTX-5090": {"memory": 32}, + "NVIDIA-RTX-6000": {"memory": 48}, + 
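    # Editor's note (not part of this patch): each entry tracks only "memory" in GB,
    # which is what Scenario.get_gpu_memory() in config_explorer/util.py reads, e.g.
    #   gpu_specs["NVIDIA-T4"]["memory"]  # -> 16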
"NVIDIA-A6000": {"memory": 48}, + "NVIDIA-A4000": {"memory": 16}, + "NVIDIA-T4": {"memory": 16}, +} diff --git a/config_explorer/pyproject.toml b/llm_d_benchmark/config_explorer/pyproject.toml similarity index 100% rename from config_explorer/pyproject.toml rename to llm_d_benchmark/config_explorer/pyproject.toml diff --git a/config_explorer/pytest.ini b/llm_d_benchmark/config_explorer/pytest.ini similarity index 55% rename from config_explorer/pytest.ini rename to llm_d_benchmark/config_explorer/pytest.ini index e434181c..7b113c05 100644 --- a/config_explorer/pytest.ini +++ b/llm_d_benchmark/config_explorer/pytest.ini @@ -1,3 +1,3 @@ # pytest.ini [pytest] -pythonpath = src . \ No newline at end of file +pythonpath = ../.. \ No newline at end of file diff --git a/config_explorer/requirements-streamlit.txt b/llm_d_benchmark/config_explorer/requirements-streamlit.txt similarity index 100% rename from config_explorer/requirements-streamlit.txt rename to llm_d_benchmark/config_explorer/requirements-streamlit.txt diff --git a/config_explorer/requirements.txt b/llm_d_benchmark/config_explorer/requirements.txt similarity index 100% rename from config_explorer/requirements.txt rename to llm_d_benchmark/config_explorer/requirements.txt diff --git a/config_explorer/tests/capacity_planner_test.py b/llm_d_benchmark/config_explorer/tests/capacity_planner_test.py similarity index 87% rename from config_explorer/tests/capacity_planner_test.py rename to llm_d_benchmark/config_explorer/tests/capacity_planner_test.py index b27a92c4..780360fc 100644 --- a/config_explorer/tests/capacity_planner_test.py +++ b/llm_d_benchmark/config_explorer/tests/capacity_planner_test.py @@ -2,14 +2,35 @@ Tests Capacity Planner functions """ +import math + import pytest -from src.config_explorer.capacity_planner import * + +from llm_d_benchmark.config_explorer.capacity_planner import ( + allocatable_kv_cache_memory, + experts_per_ep_group, + find_possible_tp, + get_model_config_from_hf, + get_model_info_from_hf, + get_num_experts, + get_text_config, + gpus_required, + is_moe, + kv_cache_req, + max_concurrent_requests, + model_memory_req, + model_total_params, + parameter_memory_req, + precision_to_byte, +) + # ---- Constants ---- precision_types = ["fp32", "fp16", "fp8", "int4"] small_model_id = "repo/small-model" qwen_model = "Qwen/Qwen3-0.6B" + def test_get_model_info_and_config_from_hf(): """ Tests that model info can be retrieved without error for open-sourced models @@ -52,6 +73,7 @@ def test_model_total_params(): # Num params from https://huggingface.co/Qwen/Qwen3-0.6B assert model_total_params(model_info) == 751632384 + def test_precision_to_byte(): """ Tests that precision data type is converted to byte accurately @@ -86,22 +108,23 @@ def test_precision_to_byte(): assert precision_to_byte("f64") == 8 assert precision_to_byte("ff8_e5m2") == 1 + def test_parameter_memory_req(): """ Tests parameter memory size is accurately calculated given precision """ - factor = 1024 ** 3 + factor = 1024**3 params = [10, 1000, 10000, 100000] precisions = ["FP32", "FP16", "FP8", "INT4"] prec_to_byte = [4, 2, 1, 0.5] for param in params: for j, precision in enumerate(precisions): - expected = param * prec_to_byte[j] / factor assert parameter_memory_req(param, precision) == expected + def test_model_memory_req(): """ Tests model memory can be correctly estimated @@ -115,6 +138,7 @@ def test_model_memory_req(): model_info = get_model_info_from_hf("facebook/opt-125m") model_memory_req(model_info) + def test_kv_cache_req(): """ Tests KV 
cache is estimated correctly @@ -172,16 +196,16 @@ def test_max_concurrent_req(): for dp in range(1, 16): avail_gpu_count = tp * pp * dp gpu_mem = 40 - actual_max_concurrent_req = max_concurrent_requests(model_info, - model_config, - max_model_len=10000, - gpu_memory=gpu_mem, - gpu_mem_util=1, - tp=tp, - pp=pp, - dp=dp, - ) - + actual_max_concurrent_req = max_concurrent_requests( + model_info, + model_config, + max_model_len=10000, + gpu_memory=gpu_mem, + gpu_mem_util=1, + tp=tp, + pp=pp, + dp=dp, + ) expected = math.floor((avail_gpu_count * gpu_mem - model_memory * dp) / per_req_kv_cache_req) if expected < 0: @@ -189,6 +213,7 @@ def test_max_concurrent_req(): assert actual_max_concurrent_req == expected + def test_find_possible_tp(): """ Tests the possible TP sizes are accurately calculated @@ -201,6 +226,7 @@ def test_find_possible_tp(): model_config = get_model_config_from_hf(deepseek) assert find_possible_tp(model_config) == [1, 2, 4, 8, 16, 32, 64, 128] + def test_gpus_required(): """ Tests GPU number required for parallelism is correctly calculated @@ -209,10 +235,10 @@ def test_gpus_required(): for tp in range(1, 16): for pp in range(1, 16): for dp in range(1, 16): - expected = tp * pp * dp assert expected == gpus_required(tp, pp, dp) + def test_allocatable_kv_cache_memory(): """ Tests allocatable kv cache memory is correctly calculated @@ -228,45 +254,32 @@ def test_allocatable_kv_cache_memory(): for tp in range(1, 16): for pp in range(1, 16): for dp in range(1, 16): - # Expected gpu_count = tp * pp * dp expected = gpu_count * gpu_memory - model_memory * dp - actual = allocatable_kv_cache_memory( - model_info, - model_config, - gpu_memory, - gpu_util, - tp, - pp, - dp - ) + actual = allocatable_kv_cache_memory(model_info, model_config, gpu_memory, gpu_util, tp, pp, dp) assert expected == actual + def test_is_moe(): """ Asserts that MOE models can be determined """ - moes = [ - "deepseek-ai/DeepSeek-R1", - "deepseek-ai/DeepSeek-V3.1" - ] + moes = ["deepseek-ai/DeepSeek-R1", "deepseek-ai/DeepSeek-V3.1"] - non_moes = [ - qwen_model, - "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic" - ] + non_moes = [qwen_model, "RedHatAI/Llama-3.3-70B-Instruct-FP8-dynamic"] for model in moes: model_config = get_model_config_from_hf(model) - assert is_moe(model_config) == True + assert is_moe(model_config) for model in non_moes: model_config = get_model_config_from_hf(model) - assert is_moe(model_config) == False + assert not is_moe(model_config) + def test_get_num_experts(): """ @@ -277,7 +290,7 @@ def test_get_num_experts(): "deepseek-ai/DeepSeek-V3.1-Base": 256, "deepseek-ai/DeepSeek-V3.1": 256, "Qwen/Qwen3-235B-A22B-Thinking-2507": 128, - "Qwen/Qwen3-235B-A22B-FP8": 128 + "Qwen/Qwen3-235B-A22B-FP8": 128, } for model, expected_experts in model_to_experts.items(): @@ -285,6 +298,7 @@ def test_get_num_experts(): assert get_num_experts(model_config) == expected_experts + def test_experts_per_gpu(): """ Tests that experts per GPU is calculated correctly for MOE models @@ -295,7 +309,7 @@ def test_experts_per_gpu(): "deepseek-ai/DeepSeek-V3.1-Base", "deepseek-ai/DeepSeek-V3.1", "Qwen/Qwen3-235B-A22B-Thinking-2507", - "Qwen/Qwen3-235B-A22B-FP8" + "Qwen/Qwen3-235B-A22B-FP8", } for model in moe_models: diff --git a/config_explorer/util.py b/llm_d_benchmark/config_explorer/util.py similarity index 81% rename from config_explorer/util.py rename to llm_d_benchmark/config_explorer/util.py index 2a90ea50..595829a6 100644 --- a/config_explorer/util.py +++ b/llm_d_benchmark/config_explorer/util.py @@ -1,11 +1,13 @@ 
""" Streamlit frontend utilities """ + +from dataclasses import dataclass + import streamlit as st from huggingface_hub import ModelInfo from transformers import AutoConfig -from dataclasses import dataclass -from src.config_explorer.capacity_planner import * + # Session state variables pertaining to user selected values USER_SCENARIO_KEY = "scenario" @@ -24,18 +26,22 @@ SELECTED_DP_SIZE_KEY = "selected_dp_size" SELECTED_ENABLE_EP_KEY = "selected_enable_ep" + @dataclass class Scenario: """Scenario stores info about an user scenario in Streamlit""" - model_name: str = 'deepseek-ai/DeepSeek-V3.1' + + model_name: str = "deepseek-ai/DeepSeek-V3.1" model_info: ModelInfo | None = None - model_config: AutoConfig | None = None # Info about model - text_config: AutoConfig | None = None # Info about the model like max positional embeddings can be nested inside text_config for certain architectures like MistralConfig + model_config: AutoConfig | None = None # Info about model + text_config: AutoConfig | None = ( + None # Info about the model like max positional embeddings can be nested inside text_config for certain architectures like MistralConfig + ) max_model_len: int = 1 concurrency: int = 1 # GPU - gpu_name: str = 'NVIDIA-H100-80GB-HBM3' + gpu_name: str = "NVIDIA-H100-80GB-HBM3" gpu_count_avail: int = 1 gpu_mem_util: float = 0.9 @@ -47,20 +53,26 @@ class Scenario: def get_model_name(self) -> str: if not self.model_name: - self.model_name = 'deepseek-ai/DeepSeek-V3.1' + self.model_name = "deepseek-ai/DeepSeek-V3.1" return self.model_name def get_gpu_spec(self, gpu_specs_db: dict) -> dict: return gpu_specs_db[self.gpu_name] def get_gpu_memory(self, gpu_specs_db: dict) -> int: - return self.get_gpu_spec(gpu_specs_db)['memory'] + return self.get_gpu_spec(gpu_specs_db)["memory"] def can_show_mem_util_chart(self, min_gpu_req: int): - if self.model_name and self.model_info and self.model_config and \ - self.max_model_len and self.concurrency and \ - self.gpu_name and self.gpu_count_avail and \ - self.gpu_count_avail >= min_gpu_req: + if ( + self.model_name + and self.model_info + and self.model_config + and self.max_model_len + and self.concurrency + and self.gpu_name + and self.gpu_count_avail + and self.gpu_count_avail >= min_gpu_req + ): return True return False @@ -68,13 +80,13 @@ def reset(self) -> None: """ Resets inputs """ - self.model_name = 'deepseek-ai/DeepSeek-V3.1' + self.model_name = "deepseek-ai/DeepSeek-V3.1" self.model_info = None self.model_config = None self.max_model_len = 1 self.concurrency = 1 - self.gpu_name = 'NVIDIA-H100-80GB-HBM3' + self.gpu_name = "NVIDIA-H100-80GB-HBM3" self.gpu_count_avail = 1 self.gpu_mem_util = 0.9 @@ -83,6 +95,7 @@ def reset(self) -> None: self.dp_size = 1 self.enable_ep = False + def init_session_state(): """ Inits session state for data persistence @@ -91,12 +104,14 @@ def init_session_state(): if USER_SCENARIO_KEY not in st.session_state: st.session_state[USER_SCENARIO_KEY] = Scenario() + def update_scenario(session_state_key: str, scenario_attr: str): """ Update session state value and scenario """ st.session_state[USER_SCENARIO_KEY].__setattr__(scenario_attr, st.session_state[session_state_key]) + def on_update_parallelism(session_state_key: str, scenario_attr: str): """ Update session state values for parallelism and resets other parallelism calculation @@ -105,6 +120,7 @@ def on_update_parallelism(session_state_key: str, scenario_attr: str): scenario.__setattr__(scenario_attr, st.session_state[session_state_key]) scenario.concurrency = 1 + def 
on_update_gpu_count(): """ Reset concurrency to none @@ -115,6 +131,7 @@ def on_update_gpu_count(): scenario.tp_size = 1 scenario.dp_size = 1 + def on_update_gpu_per_node(): """ Reset concurrency to none @@ -123,6 +140,7 @@ def on_update_gpu_per_node(): scenario.gpu_per_node = st.session_state[SELECTED_GPU_PER_NODE_KEY] scenario.concurrency = 1 + def on_update_node_count(): """ Reset concurrency to none @@ -131,6 +149,7 @@ def on_update_node_count(): scenario.node_count = st.session_state[SELECTED_NODE_COUNT_KEY] scenario.concurrency = 1 + def on_update_model_name(): """ Reset model name @@ -142,6 +161,7 @@ def on_update_model_name(): scenario.model_name = st.session_state[SELECTED_MODEL_KEY] + def on_update_max_model_len(): """ Reset max model length @@ -150,8 +170,9 @@ def on_update_max_model_len(): scenario.max_model_len = st.session_state[SELECTED_MAX_MODEL_LEN_KEY] scenario.concurrency = 1 + def pretty_round(num): """ Pretty round to two digits """ - return round(num, 2) \ No newline at end of file + return round(num, 2) diff --git a/deploy/common/patch-service.yaml b/llm_d_benchmark/deploy/common/patch-service.yaml similarity index 100% rename from deploy/common/patch-service.yaml rename to llm_d_benchmark/deploy/common/patch-service.yaml diff --git a/deploy/common/patch-statefulset.yaml b/llm_d_benchmark/deploy/common/patch-statefulset.yaml similarity index 100% rename from deploy/common/patch-statefulset.yaml rename to llm_d_benchmark/deploy/common/patch-statefulset.yaml diff --git a/deploy/common/service.yaml b/llm_d_benchmark/deploy/common/service.yaml similarity index 100% rename from deploy/common/service.yaml rename to llm_d_benchmark/deploy/common/service.yaml diff --git a/deploy/common/statefulset.yaml b/llm_d_benchmark/deploy/common/statefulset.yaml similarity index 100% rename from deploy/common/statefulset.yaml rename to llm_d_benchmark/deploy/common/statefulset.yaml diff --git a/deploy/kustomization.yaml b/llm_d_benchmark/deploy/kustomization.yaml similarity index 100% rename from deploy/kustomization.yaml rename to llm_d_benchmark/deploy/kustomization.yaml diff --git a/deploy/openshift/patch-route.yaml b/llm_d_benchmark/deploy/openshift/patch-route.yaml similarity index 100% rename from deploy/openshift/patch-route.yaml rename to llm_d_benchmark/deploy/openshift/patch-route.yaml diff --git a/deploy/openshift/route.yaml b/llm_d_benchmark/deploy/openshift/route.yaml similarity index 100% rename from deploy/openshift/route.yaml rename to llm_d_benchmark/deploy/openshift/route.yaml diff --git a/deploy/rbac/exec-rbac-role.yaml b/llm_d_benchmark/deploy/rbac/exec-rbac-role.yaml similarity index 100% rename from deploy/rbac/exec-rbac-role.yaml rename to llm_d_benchmark/deploy/rbac/exec-rbac-role.yaml diff --git a/deploy/rbac/exec-rbac-rolebinding.yaml b/llm_d_benchmark/deploy/rbac/exec-rbac-rolebinding.yaml similarity index 100% rename from deploy/rbac/exec-rbac-rolebinding.yaml rename to llm_d_benchmark/deploy/rbac/exec-rbac-rolebinding.yaml diff --git a/deploy/rbac/patch-rbac-role.yaml b/llm_d_benchmark/deploy/rbac/patch-rbac-role.yaml similarity index 100% rename from deploy/rbac/patch-rbac-role.yaml rename to llm_d_benchmark/deploy/rbac/patch-rbac-role.yaml diff --git a/deploy/rbac/patch-rbac-rolebinding.yaml b/llm_d_benchmark/deploy/rbac/patch-rbac-rolebinding.yaml similarity index 100% rename from deploy/rbac/patch-rbac-rolebinding.yaml rename to llm_d_benchmark/deploy/rbac/patch-rbac-rolebinding.yaml diff --git a/docs/architecture.drawio 
b/llm_d_benchmark/docs/architecture.drawio similarity index 100% rename from docs/architecture.drawio rename to llm_d_benchmark/docs/architecture.drawio diff --git a/docs/benchmark_report.md b/llm_d_benchmark/docs/benchmark_report.md similarity index 100% rename from docs/benchmark_report.md rename to llm_d_benchmark/docs/benchmark_report.md diff --git a/docs/doe.md b/llm_d_benchmark/docs/doe.md similarity index 100% rename from docs/doe.md rename to llm_d_benchmark/docs/doe.md diff --git a/docs/faq.md b/llm_d_benchmark/docs/faq.md similarity index 100% rename from docs/faq.md rename to llm_d_benchmark/docs/faq.md diff --git a/docs/flexibility.md b/llm_d_benchmark/docs/flexibility.md similarity index 100% rename from docs/flexibility.md rename to llm_d_benchmark/docs/flexibility.md diff --git a/docs/images/architecture.drawio b/llm_d_benchmark/docs/images/architecture.drawio similarity index 100% rename from docs/images/architecture.drawio rename to llm_d_benchmark/docs/images/architecture.drawio diff --git a/docs/images/architecture.drawio.png b/llm_d_benchmark/docs/images/architecture.drawio.png similarity index 100% rename from docs/images/architecture.drawio.png rename to llm_d_benchmark/docs/images/architecture.drawio.png diff --git a/docs/images/scenarios_1_2_3_comparison.png b/llm_d_benchmark/docs/images/scenarios_1_2_3_comparison.png similarity index 100% rename from docs/images/scenarios_1_2_3_comparison.png rename to llm_d_benchmark/docs/images/scenarios_1_2_3_comparison.png diff --git a/docs/lifecycle.md b/llm_d_benchmark/docs/lifecycle.md similarity index 100% rename from docs/lifecycle.md rename to llm_d_benchmark/docs/lifecycle.md diff --git a/docs/observability.md b/llm_d_benchmark/docs/observability.md similarity index 100% rename from docs/observability.md rename to llm_d_benchmark/docs/observability.md diff --git a/docs/quickstart.md b/llm_d_benchmark/docs/quickstart.md similarity index 100% rename from docs/quickstart.md rename to llm_d_benchmark/docs/quickstart.md diff --git a/docs/reproducibility.md b/llm_d_benchmark/docs/reproducibility.md similarity index 100% rename from docs/reproducibility.md rename to llm_d_benchmark/docs/reproducibility.md diff --git a/docs/resource_requirements.md b/llm_d_benchmark/docs/resource_requirements.md similarity index 100% rename from docs/resource_requirements.md rename to llm_d_benchmark/docs/resource_requirements.md diff --git a/docs/run.md b/llm_d_benchmark/docs/run.md similarity index 100% rename from docs/run.md rename to llm_d_benchmark/docs/run.md diff --git a/docs/standup.md b/llm_d_benchmark/docs/standup.md similarity index 100% rename from docs/standup.md rename to llm_d_benchmark/docs/standup.md diff --git a/experiments/inference-scheduling.yaml b/llm_d_benchmark/experiments/inference-scheduling.yaml similarity index 100% rename from experiments/inference-scheduling.yaml rename to llm_d_benchmark/experiments/inference-scheduling.yaml diff --git a/experiments/pd-disaggregation.yaml b/llm_d_benchmark/experiments/pd-disaggregation.yaml similarity index 100% rename from experiments/pd-disaggregation.yaml rename to llm_d_benchmark/experiments/pd-disaggregation.yaml diff --git a/experiments/precise-prefix-cache-aware.yaml b/llm_d_benchmark/experiments/precise-prefix-cache-aware.yaml similarity index 100% rename from experiments/precise-prefix-cache-aware.yaml rename to llm_d_benchmark/experiments/precise-prefix-cache-aware.yaml diff --git a/run.sh b/llm_d_benchmark/run.sh similarity index 100% rename from run.sh rename to 
llm_d_benchmark/run.sh
diff --git a/scenarios/cicd/kind_sim_fb.sh b/llm_d_benchmark/scenarios/cicd/kind_sim_fb.sh
similarity index 100%
rename from scenarios/cicd/kind_sim_fb.sh
rename to llm_d_benchmark/scenarios/cicd/kind_sim_fb.sh
diff --git a/scenarios/cicd/ocp_L40_fb.sh b/llm_d_benchmark/scenarios/cicd/ocp_L40_fb.sh
similarity index 100%
rename from scenarios/cicd/ocp_L40_fb.sh
rename to llm_d_benchmark/scenarios/cicd/ocp_L40_fb.sh
diff --git a/scenarios/examples/aiu.sh b/llm_d_benchmark/scenarios/examples/aiu.sh
similarity index 100%
rename from scenarios/examples/aiu.sh
rename to llm_d_benchmark/scenarios/examples/aiu.sh
diff --git a/scenarios/examples/gpu.sh b/llm_d_benchmark/scenarios/examples/gpu.sh
similarity index 100%
rename from scenarios/examples/gpu.sh
rename to llm_d_benchmark/scenarios/examples/gpu.sh
diff --git a/scenarios/guides/inference-scheduling.sh b/llm_d_benchmark/scenarios/guides/inference-scheduling.sh
similarity index 100%
rename from scenarios/guides/inference-scheduling.sh
rename to llm_d_benchmark/scenarios/guides/inference-scheduling.sh
diff --git a/scenarios/guides/pd-disaggregation.sh b/llm_d_benchmark/scenarios/guides/pd-disaggregation.sh
similarity index 100%
rename from scenarios/guides/pd-disaggregation.sh
rename to llm_d_benchmark/scenarios/guides/pd-disaggregation.sh
diff --git a/scenarios/guides/precise-prefix-cache-aware.sh b/llm_d_benchmark/scenarios/guides/precise-prefix-cache-aware.sh
similarity index 100%
rename from scenarios/guides/precise-prefix-cache-aware.sh
rename to llm_d_benchmark/scenarios/guides/precise-prefix-cache-aware.sh
diff --git a/scenarios/guides/sim.sh b/llm_d_benchmark/scenarios/guides/sim.sh
similarity index 100%
rename from scenarios/guides/sim.sh
rename to llm_d_benchmark/scenarios/guides/sim.sh
diff --git a/scenarios/guides/wide-ep-lws.sh b/llm_d_benchmark/scenarios/guides/wide-ep-lws.sh
similarity index 100%
rename from scenarios/guides/wide-ep-lws.sh
rename to llm_d_benchmark/scenarios/guides/wide-ep-lws.sh
diff --git a/scenarios/well-lit b/llm_d_benchmark/scenarios/well-lit
similarity index 100%
rename from scenarios/well-lit
rename to llm_d_benchmark/scenarios/well-lit
diff --git a/setup/e2e.sh b/llm_d_benchmark/setup/e2e.sh
similarity index 99%
rename from setup/e2e.sh
rename to llm_d_benchmark/setup/e2e.sh
index bee79ee3..08a7a279 100755
--- a/setup/e2e.sh
+++ b/llm_d_benchmark/setup/e2e.sh
@@ -209,7 +209,9 @@
 sweeptmpdir=$(mktemp -d -t sweepXXX)
 generate_standup_parameter_scenarios $sweeptmpdir $LLMDBENCH_SCENARIO_FULL_PATH $LLMDBENCH_HARNESS_EXPERIMENT_TREATMENTS
 announce "ℹ️ A list of treatments for standup parameters was generated at \"${sweeptmpdir}\""
-sleep 5
+if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 0 ]]; then
+  sleep 5
+fi

 for scenario in $(ls $sweeptmpdir/setup/treatment_list/); do
   export LLMDBENCH_CLIOVERRIDE_DEPLOY_SCENARIO=$sweeptmpdir/setup/treatment_list/$scenario
diff --git a/setup/env.sh b/llm_d_benchmark/setup/env.sh
similarity index 99%
rename from setup/env.sh
rename to llm_d_benchmark/setup/env.sh
index 9b3be8fa..a8bea4cb 100644
--- a/setup/env.sh
+++ b/llm_d_benchmark/setup/env.sh
@@ -378,7 +378,9 @@ if [[ ! -f $LLMDBENCH_CONTROL_WORK_DIR/environment/context.ctx ]]; then
        echo "WARNING: environment variable LLMDBENCH_CLUSTER_URL=$LLMDBENCH_CLUSTER_URL. Will attempt to use current context \"${current_context}\"." 
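# Editor's note (sketch, not part of this patch): the dry-run guard added below is the
# same idiom this PR applies in e2e.sh, functions.sh, and standup.sh, e.g.
#   if [[ ${LLMDBENCH_CONTROL_DRY_RUN:-0} -eq 0 ]]; then sleep 5; fi
# so --dry-run invocations skip every interactive pause. The ":-0" default is an
# editorial addition; the scripts themselves reference the variable directly.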
echo "" export LLMDBENCH_CONTROL_WARNING_DISPLAYED=1 - sleep 5 + if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 0 ]]; then + sleep 5 + fi fi export LLMDBENCH_CONTROL_REMOTE_KUBECONFIG_FILENAME=config else diff --git a/setup/functions.sh b/llm_d_benchmark/setup/functions.sh similarity index 98% rename from setup/functions.sh rename to llm_d_benchmark/setup/functions.sh index a823c57d..5d91a1d3 100755 --- a/setup/functions.sh +++ b/llm_d_benchmark/setup/functions.sh @@ -151,7 +151,9 @@ function llmdbench_execute_cmd { if [[ $ecode -ne 0 && ${attempts} -gt 1 ]] then counter="$(( ${counter} + 1 ))" - sleep ${delay} + if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 0 ]]; then + sleep ${delay} + fi else break fi @@ -466,6 +468,11 @@ function check_storage_class { fi fi + if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 1 ]]; then + announce "ℹ️ Dry-run mode enabled. Skipping storage class validation for \"$LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS\"." + return 0 + fi + local has_sc=$($LLMDBENCH_CONTROL_KCMD get storageclasses | grep $LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS || true) if [[ -z $has_sc ]]; then announce "❌ ERROR. Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS=$LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS but could not find such storage class" @@ -778,15 +785,18 @@ export -f wait_for_download_job function run_step { local script_name=$1 + if [[ $script_name == "__init__" ]]; then + return 0 + fi local step_nr=$(echo $script_name | cut -d '_' -f 1) - local script_implementaton=LLMDBENCH_CONTROL_STEP_${step_nr}_IMPLEMENTATION - - if [[ -f $script_name.${!script_implementaton} ]]; then - local script_path=$script_name.${!script_implementaton} + local script_implementation=LLMDBENCH_CONTROL_STEP_${step_nr}_IMPLEMENTATION + local impl_type=${!script_implementation:-sh} + if [[ -f $script_name.${impl_type} ]]; then + local script_path=$script_name.${impl_type} else - local script_path=$(ls ${LLMDBENCH_STEPS_DIR}/${script_name}*.${!script_implementaton}) + local script_path=$(ls ${LLMDBENCH_STEPS_DIR}/${script_name}*.${impl_type}) fi if [ -f $script_path ]; then local step_id=$(basename "$script_path") @@ -796,9 +806,9 @@ function run_step { echo -e "[DRY RUN] $script_path\n" fi - if [[ ${!script_implementaton} == sh ]]; then + if [[ ${impl_type} == sh ]]; then source $script_path - elif [[ ${!script_implementaton} == py ]]; then + elif [[ ${impl_type} == py ]]; then python3 $script_path local ec=$? if [[ $ec -ne 0 ]]; then diff --git a/setup/install_deps.sh b/llm_d_benchmark/setup/install_deps.sh similarity index 80% rename from setup/install_deps.sh rename to llm_d_benchmark/setup/install_deps.sh index c65a37ff..4ae3dfe2 100755 --- a/setup/install_deps.sh +++ b/llm_d_benchmark/setup/install_deps.sh @@ -159,32 +159,4 @@ if ! command -v pip3 &> /dev/null; then echo "pip3 installed successfully." fi -python_deps="kubernetes pykube-ng kubernetes-asyncio GitPython requests PyYAML Jinja2 requests huggingface_hub==0.34.4 transformers==4.55.4" - -for dep in $python_deps; do - pkg_name=$(echo "${dep}" | cut -d= -f1) - if pip3 show "${pkg_name}" &>/dev/null; then - # check if a version was specified - if [[ "${dep}" == *"=="* ]]; then - required_version=$(echo "${dep}" | cut -d= -f3) - installed_version=$(pip3 show "${pkg_name}" | awk '/Version:/{print $2}') - if [[ "${installed_version}" == "${required_version}" ]]; then - echo "${pkg_name}==${installed_version} is already installed." 
>> ~/.llmdbench_dependencies_checked - continue - else - echo "${pkg_name} installed but version mismatch (${installed_version} != ${required_version}). Upgrading..." - fi - else - echo "${pkg_name} is already installed." >> ~/.llmdbench_dependencies_checked - continue - fi - fi - - echo "Installing ${dep}..." - if ! pip3 install "${dep}"; then - echo "ERROR: Failed to install Python package ${dep}!" - exit 1 - fi -done - popd &>/dev/null diff --git a/setup/preprocess/standalone-preprocess.py b/llm_d_benchmark/setup/preprocess/standalone-preprocess.py similarity index 93% rename from setup/preprocess/standalone-preprocess.py rename to llm_d_benchmark/setup/preprocess/standalone-preprocess.py index 566907a8..06dead4e 100755 --- a/setup/preprocess/standalone-preprocess.py +++ b/llm_d_benchmark/setup/preprocess/standalone-preprocess.py @@ -13,10 +13,9 @@ import psutil + # Configure logging -logging.basicConfig( - level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" -) +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) @@ -53,9 +52,7 @@ def vllm_health( elapsed = time.perf_counter() - start if elapsed > max_health_wait: # if vllm hasn't responded - logger.info( - "vLLM process is stuck for more than %.2f secs, aborting ...", elapsed - ) + logger.info("vLLM process is stuck for more than %.2f secs, aborting ...", elapsed) kill_process(proc) return @@ -183,9 +180,7 @@ def logging_config(path: str) -> None: if vllm_formatter is not None: format_str = vllm_formatter.get("format") if format_str is not None: - vllm_formatter["format"] = format_str.replace( - "%(asctime)s", "%(asctime)s.%(msecs)03d" - ) + vllm_formatter["format"] = format_str.replace("%(asctime)s", "%(asctime)s.%(msecs)03d") # change default config json_string = json.dumps(json_data, indent=4) @@ -209,9 +204,7 @@ def create_logging_config(path: str): process.join() if process.exitcode is not None and process.exitcode != 0: - raise RuntimeError( - f"Custom logging config process exited with code '{process.exitcode}'" - ) + raise RuntimeError(f"Custom logging config process exited with code '{process.exitcode}'") def preprocess_run() -> str: diff --git a/setup/preprocess/standalone-preprocess.sh b/llm_d_benchmark/setup/preprocess/standalone-preprocess.sh similarity index 100% rename from setup/preprocess/standalone-preprocess.sh rename to llm_d_benchmark/setup/preprocess/standalone-preprocess.sh diff --git a/setup/presets/gaie/default.yaml b/llm_d_benchmark/setup/presets/gaie/default.yaml similarity index 100% rename from setup/presets/gaie/default.yaml rename to llm_d_benchmark/setup/presets/gaie/default.yaml diff --git a/setup/presets/gaie/inf-sche-kv.yaml b/llm_d_benchmark/setup/presets/gaie/inf-sche-kv.yaml similarity index 100% rename from setup/presets/gaie/inf-sche-kv.yaml rename to llm_d_benchmark/setup/presets/gaie/inf-sche-kv.yaml diff --git a/setup/presets/gaie/inf-sche-none.yaml b/llm_d_benchmark/setup/presets/gaie/inf-sche-none.yaml similarity index 100% rename from setup/presets/gaie/inf-sche-none.yaml rename to llm_d_benchmark/setup/presets/gaie/inf-sche-none.yaml diff --git a/setup/presets/gaie/inf-sche-prefix-kv-queue.yaml b/llm_d_benchmark/setup/presets/gaie/inf-sche-prefix-kv-queue.yaml similarity index 100% rename from setup/presets/gaie/inf-sche-prefix-kv-queue.yaml rename to llm_d_benchmark/setup/presets/gaie/inf-sche-prefix-kv-queue.yaml diff --git a/setup/presets/gaie/inf-sche-prefix-kv.yaml 
b/llm_d_benchmark/setup/presets/gaie/inf-sche-prefix-kv.yaml
similarity index 100%
rename from setup/presets/gaie/inf-sche-prefix-kv.yaml
rename to llm_d_benchmark/setup/presets/gaie/inf-sche-prefix-kv.yaml
diff --git a/setup/presets/gaie/inf-sche-prefix.yaml b/llm_d_benchmark/setup/presets/gaie/inf-sche-prefix.yaml
similarity index 100%
rename from setup/presets/gaie/inf-sche-prefix.yaml
rename to llm_d_benchmark/setup/presets/gaie/inf-sche-prefix.yaml
diff --git a/setup/presets/gaie/inf-sche-queue.yaml b/llm_d_benchmark/setup/presets/gaie/inf-sche-queue.yaml
similarity index 100%
rename from setup/presets/gaie/inf-sche-queue.yaml
rename to llm_d_benchmark/setup/presets/gaie/inf-sche-queue.yaml
diff --git a/setup/presets/gaie/pd-config.yaml b/llm_d_benchmark/setup/presets/gaie/pd-config.yaml
similarity index 100%
rename from setup/presets/gaie/pd-config.yaml
rename to llm_d_benchmark/setup/presets/gaie/pd-config.yaml
diff --git a/setup/presets/gaie/prefix-cache-estimate-config.yaml b/llm_d_benchmark/setup/presets/gaie/prefix-cache-estimate-config.yaml
similarity index 100%
rename from setup/presets/gaie/prefix-cache-estimate-config.yaml
rename to llm_d_benchmark/setup/presets/gaie/prefix-cache-estimate-config.yaml
diff --git a/setup/presets/gaie/prefix-cache-tracking-config.yaml b/llm_d_benchmark/setup/presets/gaie/prefix-cache-tracking-config.yaml
similarity index 100%
rename from setup/presets/gaie/prefix-cache-tracking-config.yaml
rename to llm_d_benchmark/setup/presets/gaie/prefix-cache-tracking-config.yaml
diff --git a/setup/run.sh b/llm_d_benchmark/setup/run.sh
similarity index 100%
rename from setup/run.sh
rename to llm_d_benchmark/setup/run.sh
diff --git a/setup/standup.sh b/llm_d_benchmark/setup/standup.sh
similarity index 98%
rename from setup/standup.sh
rename to llm_d_benchmark/setup/standup.sh
index e894f9dc..d9077f10 100755
--- a/setup/standup.sh
+++ b/llm_d_benchmark/setup/standup.sh
@@ -153,7 +153,10 @@ if [[ $LLMDBENCH_STEP_LIST == $(find $LLMDBENCH_STEPS_DIR -name "*.sh" -o -name
 fi

 extract_environment
-sleep 5
+# Skip the pause when LLMDBENCH_CONTROL_DRY_RUN is enabled
+if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 0 ]]; then
+  sleep 5
+fi

 for step in ${LLMDBENCH_STEP_LIST//,/ }; do
   if [[ ${#step} -lt 2 ]]
diff --git a/setup/steps/00_ensure_llm-d-infra.py b/llm_d_benchmark/setup/steps/00_ensure_llm-d-infra.py
similarity index 52%
rename from setup/steps/00_ensure_llm-d-infra.py
rename to llm_d_benchmark/setup/steps/00_ensure_llm-d-infra.py
index 4676528c..0189ce25 100755
--- a/setup/steps/00_ensure_llm-d-infra.py
+++ b/llm_d_benchmark/setup/steps/00_ensure_llm-d-infra.py
@@ -1,67 +1,44 @@
 import os
 import re
 import sys
-from pathlib import Path
 from dataclasses import dataclass
-from typing import List, Tuple
-
-current_file = Path(__file__).resolve()
-workspace_root = current_file.parents[2]
-setup_dir = current_file.parents[1]
-config_explorer_src = workspace_root / "config_explorer" / "src"
-sys.path.insert(0, str(config_explorer_src))
-sys.path.insert(1, str(setup_dir))
-sys.path.insert(2, str(workspace_root))
-if "PYTHONPATH" in os.environ:
-    os.environ["PYTHONPATH"] = f"{config_explorer_src}:{setup_dir}:{workspace_root}:{os.environ['PYTHONPATH']}"
-else:
-    os.environ["PYTHONPATH"] = f"{config_explorer_src}:{setup_dir}:{workspace_root}"
-
-print(f"Workspace root directory added to PYTHONPATH: {os.environ['PYTHONPATH']}")
-
-try:
-    from transformers import AutoConfig
-    from huggingface_hub import ModelInfo
-    from huggingface_hub.errors import GatedRepoError, HfHubHTTPError
-except 
ModuleNotFoundError as e: - print(f"❌ ERROR: Required dependency not installed: {e}") - print("Please install the required dependencies:") - print(f" pip install -r {workspace_root / 'config_explorer' / 'requirements.txt'}") - sys.exit(1) - -# Import config_explorer module -try: - from config_explorer.capacity_planner import KVCacheDetail, gpus_required, get_model_info_from_hf, get_model_config_from_hf, get_text_config, find_possible_tp, max_context_len, available_gpu_memory, model_total_params, model_memory_req, allocatable_kv_cache_memory, kv_cache_req, max_concurrent_requests -except ModuleNotFoundError as e: - print(f"❌ ERROR: Failed to import config_explorer module: {e}") - print(f"\nTry: pip install -r {workspace_root / 'config_explorer' / 'requirements.txt'}") - sys.exit(1) -except Exception as e: - print(f"❌ ERROR: An unexpected error occurred while importing config_explorer: {e}") - import traceback - traceback.print_exc() - sys.exit(1) - - - -# ---------------- Import local packages ---------------- -try: - from functions import announce, environment_variable_to_dict, get_accelerator_nr, is_standalone_deployment, get_accelerator_type -except ImportError as e: - # Fallback for when dependencies are not available - print(f"❌ ERROR: Could not import required modules: {e}") - print("This script requires the llm-d environment to be properly set up.") - print("Please run: ./setup/install_deps.sh") - sys.exit(1) + +from huggingface_hub import ModelInfo +from huggingface_hub.errors import GatedRepoError, HfHubHTTPError +from transformers import AutoConfig + +from llm_d_benchmark.config_explorer.capacity_planner import ( + KVCacheDetail, + allocatable_kv_cache_memory, + available_gpu_memory, + find_possible_tp, + get_model_config_from_hf, + get_model_info_from_hf, + get_text_config, + gpus_required, + max_concurrent_requests, + max_context_len, + model_memory_req, + model_total_params, +) +from llm_d_benchmark.setup.utils.functions import ( + announce, + environment_variable_to_dict, + get_accelerator_nr, + get_accelerator_type, + is_standalone_deployment, +) + # ---------------- Data structure for validating vllm args ---------------- COMMON = "COMMON" PREFILL = "PREFILL" -DECODE= "DECODE" +DECODE = "DECODE" + @dataclass class ValidationParam: - models: List[str] + models: list[str] hf_token: str replicas: int gpu_type: str @@ -73,8 +50,10 @@ class ValidationParam: gpu_memory_util: float max_model_len: int + # ---------------- Helpers ---------------- + def announce_failed(msg: str, ignore_if_failed: bool): """ Prints out failure message and exits execution if ignore_if_failed==False, otherwise continue @@ -84,6 +63,7 @@ def announce_failed(msg: str, ignore_if_failed: bool): if not ignore_if_failed: sys.exit(1) + def convert_accelerator_memory(gpu_name: str, accelerator_memory_param: str) -> int: """ Try to guess the accelerator memory from its name @@ -98,7 +78,9 @@ def convert_accelerator_memory(gpu_name: str, accelerator_memory_param: str) -> result = 0 if gpu_name == "auto": - announce(f"⚠️ Accelerator (LLMDBENCH_VLLM_COMMON_AFFINITY) type is set to be automatically detected, but requires connecting to kube client. The affinity check is invoked at a later step. To exercise the capacity planner, set LLMDBENCH_COMMON_ACCELERATOR_MEMORY. Otherwise, capacity planner will use 0 as the GPU memory.") + announce( + "⚠️ Accelerator (LLMDBENCH_VLLM_COMMON_AFFINITY) type is set to be automatically detected, but requires connecting to kube client. The affinity check is invoked at a later step. 
To exercise the capacity planner, set LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEMORY. Otherwise, the capacity planner will use 0 as the GPU memory."
+        )

     match = re.search(r"(\d+)\s*GB", gpu_name, re.IGNORECASE)
     if match:
@@ -110,10 +92,13 @@ def convert_accelerator_memory(gpu_name: str, accelerator_memory_param: str) ->
         result = int(match2.group(1))

     if result > 0:
-        announce(f"Determined GPU memory={result} from the accelerator's name: {gpu_name}. It may be incorrect, please set LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEMORY for accuracy.")
+        announce(
+            f"Determined GPU memory={result} from the accelerator's name: {gpu_name}. It may be incorrect, please set LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEMORY for accuracy."
+        )

     return result

+
 def get_model_info(model_name: str, hf_token: str, ignore_if_failed: bool) -> ModelInfo | None:
     """
     Obtains model info from HF
@@ -123,15 +108,23 @@ def get_model_info(model_name: str, hf_token: str, ignore_if_failed: bool) -> Mo
         return get_model_info_from_hf(model_name, hf_token)

     except GatedRepoError:
-        announce_failed("Model is gated and the token provided via LLMDBENCH_HF_TOKEN does not, work. Please double check.", ignore_if_failed)
+        announce_failed(
+            "Model is gated and the token provided via LLMDBENCH_HF_TOKEN does not work. Please double check.",
+            ignore_if_failed,
+        )

     except HfHubHTTPError as hf_exp:
-        announce_failed(f"Error reaching Hugging Face API: Is LLMDBENCH_HF_TOKEN correctly set? {hf_exp}", ignore_if_failed)
+        announce_failed(
+            f"Error reaching Hugging Face API: Is LLMDBENCH_HF_TOKEN correctly set? {hf_exp}", ignore_if_failed
+        )

     except Exception as e:
         announce_failed(f"Cannot retrieve ModelInfo: {e}", ignore_if_failed)

     return None

-def get_model_config_and_text_config(model_name: str, hf_token: str, ignore_if_failed: bool) -> Tuple[AutoConfig | None, AutoConfig | None]:
+
+def get_model_config_and_text_config(
+    model_name: str, hf_token: str, ignore_if_failed: bool
+) -> tuple[AutoConfig | None, AutoConfig | None]:
     """
     Obtains model config and text config from HF
     """
@@ -141,15 +134,21 @@ def get_model_config_and_text_config(model_name: str, hf_token: str, ignore_if_f
         return config, get_text_config(config)

     except GatedRepoError:
-        announce_failed("Model is gated and the token provided via LLMDBENCH_HF_TOKEN does not work. Please double check.", ignore_if_failed)
+        announce_failed(
+            "Model is gated and the token provided via LLMDBENCH_HF_TOKEN does not work. Please double check.",
+            ignore_if_failed,
+        )

     except HfHubHTTPError as hf_exp:
-        announce_failed(f"Error reaching Hugging Face API. Is LLMDBENCH_HF_TOKEN correctly set? {hf_exp}", ignore_if_failed)
+        announce_failed(
+            f"Error reaching Hugging Face API. Is LLMDBENCH_HF_TOKEN correctly set? {hf_exp}", ignore_if_failed
+        )

     except Exception as e:
         announce_failed(f"Cannot retrieve model config: {e}", ignore_if_failed)

     return None, None

-def validate_vllm_params(param: ValidationParam, ignore_if_failed: bool, type: str=COMMON):
+
+def validate_vllm_params(param: ValidationParam, ignore_if_failed: bool, type: str = COMMON):
     """
     Given a list of vLLM parameters, validate using capacity planner
     """
@@ -170,7 +169,10 @@ def validate_vllm_params(param: ValidationParam, ignore_if_failed: bool, type: s

     # Sanity check on user inputs
     if gpu_memory is None:
-        announce_failed("Cannot determine accelerator memory. Please set LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEMORY to enable Capacity Planner.", ignore_if_failed)
+        announce_failed(
+            "Cannot determine accelerator memory. 
Please set LLMDBENCH_VLLM_COMMON_ACCELERATOR_MEMORY to enable Capacity Planner.", + ignore_if_failed, + ) per_replica_requirement = gpus_required(tp=tp, dp=dp) if replicas == 0: @@ -178,10 +180,15 @@ def validate_vllm_params(param: ValidationParam, ignore_if_failed: bool, type: s total_gpu_requirement = per_replica_requirement if total_gpu_requirement > user_requested_gpu_count: - announce_failed(f"Accelerator requested is {user_requested_gpu_count} but it is not enough to stand up the model. Set LLMDBENCH_VLLM_{env_var_prefix}_ACCELERATOR_NR to TP x DP = {tp} x {dp} = {total_gpu_requirement}", ignore_if_failed) + announce_failed( + f"Accelerator requested is {user_requested_gpu_count} but it is not enough to stand up the model. Set LLMDBENCH_VLLM_{env_var_prefix}_ACCELERATOR_NR to TP x DP = {tp} x {dp} = {total_gpu_requirement}", + ignore_if_failed, + ) if total_gpu_requirement < user_requested_gpu_count: - announce(f"⚠️ For each replica, model requires {total_gpu_requirement}, but you requested {user_requested_gpu_count} for the deployment. Note that some GPUs will be idle.") + announce( + f"⚠️ For each replica, model requires {total_gpu_requirement}, but you requested {user_requested_gpu_count} for the deployment. Note that some GPUs will be idle." + ) # Use capacity planner for further validation for model in models_list: @@ -193,10 +200,16 @@ def validate_vllm_params(param: ValidationParam, ignore_if_failed: bool, type: s try: valid_tp_values = find_possible_tp(text_config) if tp not in valid_tp_values: - announce_failed(f"TP={tp} is invalid. Please select from these options ({valid_tp_values}) for {model}.", ignore_if_failed) - except AttributeError: + announce_failed( + f"TP={tp} is invalid. Please select from these options ({valid_tp_values}) for {model}.", + ignore_if_failed, + ) + except AttributeError as e: # Error: config['num_attention_heads'] not in config - announce_failed(f"Cannot obtain data on the number of attention heads, cannot find valid tp values: {e}", ignore_if_failed) + announce_failed( + f"Cannot obtain data on the number of attention heads, cannot find valid tp values: {e}", + ignore_if_failed, + ) # Check if model context length is valid valid_max_context_len = 0 @@ -207,15 +220,22 @@ def validate_vllm_params(param: ValidationParam, ignore_if_failed: bool, type: s announce_failed(f"Cannot obtain data on the max context length for model: {e}", ignore_if_failed) if max_model_len > valid_max_context_len: - announce_failed(f"Max model length = {max_model_len} exceeds the acceptable for {model}. Set LLMDBENCH_VLLM_COMMON_MAX_MODEL_LEN to a value below or equal to {valid_max_context_len}", ignore_if_failed) + announce_failed( + f"Max model length = {max_model_len} exceeds the acceptable for {model}. 
Set LLMDBENCH_VLLM_COMMON_MAX_MODEL_LEN to a value below or equal to {valid_max_context_len}",
+                    ignore_if_failed,
+                )
         else:
-            announce_failed(f"Model config on parameter shape not available.", ignore_if_failed)
+            announce_failed("Model config does not provide parameter shape information.", ignore_if_failed)

         # Display memory info
         announce("👉 Collecting GPU information....")
         avail_gpu_memory = available_gpu_memory(gpu_memory, gpu_memory_util)
-        announce(f"ℹ️ {gpu_memory} GB of memory per GPU, with {gpu_memory} GB x {gpu_memory_util} (gpu_memory_utilization) = {avail_gpu_memory} GB available to use.")
-        announce(f"ℹ️ Each model replica requires {per_replica_requirement} GPUs, total available GPU memory = {avail_gpu_memory * per_replica_requirement} GB.")
+        announce(
+            f"ℹ️ {gpu_memory} GB of memory per GPU, with {gpu_memory} GB x {gpu_memory_util} (gpu_memory_utilization) = {avail_gpu_memory} GB available to use."
+        )
+        announce(
+            f"ℹ️ Each model replica requires {per_replica_requirement} GPUs, total available GPU memory = {avail_gpu_memory * per_replica_requirement} GB."
+        )

         # # Calculate model memory requirement
         announce("👉 Collecting model information....")
@@ -230,33 +250,51 @@
             # Estimate KV cache memory and max number of requests that can be served in worst case scenario
             announce("👉 Estimating available KV cache....")
             available_kv_cache = allocatable_kv_cache_memory(
-                model_info, model_config,
-                gpu_memory, gpu_memory_util,
-                tp=tp, dp=dp,
+                model_info,
+                model_config,
+                gpu_memory,
+                gpu_memory_util,
+                tp=tp,
+                dp=dp,
             )

             if available_kv_cache < 0:
-                announce_failed(f"There is not enough GPU memory to stand up model. Exceeds by {abs(available_kv_cache)} GB.", ignore_if_failed)
+                announce_failed(
+                    f"There is not enough GPU memory to stand up the model. 
Exceeds by {abs(available_kv_cache)} GB.",
+                    ignore_if_failed,
+                )

             announce(f"ℹ️ Allocatable memory for KV cache {available_kv_cache} GB")

             kv_details = KVCacheDetail(model_info, model_config, max_model_len, batch_size=1)
-            announce(f"ℹ️ KV cache memory for a request taking --max-model-len={max_model_len} requires {kv_details.per_request_kv_cache_gb} GB of memory")
+            announce(
+                f"ℹ️ KV cache memory for a request taking --max-model-len={max_model_len} requires {kv_details.per_request_kv_cache_gb} GB of memory"
+            )

             total_concurrent_reqs = max_concurrent_requests(
-                model_info, model_config, max_model_len,
-                gpu_memory, gpu_memory_util,
-                tp=tp, dp=dp,
+                model_info,
+                model_config,
+                max_model_len,
+                gpu_memory,
+                gpu_memory_util,
+                tp=tp,
+                dp=dp,
+            )
+            announce(
+                f"ℹ️ The vLLM server can process up to {total_concurrent_reqs} requests at the same time, assuming the worst case scenario where each request takes the full --max-model-len"
             )
-            announce(f"ℹ️ The vLLM server can process up to {total_concurrent_reqs} number of requests at the same time, assuming the worst case scenario that each request takes --max-model-len")

         except AttributeError as e:
             # Model might not have safetensors data on parameters
-            announce_failed(f"Does not have enough information about model to estimate model memory or KV cache: {e}", ignore_if_failed)
+            announce_failed(
+                f"Does not have enough information about the model to estimate model memory or KV cache: {e}",
+                ignore_if_failed,
+            )

     else:
-        announce_failed(f"Model info on model's architecture not available.", ignore_if_failed)
+        announce_failed("Model info on model's architecture not available.", ignore_if_failed)

-def get_validation_param(ev: dict, type: str=COMMON) -> ValidationParam:
+
+def get_validation_param(ev: dict, type: str = COMMON) -> ValidationParam:
     """
     Returns validation param from type: one of prefill, decode, or None (default=common)
     """
@@ -266,35 +304,36 @@
         prefix = f"vllm_modelservice_{type}"
     prefix = prefix.lower()

-    models_list = ev['deploy_model_list']
+    models_list = ev["deploy_model_list"]
     models_list = [m.strip() for m in models_list.split(",")]

-    replicas = ev[f'{prefix}_replicas'] or 0
+    replicas = ev[f"{prefix}_replicas"] or 0
     replicas = int(replicas)

     gpu_type = get_accelerator_type(ev)
-    tp_size = int(ev[f'{prefix}_tensor_parallelism'])
-    dp_size = int(ev[f'{prefix}_data_parallelism'])
-    user_accelerator_nr = ev[f'{prefix}_accelerator_nr']
+    tp_size = int(ev[f"{prefix}_tensor_parallelism"])
+    dp_size = int(ev[f"{prefix}_data_parallelism"])
+    user_accelerator_nr = ev[f"{prefix}_accelerator_nr"]

-    hf_token = ev['hf_token']
+    hf_token = ev["hf_token"]
    if hf_token == "":
        hf_token = None

    validation_param = ValidationParam(
-        models = models_list,
-        hf_token = hf_token,
-        replicas = replicas,
-        gpu_type = gpu_type,
-        gpu_memory = convert_accelerator_memory(gpu_type, ev['vllm_common_accelerator_memory']),
-        tp = tp_size,
-        dp = dp_size,
-        accelerator_nr = user_accelerator_nr,
-        requested_accelerator_nr = get_accelerator_nr(user_accelerator_nr, tp_size, dp_size),
-        gpu_memory_util = float(ev[f'{prefix}_accelerator_mem_util']),
-        max_model_len = int(ev['vllm_common_max_model_len']),
+        models=models_list,
+        hf_token=hf_token,
+        replicas=replicas,
+        gpu_type=gpu_type,
+        gpu_memory=convert_accelerator_memory(gpu_type, ev["vllm_common_accelerator_memory"]),
+        tp=tp_size,
+        dp=dp_size,
+        accelerator_nr=user_accelerator_nr,
+        
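        # Editor's note (not part of this patch): lookups into ev follow the prefix
        # computed above, "vllm_common" for the default and
        # "vllm_modelservice_prefill"/"vllm_modelservice_decode" otherwise, so a decode
        # TP lookup, for example, resolves to ev["vllm_modelservice_decode_tensor_parallelism"].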
requested_accelerator_nr=get_accelerator_nr(user_accelerator_nr, tp_size, dp_size), + gpu_memory_util=float(ev[f"{prefix}_accelerator_mem_util"]), + max_model_len=int(ev["vllm_common_max_model_len"]), ) return validation_param + def validate_standalone_vllm_params(ev: dict, ignore_if_failed: bool): """ Validates vllm standalone configuration @@ -316,6 +355,7 @@ def validate_modelservice_vllm_params(ev: dict, ignore_if_failed: bool): announce(f"Validating decode vLLM arguments for {decode_params.models} ...") validate_vllm_params(decode_params, ignore_if_failed, type=DECODE) + def main(): """Main function following the pattern from other Python steps""" @@ -329,7 +369,7 @@ def main(): announce("DRY RUN enabled. No actual changes will be made.") # Capacity planning - ignore_failed_validation = ev['ignore_failed_validation'] + ignore_failed_validation = ev["ignore_failed_validation"] msg = "Validating vLLM configuration against Capacity Planner... " if ignore_failed_validation: msg += "deployment will continue even if validation failed." @@ -344,5 +384,6 @@ def main(): announce("Deployment method is modelservice, checking for prefill and decode deployments") validate_modelservice_vllm_params(ev, ignore_failed_validation) + if __name__ == "__main__": sys.exit(main()) diff --git a/setup/steps/00_ensure_llm-d-infra.sh b/llm_d_benchmark/setup/steps/00_ensure_llm-d-infra.sh similarity index 100% rename from setup/steps/00_ensure_llm-d-infra.sh rename to llm_d_benchmark/setup/steps/00_ensure_llm-d-infra.sh diff --git a/setup/steps/01_ensure_local_conda.py b/llm_d_benchmark/setup/steps/01_ensure_local_conda.py similarity index 73% rename from setup/steps/01_ensure_local_conda.py rename to llm_d_benchmark/setup/steps/01_ensure_local_conda.py index e74c4691..5a0f16b2 100644 --- a/setup/steps/01_ensure_local_conda.py +++ b/llm_d_benchmark/setup/steps/01_ensure_local_conda.py @@ -1,50 +1,37 @@ +import json import os -import sys import platform -import subprocess -import json import shutil +import subprocess +import sys from pathlib import Path -# Add project root to path for imports -current_file = Path(__file__).resolve() -project_root = current_file.parents[1] -sys.path.insert(0, str(project_root)) +import requests -try: - from functions import announce, environment_variable_to_dict - import requests -except ImportError as e: - # Fallback for when dependencies are not available - print(f"Warning: Could not import required modules: {e}") - print("This script requires the llm-d environment to be properly set up.") - print("Please run: ./setup/install_deps.sh") - print("And ensure requests is installed: pip install requests") - sys.exit(1) +from llm_d_benchmark.setup.utils.functions import announce, environment_variable_to_dict def get_platform_info(): """Get platform information using native Python instead of shell commands""" system = platform.system().lower() return { - 'system': system, - 'machine': platform.machine(), - 'is_mac': system == 'darwin', - 'is_linux': system == 'linux' + "system": system, + "machine": platform.machine(), + "is_mac": system == "darwin", + "is_linux": system == "linux", } def is_conda_available(): """Check if conda is available using native Python instead of shell command""" - return shutil.which('conda') is not None + return shutil.which("conda") is not None def get_conda_info(): """Get conda information using JSON output instead of shell parsing""" try: - #FIXME (USE llmdbench_execute_cmd) - result = subprocess.run(['conda', 'info', '--json'], - capture_output=True, 
text=True, check=True) + # FIXME (USE llmdbench_execute_cmd) + result = subprocess.run(["conda", "info", "--json"], capture_output=True, text=True, check=True) return json.loads(result.stdout) except (subprocess.CalledProcessError, json.JSONDecodeError, FileNotFoundError): return None @@ -53,9 +40,8 @@ def get_conda_info(): def check_conda_environment(env_name: str): """Check if conda environment exists using conda env list""" try: - #FIXME (USE llmdbench_execute_cmd) - result = subprocess.run(['conda', 'env', 'list'], - capture_output=True, text=True, check=True) + # FIXME (USE llmdbench_execute_cmd) + result = subprocess.run(["conda", "env", "list"], capture_output=True, text=True, check=True) return env_name in result.stdout except (subprocess.CalledProcessError, FileNotFoundError): return False @@ -72,15 +58,15 @@ def install_miniforge_macos(dry_run: bool, verbose: bool): return 0, anaconda_path, conda_sh # Check if brew is available - if not shutil.which('brew'): + if not shutil.which("brew"): raise EnvironmentError("Homebrew not found. Please install Homebrew first.") # Install miniforge using brew - cmd = ['brew', 'install', '--cask', 'miniforge'] + cmd = ["brew", "install", "--cask", "miniforge"] if verbose: announce(f"---> executing: {' '.join(cmd)}") - #FIXME (USE llmdbench_execute_cmd) + # FIXME (USE llmdbench_execute_cmd) result = subprocess.run(cmd, capture_output=not verbose, text=True) if result.returncode != 0: @@ -119,12 +105,12 @@ def install_miniforge_linux(dry_run: bool, verbose: bool): announce("---> running miniforge installer") # Run installer - #FIXME (USE llmdbench_execute_cmd) + # FIXME (USE llmdbench_execute_cmd) process = subprocess.Popen( - ['bash', '-s', '--', '-b', '-p', '/opt/miniconda'], + ["bash", "-s", "--", "-b", "-p", "/opt/miniconda"], stdin=subprocess.PIPE, stdout=subprocess.PIPE if not verbose else None, - stderr=subprocess.PIPE if not verbose else None + stderr=subprocess.PIPE if not verbose else None, ) stdout, stderr = process.communicate(input=response.content) @@ -159,7 +145,7 @@ def update_shell_rc_file(anaconda_path: str, shell_name: str, dry_run: bool): return True # Add anaconda path to RC file - with open(rc_file, 'a') as f: + with open(rc_file, "a") as f: f.write(f"\n{anaconda_path}\n") announce(f"✅ Anaconda path added to {rc_file}") @@ -179,7 +165,6 @@ def source_conda_script(conda_sh: Path, dry_run: bool, verbose: bool): if not conda_sh.exists(): raise FileNotFoundError(f"Could not find conda.sh at {conda_sh}") - announce(f"⏭️ running {conda_sh}") # Note: sourcing in subprocess doesn't affect parent shell @@ -188,9 +173,8 @@ def source_conda_script(conda_sh: Path, dry_run: bool, verbose: bool): if verbose: announce(f"---> executing: {cmd}") - #FIXME (USE llmdbench_execute_cmd) - result = subprocess.run(['bash', '-c', cmd], - capture_output=not verbose, text=True) + # FIXME (USE llmdbench_execute_cmd) + result = subprocess.run(["bash", "-c", cmd], capture_output=not verbose, text=True) if result.returncode != 0: raise RuntimeError(f"Failed to source conda.sh: {result.stderr if not verbose else ''}") @@ -201,52 +185,52 @@ def source_conda_script(conda_sh: Path, dry_run: bool, verbose: bool): def create_conda_environment(env_name: str, dry_run: bool, verbose: bool): """Create and configure conda environment""" if check_conda_environment(env_name): - announce(f"⏭️ Conda environment \"{env_name}\" already created, skipping installation") + announce(f'⏭️ Conda environment "{env_name}" already created, skipping installation') return 0 - 
announce(f"📜 Configuring conda environment \"{env_name}\"...") + announce(f'📜 Configuring conda environment "{env_name}"...') if dry_run: announce(f"---> would create conda environment: {env_name}") announce(f"---> would activate conda environment: {env_name}") - announce(f"---> would install requirements") + announce("---> would install requirements") return 0 try: # Create environment - cmd = ['conda', 'create', '--name', env_name, '-y'] + cmd = ["conda", "create", "--name", env_name, "-y"] if verbose: announce(f"---> executing: {' '.join(cmd)}") - #FIXME (USE llmdbench_execute_cmd) + # FIXME (USE llmdbench_execute_cmd) result = subprocess.run(cmd, capture_output=not verbose, text=True) if result.returncode != 0: raise RuntimeError(f"Failed to create conda environment: {result.stderr if not verbose else ''}") # Activate environment - cmd = ['conda', 'activate', env_name] + cmd = ["conda", "activate", env_name] if verbose: announce(f"---> executing: {' '.join(cmd)}") - #FIXME (USE llmdbench_execute_cmd) + # FIXME (USE llmdbench_execute_cmd) result = subprocess.run(cmd, capture_output=not verbose, text=True) if result.returncode != 0: announce(f"Warning: conda activate returned {result.returncode} (this is often normal)") # Install requirements if available - requirements_file = Path(os.getenv('LLMDBENCH_MAIN_DIR', '.')) / 'build' / 'requirements.txt' + requirements_file = Path(os.getenv("LLMDBENCH_MAIN_DIR", ".")) / "build" / "requirements.txt" if requirements_file.exists(): - python_cmd = os.getenv('LLMDBENCH_CONTROL_PCMD', 'python') + python_cmd = os.getenv("LLMDBENCH_CONTROL_PCMD", "python") # Show environment info announce(f"ℹ️ Python: {shutil.which(python_cmd) or 'not found'}") # Install requirements - cmd = [python_cmd, '-m', 'pip', 'install', '-r', str(requirements_file)] + cmd = [python_cmd, "-m", "pip", "install", "-r", str(requirements_file)] if verbose: announce(f"---> executing: {' '.join(cmd)}") - #FIXME (USE llmdbench_execute_cmd) + # FIXME (USE llmdbench_execute_cmd) result = subprocess.run(cmd, capture_output=not verbose, text=True) if result.returncode != 0: announce(f"Warning: pip install returned {result.returncode}") @@ -258,12 +242,7 @@ def create_conda_environment(env_name: str, dry_run: bool, verbose: bool): def ensure_local_conda( - run_locally: bool, - host_os: str, - host_shell: str, - env_name: str, - dry_run: bool, - verbose: bool + run_locally: bool, host_os: str, host_shell: str, env_name: str, dry_run: bool, verbose: bool ) -> int: """ Ensure local conda environment is set up using native Python libraries where possible. 
@@ -282,7 +261,9 @@ def ensure_local_conda(

     # Early exit check
     if not run_locally:
-        announce("⏭️ Environment variable \"LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY\" is set to 0, skipping local setup of conda environment")
+        announce(
+            '⏭️ Environment variable "LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY" is set to 0, skipping local setup of conda environment'
+        )
         return 0

     try:
@@ -291,9 +272,9 @@ def ensure_local_conda(
         # Check if conda is already available
         if not is_conda_available():
             # Install conda based on platform
-            if platform_info['is_mac']:
+            if platform_info["is_mac"]:
                 exit_code, anaconda_path, conda_sh = install_miniforge_macos(dry_run, verbose)
-            elif platform_info['is_linux']:
+            elif platform_info["is_linux"]:
                 exit_code, anaconda_path, conda_sh = install_miniforge_linux(dry_run, verbose)
             else:
                 raise RuntimeError(f"Unsupported platform: {platform_info['system']}")
@@ -310,11 +291,11 @@ def ensure_local_conda(
             if not conda_info:
                 raise RuntimeError("Could not get conda information")

-            root_prefix = Path(conda_info.get('root_prefix', ''))
-            if platform_info['is_mac']:
-                conda_sh = root_prefix / 'base' / 'etc' / 'profile.d' / 'conda.sh'
+            root_prefix = Path(conda_info.get("root_prefix", ""))
+            if platform_info["is_mac"]:
+                conda_sh = root_prefix / "base" / "etc" / "profile.d" / "conda.sh"
             else:
-                conda_sh = root_prefix / 'etc' / 'profile.d' / 'conda.sh'
+                conda_sh = root_prefix / "etc" / "profile.d" / "conda.sh"

         # Source conda.sh
         source_conda_script(conda_sh, dry_run, verbose)
@@ -322,7 +303,7 @@ def ensure_local_conda(
         # Create and configure conda environment
         create_conda_environment(env_name, dry_run, verbose)

-        announce(f"✅ Conda environment \"{env_name}\" configured")
+        announce(f'✅ Conda environment "{env_name}" configured')
         return 0

     except Exception as e:
@@ -339,18 +320,17 @@ def main():
     ev = {}
     environment_variable_to_dict(ev)

-    if ev["control_dry_run"]:
+    if ev.get("control_dry_run", False):
         announce("DRY RUN enabled. No actual changes will be made.")
-
     # Execute the main logic
     return ensure_local_conda(
-        run_locally=ev["run_experiment_analyze_locally"],
-        host_os=ev["control_deploy_host_os"],
-        host_shell=ev["control_deploy_host_shell"] ,
-        env_name=ev["harness_conda_env_name"],
-        dry_run=ev["control_dry_run"],
-        verbose=ev["control_verbose"]
+        run_locally=ev.get("run_experiment_analyze_locally", False),
+        host_os=ev.get("control_deploy_host_os", ""),
+        host_shell=ev.get("control_deploy_host_shell", ""),
+        env_name=ev.get("harness_conda_env_name", ""),
+        dry_run=ev.get("control_dry_run", False),
+        verbose=ev.get("control_verbose", False),
     )
diff --git a/setup/steps/01_ensure_local_conda.sh b/llm_d_benchmark/setup/steps/01_ensure_local_conda.sh
similarity index 100%
rename from setup/steps/01_ensure_local_conda.sh
rename to llm_d_benchmark/setup/steps/01_ensure_local_conda.sh
diff --git a/setup/steps/02_ensure_gateway_provider.py b/llm_d_benchmark/setup/steps/02_ensure_gateway_provider.py
similarity index 78%
rename from setup/steps/02_ensure_gateway_provider.py
rename to llm_d_benchmark/setup/steps/02_ensure_gateway_provider.py
index db378f58..fb06a44e 100644
--- a/setup/steps/02_ensure_gateway_provider.py
+++ b/llm_d_benchmark/setup/steps/02_ensure_gateway_provider.py
@@ -1,19 +1,13 @@
 #!/usr/bin/env python3

 import os
-import sys
 import subprocess
-import tempfile
-import re
+import sys
 from pathlib import Path

-# Add project root to path for imports
-current_file = Path(__file__).resolve()
-project_root = current_file.parents[1]
-sys.path.insert(0, str(project_root))

 try:
-    from functions import announce, llmdbench_execute_cmd, environment_variable_to_dict
+    from llm_d_benchmark.setup.utils.functions import announce, environment_variable_to_dict, llmdbench_execute_cmd
 except ImportError as e:
     # Fallback for when dependencies are not available
     print(f"Warning: Could not import required modules: {e}")
@@ -21,22 +15,8 @@
     print("Please run: ./setup/install_deps.sh")
     sys.exit(1)

-try:
-    from kubernetes import client, config
-    import requests
-except ImportError as e:
-    print(f"Warning: Could not import required modules: {e}")
-    print("Please install required dependencies: pip install kubernetes requests")
-    sys.exit(1)
-
-def ensure_helm_repository(
-    helm_cmd: str,
-    chart_name: str,
-    repo_url: str,
-    dry_run: bool,
-    verbose: bool
-) -> int:
+def ensure_helm_repository(helm_cmd: str, chart_name: str, repo_url: str, dry_run: bool, verbose: bool) -> int:
     """
     Ensure helm repository is added and updated.
@@ -52,24 +32,14 @@ def ensure_helm_repository(
     """
     # Add helm repository
     add_cmd = f"{helm_cmd} repo add {chart_name} {repo_url} --force-update"
-    result = llmdbench_execute_cmd(
-        actual_cmd=add_cmd,
-        dry_run=dry_run,
-        verbose=verbose,
-        silent=not verbose
-    )
+    result = llmdbench_execute_cmd(actual_cmd=add_cmd, dry_run=dry_run, verbose=verbose, silent=not verbose)
     if result != 0:
         announce(f"❌ Failed to add helm repository (exit code: {result})")
         return result

     # Update helm repositories
     update_cmd = f"{helm_cmd} repo update"
-    result = llmdbench_execute_cmd(
-        actual_cmd=update_cmd,
-        dry_run=dry_run,
-        verbose=verbose,
-        silent=not verbose
-    )
+    result = llmdbench_execute_cmd(actual_cmd=update_cmd, dry_run=dry_run, verbose=verbose, silent=not verbose)
     if result != 0:
         announce(f"❌ Failed to update helm repositories (exit code: {result})")
         return result
@@ -77,12 +47,7 @@ def ensure_helm_repository(
     return 0


-def get_latest_chart_version(
-    helm_cmd: str,
-    helm_repo: str,
-    dry_run: bool,
-    verbose: bool
-) -> str:
+def get_latest_chart_version(helm_cmd: str, helm_repo: str, dry_run: bool, verbose: bool) -> str:
     """
     Get the latest version of a helm chart from repository.

@@ -103,12 +68,7 @@
         # Run helm search repo command
         search_cmd = f"{helm_cmd} search repo {helm_repo}"
         result = subprocess.run(
-            search_cmd.split(),
-            capture_output=True,
-            shell=True,
-            executable="/bin/bash",
-            text=True,
-            timeout=30
+            search_cmd.split(), capture_output=True, shell=True, executable="/bin/bash", text=True, timeout=30
         )

         if result.returncode != 0:
@@ -117,7 +77,7 @@
             return ""

         # Parse output to get version (equivalent to: tail -1 | awk '{print $2}')
-        lines = result.stdout.strip().split('\n')
+        lines = result.stdout.strip().split("\n")
         if len(lines) < 2:  # Need at least header + 1 data line
             return ""

@@ -141,10 +101,10 @@


 def install_gateway_api_crds(
-    ev : dict,
-    dry_run : bool,
-    verbose : bool,
-    ) -> int:
+    ev: dict,
+    dry_run: bool,
+    verbose: bool,
+) -> int:
     """
     Install Gateway API crds.

@@ -159,7 +119,9 @@
     try:
         crd_version = ev.get("gateway_api_crd_revision")
         kubectl_cmd = ev.get("control_kcmd", "kubectl")
-        install_crds_cmd = f"{kubectl_cmd} apply -k https://github.com/kubernetes-sigs/gateway-api/config/crd/?ref={crd_version}"
+        install_crds_cmd = (
+            f"{kubectl_cmd} apply -k https://github.com/kubernetes-sigs/gateway-api/config/crd/?ref={crd_version}"
+        )

         announce(f"🚀 Installing Kubernetes Gateway API ({crd_version}) CRDs...")
         llmdbench_execute_cmd(install_crds_cmd, dry_run, verbose)
@@ -172,10 +134,10 @@


 def install_gateway_api_extension_crds(
-    ev : dict,
-    dry_run : bool,
-    verbose : bool,
-    ) -> int:
+    ev: dict,
+    dry_run: bool,
+    verbose: bool,
+) -> int:
     """
     Install Gateway API inference extension crds.

@@ -203,10 +165,10 @@


 def install_kgateway(
-    ev : dict,
-    dry_run : bool,
-    verbose : bool,
-    ) -> int:
+    ev: dict,
+    dry_run: bool,
+    verbose: bool,
+) -> int:
     """
     Install gateway control plane.
     Uses helmfile from: https://raw.githubusercontent.com/llm-d-incubation/llm-d-infra/refs/heads/main/quickstart/gateway-control-plane-providers/kgateway.helmfile.yaml

@@ -222,8 +184,8 @@
     try:
         helm_base_dir = Path(ev["control_work_dir"]) / "setup" / "helm"
         helm_base_dir.mkdir(parents=True, exist_ok=True)
-        helmfile_path = helm_base_dir / f'helmfile-{ev["current_step"]}.yaml'
-        with open(helmfile_path, 'w') as f:
+        helmfile_path = helm_base_dir / f"helmfile-{ev['current_step']}.yaml"
+        with open(helmfile_path, "w") as f:
             f.write("""
 releases:
   - name: kgateway-crds
@@ -259,7 +221,7 @@
 """)

         install_cmd = f"helmfile apply -f {helmfile_path}"
-        announce(f"🚀 Installing kgateway")
+        announce("🚀 Installing kgateway")
         llmdbench_execute_cmd(install_cmd, dry_run, verbose)
         announce("✅ kgateway installed")
         return 0
@@ -271,11 +233,12 @@
     finally:
         True

+
 def install_istio(
-    ev : dict,
-    dry_run : bool,
-    verbose : bool,
-    ) -> int:
+    ev: dict,
+    dry_run: bool,
+    verbose: bool,
+) -> int:
     """
     Install gateway control plane.

@@ -290,8 +253,8 @@
     try:
         helm_base_dir = Path(ev["control_work_dir"]) / "setup" / "helm"
         helm_base_dir.mkdir(parents=True, exist_ok=True)
-        helmfile_path = helm_base_dir / f'helmfile-{ev["current_step"]}.yaml'
-        with open(helmfile_path, 'w') as f:
+        helmfile_path = helm_base_dir / f"helmfile-{ev['current_step']}.yaml"
+        with open(helmfile_path, "w") as f:
             f.write("""
 releases:
   - name: istio-base
@@ -327,7 +290,7 @@

         install_cmd = f"helmfile apply -f {helmfile_path}"
-        announce(f"🚀 Installing istio")
+        announce("🚀 Installing istio")
         llmdbench_execute_cmd(install_cmd, dry_run, verbose)
         announce("✅ istio installed")
         return 0
@@ -339,11 +302,12 @@
     finally:
         True

+
 def install_gateway_control_plane(
-    ev : dict,
-    dry_run : bool,
-    verbose : bool,
-    ) -> int:
+    ev: dict,
+    dry_run: bool,
+    verbose: bool,
+) -> int:
     """
     Install gateway control plane.

@@ -355,25 +319,21 @@
     Returns:
         int: 0 for success, non-zero for failure
     """
-    if ev["vllm_modelservice_gateway_class_name"] == 'kgateway':
+    if ev["vllm_modelservice_gateway_class_name"] == "kgateway":
         success = install_kgateway(ev, dry_run, verbose)
-    elif ev["vllm_modelservice_gateway_class_name"] == 'istio':
+    elif ev["vllm_modelservice_gateway_class_name"] == "istio":
         success = install_istio(ev, dry_run, verbose)
-    elif ev["vllm_modelservice_gateway_class_name"] == 'gke':
+    elif ev["vllm_modelservice_gateway_class_name"] == "gke":
         success = 0

     if success == 0:
-        announce(f'✅ Gateway control plane (provider {ev["vllm_modelservice_gateway_class_name"]}) installed.')
+        announce(f"✅ Gateway control plane (provider {ev['vllm_modelservice_gateway_class_name']}) installed.")
     else:
-        announce(f'❌ Gateway control plane (provider {ev["vllm_modelservice_gateway_class_name"]}) not installed.')
+        announce(f"❌ Gateway control plane (provider {ev['vllm_modelservice_gateway_class_name']}) not installed.")

     return success


-def ensure_gateway_provider(
-    ev: dict,
-    dry_run: bool,
-    verbose: bool
-) -> int:
+def ensure_gateway_provider(ev: dict, dry_run: bool, verbose: bool) -> int:
     """
     Main function to ensure gateway provider setup.

@@ -388,18 +348,16 @@

     if not ev["control_environment_type_modelservice_active"]:
         deploy_methods = ev.get("deploy_methods", "unknown")
-        announce(f"⏭️ Environment types are \"{deploy_methods}\". Skipping this step.")
+        announce(f'⏭️ Environment types are "{deploy_methods}". Skipping this step.')
         return 0

     # Extract required environment variables
-    #FIXME (we shouldn't have to unpack all these variables here)
+    # FIXME (we shouldn't have to unpack all these variables here)
     helm_cmd = ev.get("control_hcmd", "helm")
     chart_name = ev.get("vllm_modelservice_chart_name", "")
     repo_url = ev.get("vllm_modelservice_helm_repository_url", "")
     chart_version = ev.get("vllm_modelservice_chart_version", "")
     helm_repo = ev.get("vllm_modelservice_helm_repository", "")
-    gateway_class = ev.get("vllm_modelservice_gateway_class_name", "")
-    release_name = ev.get("vllm_modelservice_release", "")

     # Step 1: Ensure helm repository
     result = ensure_helm_repository(helm_cmd, chart_name, repo_url, dry_run, verbose)
@@ -418,9 +376,11 @@
             os.environ["LLMDBENCH_VLLM_MODELSERVICE_CHART_VERSION"] = detected_version

     # Check gateway infrastructure setup
-    announce(f'🔍 Ensuring gateway infrastructure (provider {ev["vllm_modelservice_gateway_class_name"]}) is setup...')
+    announce(
+        f"🔍 Ensuring gateway infrastructure (provider {ev['vllm_modelservice_gateway_class_name']}) is setup..."
+    )

-    if ev["user_is_admin"] :
+    if ev["user_is_admin"]:
         # Install Kubernetes Gateway API crds
         result = install_gateway_api_crds(ev, dry_run, verbose)
         if result != 0:
@@ -437,7 +397,9 @@
             return result

     else:
-        announce("❗No privileges to setup Gateway Provider. Will assume a user with proper privileges already performed this action.")
+        announce(
+            "❗No privileges to setup Gateway Provider. Will assume a user with proper privileges already performed this action."
+        )

     return 0
diff --git a/setup/steps/02_ensure_gateway_provider.sh b/llm_d_benchmark/setup/steps/02_ensure_gateway_provider.sh
similarity index 100%
rename from setup/steps/02_ensure_gateway_provider.sh
rename to llm_d_benchmark/setup/steps/02_ensure_gateway_provider.sh
diff --git a/setup/steps/03_ensure_user_workload_monitoring_configuration.py b/llm_d_benchmark/setup/steps/03_ensure_user_workload_monitoring_configuration.py
similarity index 70%
rename from setup/steps/03_ensure_user_workload_monitoring_configuration.py
rename to llm_d_benchmark/setup/steps/03_ensure_user_workload_monitoring_configuration.py
index f2a5640f..dcf018df 100644
--- a/setup/steps/03_ensure_user_workload_monitoring_configuration.py
+++ b/llm_d_benchmark/setup/steps/03_ensure_user_workload_monitoring_configuration.py
@@ -1,28 +1,18 @@
 import os
 import sys
-import yaml
 from pathlib import Path
+
 import pykube
-from pykube.exceptions import PyKubeError
-
-# Add project root to path for imports
-current_file = Path(__file__).resolve()
-project_root = current_file.parents[1]
-sys.path.insert(0, str(project_root))
-
-try:
-    from functions import (announce,
-                           llmdbench_execute_cmd,
-                           environment_variable_to_dict,
-                           kube_connect,
-                           apply_configmap,
-                           is_openshift)
-except ImportError as e:
-    # Fallback for when dependencies are not available
-    print(f"Warning: Could not import required modules: {e}")
-    print("This script requires the llm-d environment to be properly set up.")
-    print("Please run: ./setup/install_deps.sh")
-    sys.exit(1)
+import yaml
+
+from llm_d_benchmark.setup.utils.functions import (
+    announce,
+    apply_configmap,
+    environment_variable_to_dict,
+    is_openshift,
+    kube_connect,
+    llmdbench_execute_cmd,
+)


 def create_monitoring_configmap() -> dict:
     """
     Returns:
         dict: ConfigMap structure for enabling user workload monitoring
     """
     return {
-        'apiVersion': 'v1',
-        'kind': 'ConfigMap',
-        'metadata': {
-            'name': 'cluster-monitoring-config',
-            'namespace': 'openshift-monitoring'
-        },
-        'data': {
-            'config.yaml': 'enableUserWorkload: true'
-        }
+        "apiVersion": "v1",
+        "kind": "ConfigMap",
+        "metadata": {"name": "cluster-monitoring-config", "namespace": "openshift-monitoring"},
+        "data": {"config.yaml": "enableUserWorkload: true"},
     }

@@ -73,11 +58,11 @@
         announce(f"---> writing ConfigMap YAML to {output_path}")

         # Write YAML using Python yaml library instead of heredoc
-        with open(output_path, 'w') as f:
+        with open(output_path, "w") as f:
             yaml.dump(configmap, f, default_flow_style=False)

         if verbose:
-            announce(f"---> successfully wrote YAML file")
+            announce("---> successfully wrote YAML file")

         return True

@@ -90,13 +75,7 @@
 def ensure_user_workload_monitoring(
-    api: pykube.HTTPClient,
-    ev: dict,
-    work_dir: str,
-    current_step: str,
-    kubectl_cmd: str,
-    dry_run: bool,
-    verbose: bool
+    api: pykube.HTTPClient, ev: dict, work_dir: str, current_step: str, kubectl_cmd: str, dry_run: bool, verbose: bool
 ) -> int:
     """
     Ensure OpenShift user workload monitoring is configured using native Python.
@@ -115,10 +94,10 @@
     """
     announce("🔍 Checking for OpenShift user workload monitoring enablement...")

-    if is_openshift(api) :
-        if ev["deploy_methods"] != "modelservice" :
-            announce("⏭️ Standup method is not \"modelservice\", skipping user workload monitoring enablement")
-    else :
+    if is_openshift(api):
+        if ev["deploy_methods"] != "modelservice":
+            announce('⏭️ Standup method is not "modelservice", skipping user workload monitoring enablement')
+    else:
         announce("⏭️ Not an OpenShift Cluster, skipping user workload monitoring enablement")
         return 0

@@ -158,14 +137,14 @@ def main():
     ev = {}
     environment_variable_to_dict(ev)

-    env_cmd=f'source "{ev["control_dir"]}/env.sh"'
+    env_cmd = f'source "{ev["control_dir"]}/env.sh"'
     result = llmdbench_execute_cmd(actual_cmd=env_cmd, dry_run=ev["control_dry_run"], verbose=ev["control_verbose"])
     if result != 0:
-        announce(f"❌ Failed while running \"{env_cmd}\" (exit code: {result})")
+        announce(f'❌ Failed while running "{env_cmd}" (exit code: {result})')
         exit(result)

-    api = kube_connect(f'{ev["control_work_dir"]}/environment/context.ctx')
-    if ev["control_dry_run"] :
+    api = kube_connect(f"{ev['control_work_dir']}/environment/context.ctx")
+    if ev["control_dry_run"]:
         announce("DRY RUN enabled. No actual changes will be made.")

     # Execute the main logic
@@ -176,8 +155,9 @@ def main():
         current_step=ev["current_step"],
         kubectl_cmd=ev["control_kcmd"],
         dry_run=ev["control_dry_run"],
-        verbose=ev["control_verbose"]
+        verbose=ev["control_verbose"],
     )

+
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/setup/steps/03_ensure_user_workload_monitoring_configuration.sh b/llm_d_benchmark/setup/steps/03_ensure_user_workload_monitoring_configuration.sh
similarity index 100%
rename from setup/steps/03_ensure_user_workload_monitoring_configuration.sh
rename to llm_d_benchmark/setup/steps/03_ensure_user_workload_monitoring_configuration.sh
diff --git a/setup/steps/04_ensure_model_namespace_prepared.py b/llm_d_benchmark/setup/steps/04_ensure_model_namespace_prepared.py
similarity index 78%
rename from setup/steps/04_ensure_model_namespace_prepared.py
rename to llm_d_benchmark/setup/steps/04_ensure_model_namespace_prepared.py
index 1e7ba33b..538855e7 100644
--- a/setup/steps/04_ensure_model_namespace_prepared.py
+++ b/llm_d_benchmark/setup/steps/04_ensure_model_namespace_prepared.py
@@ -1,36 +1,25 @@
+import asyncio
+import base64
 import os
 import sys
 import time
-import base64
 from pathlib import Path

 import pykube
 from pykube.exceptions import PyKubeError
-import asyncio
-
-
-current_file = Path(__file__).resolve()
-
-# get the projects root directory by going up 1 parent directories
-project_root = current_file.parents[1]
-
-# add the project root to the system path
-sys.path.insert(0, str(project_root))
-
-
-from functions import (
+from llm_d_benchmark.setup.utils.functions import (
+    SecurityContextConstraints,
     announce,
-    wait_for_job,
-    validate_and_create_pvc,
-    launch_download_job,
-    model_attribute,
     create_namespace,
-    kube_connect,
-    llmdbench_execute_cmd,
     environment_variable_to_dict,
     is_openshift,
-    SecurityContextConstraints,
+    kube_connect,
+    launch_download_job,
+    llmdbench_execute_cmd,
+    model_attribute,
+    validate_and_create_pvc,
+    wait_for_job,
 )

@@ -66,9 +55,7 @@

     # check if the service account is already in the list
     if sa_user_name in scc.obj["users"]:
-        announce(
-            f'Service Account "{sa_user_name}" already has SCC "{scc_name}". No changes needed'
-        )
+        announce(f'Service Account "{sa_user_name}" already has SCC "{scc_name}". No changes needed')
     else:
         if dry_run:
             announce(f'DRY RUN: Would add "{sa_user_name}" to SCC "{scc_name}"')
@@ -80,23 +67,18 @@

 def main():
-
-    os.environ["LLMDBENCH_CURRENT_STEP"] = os.path.splitext(os.path.basename(__file__))[
-        0
-    ]
+    os.environ["LLMDBENCH_CURRENT_STEP"] = os.path.splitext(os.path.basename(__file__))[0]

     ev = {}
     environment_variable_to_dict(ev)

     env_cmd = f'source "{ev["control_dir"]}/env.sh"'
-    result = llmdbench_execute_cmd(
-        actual_cmd=env_cmd, dry_run=ev["control_dry_run"], verbose=ev["control_verbose"]
-    )
+    result = llmdbench_execute_cmd(actual_cmd=env_cmd, dry_run=ev["control_dry_run"], verbose=ev["control_verbose"])
     if result != 0:
         announce(f'❌ Failed while running "{env_cmd}" (exit code: {result})')
         exit(result)

-    api = kube_connect(f'{ev["control_work_dir"]}/environment/context.ctx')
+    api = kube_connect(f"{ev['control_work_dir']}/environment/context.ctx")
     if ev["control_dry_run"]:
         announce("DRY RUN enabled. No actual changes will be made.")

@@ -108,9 +90,7 @@
     )

     if ev["hf_token"]:
-        announce(
-            f'🔑 Creating or updating secret "{ev["vllm_common_hf_token_name"]}"...'
-        )
+        announce(f'🔑 Creating or updating secret "{ev["vllm_common_hf_token_name"]}"...')
         secret_obj = {
             "apiVersion": "v1",
             "kind": "Secret",
@@ -119,11 +99,7 @@
                 "namespace": ev["vllm_common_namespace"],
             },
             "type": "Opaque",
-            "data": {
-                ev["vllm_common_hf_token_key"]: base64.b64encode(
-                    ev["hf_token"].encode()
-                ).decode()
-            },
+            "data": {ev["vllm_common_hf_token_key"]: base64.b64encode(ev["hf_token"].encode()).decode()},
         }
         secret = pykube.Secret(api, secret_obj)
         if ev["control_dry_run"] != "1":
@@ -133,24 +109,15 @@
                 secret.create()
             announce("Secret created/updated.")

-    models = [
-        model.strip() for model in ev["deploy_model_list"].split(",") if model.strip()
-    ]
+    models = [model.strip() for model in ev["deploy_model_list"].split(",") if model.strip()]

     for model_name in models:
-        if (
-            ev["vllm_modelservice_uri_protocol"] == "pvc"
-            or ev["control_environment_type_standalone_active"]
-        ):
+        if ev["vllm_modelservice_uri_protocol"] == "pvc" or ev["control_environment_type_standalone_active"]:
             download_model = model_attribute(model=model_name, attribute="model")
-            model_artifact_uri = (
-                f'pvc://{ev["vllm_common_pvc_name"]}/models/{download_model}'
-            )
+            model_artifact_uri = f"pvc://{ev['vllm_common_pvc_name']}/models/{download_model}"
             protocol, pvc_and_model_path = model_artifact_uri.split(
                 "://"
             )  # protocol var unused but exists in prev script
-            pvc_name, model_path = pvc_and_model_path.split(
-                "/", 1
-            )  # split from first occurence
+            pvc_name, model_path = pvc_and_model_path.split("/", 1)  # split from first occurrence

             validate_and_create_pvc(
                 api=api,
@@ -203,9 +170,7 @@
         ev["control_dry_run"],
     )

-    announce(
-        f"🚚 Creating configmap with contents of all files under workload/preprocesses..."
-    )
+    announce("🚚 Creating configmap with contents of all files under workload/preprocesses...")
     config_map_name = "llm-d-benchmark-preprocesses"
     config_map_data = {}
     preprocess_dir = Path(ev["main_dir"]) / "setup" / "preprocess"
@@ -216,9 +181,7 @@
         for path in file_paths:
             config_map_data[path.name] = path.read_text(encoding="utf-8")
     except FileNotFoundError:
-        announce(
-            f"Warning: Directory not found at {preprocess_dir}. Creating empty ConfigMap."
-        )
+        announce(f"Warning: Directory not found at {preprocess_dir}. Creating empty ConfigMap.")

     cm_obj = {
         "apiVersion": "v1",
@@ -228,7 +191,7 @@
     }

     cm = pykube.ConfigMap(api, cm_obj)
-    if ev["control_dry_run"] != "1":
+    if not ev["control_dry_run"]:
         if cm.exists():
             cm.update()
         else:
diff --git a/setup/steps/04_ensure_model_namespace_prepared.sh b/llm_d_benchmark/setup/steps/04_ensure_model_namespace_prepared.sh
similarity index 100%
rename from setup/steps/04_ensure_model_namespace_prepared.sh
rename to llm_d_benchmark/setup/steps/04_ensure_model_namespace_prepared.sh
diff --git a/setup/steps/05_ensure_harness_namespace_prepared.sh b/llm_d_benchmark/setup/steps/05_ensure_harness_namespace_prepared.sh
similarity index 100%
rename from setup/steps/05_ensure_harness_namespace_prepared.sh
rename to llm_d_benchmark/setup/steps/05_ensure_harness_namespace_prepared.sh
diff --git a/setup/steps/06_deploy_vllm_standalone_models.py b/llm_d_benchmark/setup/steps/06_deploy_vllm_standalone_models.py
similarity index 73%
rename from setup/steps/06_deploy_vllm_standalone_models.py
rename to llm_d_benchmark/setup/steps/06_deploy_vllm_standalone_models.py
index 64c2fd79..e2eb9c97 100755
--- a/setup/steps/06_deploy_vllm_standalone_models.py
+++ b/llm_d_benchmark/setup/steps/06_deploy_vllm_standalone_models.py
@@ -4,16 +4,19 @@
 import sys
 from pathlib import Path

-# Add project root to Python path
-current_file = Path(__file__).resolve()
-project_root = current_file.parents[1]
-sys.path.insert(0, str(project_root))
-
-# Import from functions.py
-from functions import (
-    announce, llmdbench_execute_cmd, model_attribute, extract_environment,
-    get_image, check_storage_class, check_affinity, add_annotations,
-    add_command_line_options, add_additional_env_to_yaml, get_accelerator_nr, is_standalone_deployment
+from llm_d_benchmark.setup.utils.functions import (
+    add_additional_env_to_yaml,
+    add_annotations,
+    add_command_line_options,
+    announce,
+    check_affinity,
+    check_storage_class,
+    extract_environment,
+    get_accelerator_nr,
+    get_image,
+    is_standalone_deployment,
+    llmdbench_execute_cmd,
+    model_attribute,
 )

@@ -29,7 +32,6 @@ def main():

     # Check if standalone environment is active
     if is_standalone_deployment(ev):
-
         # Check storage class
         if not check_storage_class():
             announce("❌ Failed to check storage class")
@@ -68,10 +70,12 @@
             # Generate Deployment YAML
             deployment_yaml = generate_deployment_yaml(ev, model, model_label)
             deployment_file = yamls_dir / f"{ev['current_step']}_a_deployment_{modelfn}.yaml"
-            with open(deployment_file, 'w') as f:
+            with open(deployment_file, "w") as f:
                 f.write(deployment_yaml)

-            announce(f"🚚 Deploying model \"{model}\" and associated service (from files located at {ev['control_work_dir']})...")
+            announce(
+                f'🚚 Deploying model "{model}" and associated service (from files located at {ev["control_work_dir"]})...'
+            )

             # Apply deployment
             kubectl_deploy_cmd = f"{ev['control_kcmd']} apply -f {deployment_file}"
@@ -79,13 +83,13 @@
                 actual_cmd=kubectl_deploy_cmd,
                 dry_run=int(ev.get("control_dry_run", 0)),
                 verbose=int(ev.get("control_verbose", 0)),
-                fatal=True
+                fatal=True,
             )

             # Generate Service YAML
             service_yaml = generate_service_yaml(ev, model, model_label)
             service_file = yamls_dir / f"{ev['current_step']}_b_service_{modelfn}.yaml"
-            with open(service_file, 'w') as f:
+            with open(service_file, "w") as f:
                 f.write(service_yaml)

             # Apply service
@@ -93,7 +97,7 @@
             llmdbench_execute_cmd(
                 actual_cmd=kubectl_service_cmd,
                 dry_run=int(ev.get("control_dry_run", 0)),
-                verbose=int(ev.get("control_verbose", 0))
+                verbose=int(ev.get("control_verbose", 0)),
             )

             # Optional HTTPRoute for OpenShift
@@ -104,7 +108,7 @@
                 # Generate HTTPRoute YAML
                 httproute_yaml = generate_httproute_yaml(ev, model, model_label)
                 httproute_file = yamls_dir / f"{ev['current_step']}_c_httproute_{modelfn}.yaml"
-                with open(httproute_file, 'w') as f:
+                with open(httproute_file, "w") as f:
                     f.write(httproute_yaml)

                 # Apply HTTPRoute
@@ -112,10 +116,10 @@
                 llmdbench_execute_cmd(
                     actual_cmd=kubectl_httproute_cmd,
                     dry_run=int(ev.get("control_dry_run", 0)),
-                    verbose=int(ev.get("control_verbose", 0))
+                    verbose=int(ev.get("control_verbose", 0)),
                 )

-            announce(f"✅ Model \"{model}\" and associated service deployed.")
+            announce(f'✅ Model "{model}" and associated service deployed.')

         # Second pass: Wait for pods to be ready
         for model in model_list:
@@ -134,12 +138,14 @@
                 dry_run=int(ev.get("control_dry_run", 0)),
                 verbose=int(ev.get("control_verbose", 0)),
                 fatal=True,
-                attempts=2
+                attempts=2,
             )
             announce(f"✅ (standalone) pods serving model {model} created")

             # Wait for Running state
-            announce(f"⏳ Waiting for (standalone) pods serving model {model} to be in \"Running\" state (timeout={ev.get('vllm_common_timeout', 300)}s)...")
+            announce(
+                f'⏳ Waiting for (standalone) pods serving model {model} to be in "Running" state (timeout={ev.get("vllm_common_timeout", 300)}s)...'
+            )
             kubectl_wait_running_cmd = (
                 f"{ev['control_kcmd']} --namespace {namespace} wait "
                 f"--timeout={ev.get('vllm_common_timeout', 300)}s "
@@ -148,12 +154,14 @@
             llmdbench_execute_cmd(
                 actual_cmd=kubectl_wait_running_cmd,
                 dry_run=int(ev.get("control_dry_run", 0)),
-                verbose=int(ev.get("control_verbose", 0))
+                verbose=int(ev.get("control_verbose", 0)),
             )
             announce(f"🚀 (standalone) pods serving model {model} running")

             # Wait for Ready condition
-            announce(f"⏳ Waiting for (standalone) pods serving {model} to be Ready (timeout={ev.get('vllm_common_timeout', 300)}s)...")
+            announce(
+                f"⏳ Waiting for (standalone) pods serving {model} to be Ready (timeout={ev.get('vllm_common_timeout', 300)}s)..."
+            )
             kubectl_wait_ready_cmd = (
                 f"{ev['control_kcmd']} --namespace {namespace} wait "
                 f"--timeout={ev.get('vllm_common_timeout', 300)}s "
@@ -162,7 +170,7 @@
             llmdbench_execute_cmd(
                 actual_cmd=kubectl_wait_ready_cmd,
                 dry_run=int(ev.get("control_dry_run", 0)),
-                verbose=int(ev.get("control_verbose", 0))
+                verbose=int(ev.get("control_verbose", 0)),
             )
             announce(f"🚀 (standalone) pods serving model {model} ready")

@@ -176,13 +184,11 @@
             llmdbench_execute_cmd(
                 actual_cmd=kubectl_logs_cmd,
                 dry_run=int(ev.get("control_dry_run", 0)),
-                verbose=int(ev.get("control_verbose", 0))
+                verbose=int(ev.get("control_verbose", 0)),
             )

             # Handle OpenShift route exposure
-            if (int(ev.get("vllm_standalone_route", 0)) != 0 and
-                int(ev.get("control_deploy_is_openshift", 0)) == 1):
-
+            if int(ev.get("vllm_standalone_route", 0)) != 0 and int(ev.get("control_deploy_is_openshift", 0)) == 1:
                 # Check if route already exists
                 route_check_cmd = (
                     f"{ev['control_kcmd']} --namespace {namespace} get route --ignore-not-found | "
@@ -191,13 +197,8 @@
                 try:
                     import subprocess
-                    result = subprocess.run(
-                        route_check_cmd,
-                        shell=True,
-                        capture_output=True,
-                        text=True,
-                        check=False
-                    )
+
+                    result = subprocess.run(route_check_cmd, shell=True, capture_output=True, text=True, check=False)
                     is_route = result.stdout.strip()
                 except Exception:
                     is_route = ""
@@ -213,25 +214,27 @@
                     llmdbench_execute_cmd(
                         actual_cmd=kubectl_expose_cmd,
                         dry_run=int(ev.get("control_dry_run", 0)),
-                        verbose=int(ev.get("control_verbose", 0))
+                        verbose=int(ev.get("control_verbose", 0)),
                     )
                     announce(f"✅ Service for pods service model {model} created")

-            announce(f"✅ Model \"{model}\" and associated service deployed.")
+            announce(f'✅ Model "{model}" and associated service deployed.')

         # Show resource snapshot
-        announce(f"ℹ️ A snapshot of the relevant (model-specific) resources on namespace \"{ev['vllm_common_namespace']}\":")
+        announce(
+            f'ℹ️ A snapshot of the relevant (model-specific) resources on namespace "{ev["vllm_common_namespace"]}":'
+        )
         if int(ev.get("control_dry_run", 0)) == 0:
             kubectl_get_cmd = f"{ev['control_kcmd']} get --namespace {ev['vllm_common_namespace']} {srl}"
             llmdbench_execute_cmd(
                 actual_cmd=kubectl_get_cmd,
                 dry_run=int(ev.get("control_dry_run", 0)),
                 verbose=int(ev.get("control_verbose", 0)),
-                fatal=False
+                fatal=False,
             )
     else:
         deploy_methods = ev.get("deploy_methods", "")
-        announce(f"⏭️ Environment types are \"{deploy_methods}\". Skipping this step.")
+        announce(f'⏭️ Environment types are "{deploy_methods}". Skipping this step.')

     return 0

@@ -244,7 +247,7 @@
         ev["vllm_standalone_image_registry"],
         ev["vllm_standalone_image_repo"],
         ev["vllm_standalone_image_name"],
-        ev["vllm_standalone_image_tag"]
+        ev["vllm_standalone_image_tag"],
     )

     # Parse affinity
@@ -265,9 +268,9 @@
   name: vllm-standalone-{model_label}
   labels:
     app: vllm-standalone-{model_label}
-  namespace: {ev['vllm_common_namespace']}
+  namespace: {ev["vllm_common_namespace"]}
 spec:
-  replicas: {ev['vllm_common_replicas']}
+  replicas: {ev["vllm_common_replicas"]}
   selector:
     matchLabels:
       app: vllm-standalone-{model_label}
@@ -278,7 +281,7 @@
       annotations:
 {annotations}
     spec:
-      schedulerName: {ev.get('vllm_common_pod_scheduler', 'default-scheduler')}
+      schedulerName: {ev.get("vllm_common_pod_scheduler", "default-scheduler")}
      affinity:
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
@@ -299,70 +302,70 @@
 {args}
         env:
         - name: LLMDBENCH_VLLM_STANDALONE_MODEL
-          value: "{os.environ.get('LLMDBENCH_DEPLOY_CURRENT_MODEL', '')}"
+          value: "{os.environ.get("LLMDBENCH_DEPLOY_CURRENT_MODEL", "")}"
         - name: LLMDBENCH_VLLM_STANDALONE_VLLM_LOAD_FORMAT
-          value: "{ev.get('vllm_standalone_vllm_load_format', '')}"
+          value: "{ev.get("vllm_standalone_vllm_load_format", "")}"
         - name: LLMDBENCH_VLLM_STANDALONE_MODEL_LOADER_EXTRA_CONFIG
-          value: "{os.environ.get('LLMDBENCH_VLLM_STANDALONE_MODEL_LOADER_EXTRA_CONFIG', '{}')}"
+          value: "{os.environ.get("LLMDBENCH_VLLM_STANDALONE_MODEL_LOADER_EXTRA_CONFIG", "{}")}"
         - name: VLLM_LOGGING_LEVEL
-          value: "{ev.get('vllm_standalone_vllm_logging_level', '')}"
+          value: "{ev.get("vllm_standalone_vllm_logging_level", "")}"
         - name: HF_HOME
-          value: {ev.get('vllm_standalone_pvc_mountpoint', '')}
+          value: {ev.get("vllm_standalone_pvc_mountpoint", "")}
         - name: HUGGING_FACE_HUB_TOKEN
          valueFrom:
            secretKeyRef:
-              name: {ev.get('vllm_common_hf_token_name', '')}
+              name: {ev.get("vllm_common_hf_token_name", "")}
              key: HF_TOKEN
 {additional_env}
         ports:
-        - containerPort: {ev['vllm_common_inference_port']}
+        - containerPort: {ev["vllm_common_inference_port"]}
         startupProbe:
          httpGet:
            path: /health
-            port: {ev['vllm_common_inference_port']}
+            port: {ev["vllm_common_inference_port"]}
          failureThreshold: 200
-          initialDelaySeconds: {ev.get('vllm_common_initial_delay_probe', 60)}
+          initialDelaySeconds: {ev.get("vllm_common_initial_delay_probe", 60)}
          periodSeconds: 30
          timeoutSeconds: 5
         livenessProbe:
          tcpSocket:
-            port: {ev['vllm_common_inference_port']}
+            port: {ev["vllm_common_inference_port"]}
          failureThreshold: 3
          periodSeconds: 10
         readinessProbe:
          httpGet:
            path: /health
-            port: {ev['vllm_common_inference_port']}
+            port: {ev["vllm_common_inference_port"]}
          failureThreshold: 3
          periodSeconds: 5
         resources:
          limits:
-            cpu: "{ev.get('vllm_common_cpu_nr', '')}"
-            memory: {ev.get('vllm_common_cpu_mem', '')}
-            {ev.get('vllm_common_accelerator_resource', '')}: "{
-    get_accelerator_nr(
-        ev.get('vllm_common_accelerator_nr', 'auto'),
-        ev.get('vllm_common_tensor_parallelism', 1),
-        ev.get('vllm_common_data_parallelism', 1),
-    )
-}"
-            ephemeral-storage: {ev.get('vllm_standalone_ephemeral_storage', '')}
+            cpu: "{ev.get("vllm_common_cpu_nr", "")}"
+            memory: {ev.get("vllm_common_cpu_mem", "")}
+            {ev.get("vllm_common_accelerator_resource", "")}: "{
+        get_accelerator_nr(
+            ev.get("vllm_common_accelerator_nr", "auto"),
+            ev.get("vllm_common_tensor_parallelism", 1),
+            ev.get("vllm_common_data_parallelism", 1),
+        )
+    }"
+            ephemeral-storage: {ev.get("vllm_standalone_ephemeral_storage", "")}
          requests:
-            cpu: "{ev.get('vllm_common_cpu_nr', '')}"
-            memory: {ev.get('vllm_common_cpu_mem', '')}
-            {ev.get('vllm_common_accelerator_resource', '')}: "{
-    get_accelerator_nr(
-        ev.get('vllm_common_accelerator_nr', 'auto'),
-        ev.get('vllm_common_tensor_parallelism', 1),
-        ev.get('vllm_common_data_parallelism', 1),
-    )
-}"
-            ephemeral-storage: {ev.get('vllm_standalone_ephemeral_storage', '')}
+            cpu: "{ev.get("vllm_common_cpu_nr", "")}"
+            memory: {ev.get("vllm_common_cpu_mem", "")}
+            {ev.get("vllm_common_accelerator_resource", "")}: "{
+        get_accelerator_nr(
+            ev.get("vllm_common_accelerator_nr", "auto"),
+            ev.get("vllm_common_tensor_parallelism", 1),
+            ev.get("vllm_common_data_parallelism", 1),
+        )
+    }"
+            ephemeral-storage: {ev.get("vllm_standalone_ephemeral_storage", "")}
         volumeMounts:
         - name: preprocesses
          mountPath: /setup/preprocess
         - name: cache-volume
-          mountPath: {ev.get('vllm_standalone_pvc_mountpoint', '')}
+          mountPath: {ev.get("vllm_standalone_pvc_mountpoint", "")}
         - name: shm
          mountPath: /dev/shm
       volumes:
@@ -372,7 +375,7 @@
          defaultMode: 0500
       - name: cache-volume
         persistentVolumeClaim:
-          claimName: {ev.get('vllm_common_pvc_name', '')}
+          claimName: {ev.get("vllm_common_pvc_name", "")}
 #          readOnly: true
       - name: shm
         emptyDir:
@@ -389,12 +392,12 @@
 kind: Service
 metadata:
   name: vllm-standalone-{model_label}
-  namespace: {ev['vllm_common_namespace']}
+  namespace: {ev["vllm_common_namespace"]}
 spec:
   ports:
   - name: http
     port: 80
-    targetPort: {ev['vllm_common_inference_port']}
+    targetPort: {ev["vllm_common_inference_port"]}
   selector:
     app: vllm-standalone-{model_label}
   type: ClusterIP
@@ -416,13 +419,13 @@
 kind: HTTPRoute
 metadata:
   name: vllm-standalone-{model_label}
-  namespace: {ev['vllm_common_namespace']}
+  namespace: {ev["vllm_common_namespace"]}
 spec:
   parentRefs:
   - name: openshift-gateway
     namespace: openshift-gateway
   hostnames:
-  - "{model}.{ev['vllm_common_namespace']}.apps.{cluster_url}"
+  - "{model}.{ev["vllm_common_namespace"]}.apps.{cluster_url}"
   rules:
   - matches:
     - path:
@@ -430,7 +433,7 @@
         value: /
     backendRefs:
     - name: vllm-standalone-{model_parameters}-vllm-{model_label}-{model_type}
-      port: {ev['vllm_common_inference_port']}
+      port: {ev["vllm_common_inference_port"]}
 """
     return httproute_yaml
diff --git a/setup/steps/06_deploy_vllm_standalone_models.sh b/llm_d_benchmark/setup/steps/06_deploy_vllm_standalone_models.sh
similarity index 100%
rename from setup/steps/06_deploy_vllm_standalone_models.sh
rename to llm_d_benchmark/setup/steps/06_deploy_vllm_standalone_models.sh
diff --git a/setup/steps/07_deploy_setup.py b/llm_d_benchmark/setup/steps/07_deploy_setup.py
similarity index 60%
rename from setup/steps/07_deploy_setup.py
rename to llm_d_benchmark/setup/steps/07_deploy_setup.py
index 3fd15fea..25dc449d 100755
--- a/setup/steps/07_deploy_setup.py
+++ b/llm_d_benchmark/setup/steps/07_deploy_setup.py
@@ -1,19 +1,19 @@
 #!/usr/bin/env python3

 import os
-import sys
 import subprocess
+import sys
 from pathlib import Path

-# Add project root to Python path
-current_file = Path(__file__).resolve()
-project_root = current_file.parents[1]
-sys.path.insert(0, str(project_root))
+from llm_d_benchmark.setup.utils.functions import (
+    announce,
+    environment_variable_to_dict,
+    llmdbench_execute_cmd,
+    model_attribute,
+)

-# Import from functions.py
-from functions import environment_variable_to_dict, announce, llmdbench_execute_cmd, model_attribute

-def gateway_values(provider : str, host: str) -> str:
+def gateway_values(provider: str, host: str) -> str:
     if provider == "istio":
         return f"""gateway:
   gatewayClassName: istio
@@ -44,24 +44,25 @@
     else:
         return ""

-def auto_detect_version(ev, chart, version_key, repo_key) -> int:
+
+def auto_detect_version(ev, chart, version_key, repo_key, dry_run=False) -> int:
+    if dry_run:
+        announce(
+            f"ℹ️ Dry-run mode enabled. Skipping auto-detection of chart: {chart}, version key: {version_key}, repo key: {repo_key}."
+        )
+        return 0
     if ev.get(version_key) == "auto":
         announce(f"🔍 Auto-detecting {chart} chart version...")

         try:
-            #FIXME (USE llmdbench_execute_cmd)
+            # FIXME (USE llmdbench_execute_cmd)
             helm_search_cmd = f"{ev['control_hcmd']} search repo {ev[repo_key]}"
             result = subprocess.run(
-                helm_search_cmd,
-                capture_output=True,
-                text=True,
-                shell=True,
-                executable="/bin/bash",
-                check=False
+                helm_search_cmd, capture_output=True, text=True, shell=True, executable="/bin/bash", check=False
             )

             if result.returncode == 0 and result.stdout.strip():
-                lines = result.stdout.strip().split('\n')
+                lines = result.stdout.strip().split("\n")
                 if len(lines) > 1:  # Skip header line
                     last_line = lines[-1]
                     version = last_line.split()[1] if len(last_line.split()) > 1 else ""
@@ -85,6 +86,7 @@
             return 1
     return 0

+
 def main():
     """Set up helm repositories and create helmfile configurations for model deployments."""
     os.environ["CURRENT_STEP_NAME"] = os.path.splitext(os.path.basename(__file__))[0]
@@ -95,7 +97,6 @@ def main():

     # Check if modelservice environment is active
     if ev["control_environment_type_modelservice_active"]:
-
         # Add and update llm-d-modelservice helm repository
         announce("🔧 Setting up helm repositories ...")

@@ -108,10 +109,12 @@
         result = llmdbench_execute_cmd(
             actual_cmd=helm_repo_add_cmd,
             dry_run=int(ev.get("control_dry_run", 0)),
-            verbose=int(ev.get("control_verbose", 0))
+            verbose=int(ev.get("control_verbose", 0)),
         )
         if result != 0:
-            announce(f"❌ Failed setting up llm-d-modelservice helm repository with \"{helm_repo_add_cmd}\" (exit code: {result})")
+            announce(
+                f'❌ Failed setting up llm-d-modelservice helm repository with "{helm_repo_add_cmd}" (exit code: {result})'
+            )
             exit(result)

         # Add llm-d-infra helm repository
@@ -122,10 +125,12 @@
         result = llmdbench_execute_cmd(
             actual_cmd=helm_repo_add_cmd,
             dry_run=int(ev.get("control_dry_run", 0)),
-            verbose=int(ev.get("control_verbose", 0))
+            verbose=int(ev.get("control_verbose", 0)),
         )
         if result != 0:
-            announce(f"❌ Failed setting up llm-d-infra helm repository with \"{helm_repo_add_cmd}\" (exit code: {result})")
+            announce(
+                f'❌ Failed setting up llm-d-infra helm repository with "{helm_repo_add_cmd}" (exit code: {result})'
+            )
             exit(result)

         # Update helm repositories
@@ -133,17 +138,29 @@
         result = llmdbench_execute_cmd(
             actual_cmd=helm_repo_update_cmd,
             dry_run=int(ev.get("control_dry_run", 0)),
-            verbose=int(ev.get("control_verbose", 0))
+            verbose=int(ev.get("control_verbose", 0)),
         )
         if result != 0:
-            announce(f"❌ Failed setting up helm repositories with \"{helm_repo_update_cmd}\" (exit code: {result})")
+            announce(f'❌ Failed setting up helm repositories with "{helm_repo_update_cmd}" (exit code: {result})')
             exit(result)

         # Auto-detect chart version if needed
-        result = auto_detect_version(ev, ev['vllm_modelservice_chart_name'], "vllm_modelservice_chart_version", "vllm_modelservice_helm_repository")
+        result = auto_detect_version(
+            ev,
+            ev["vllm_modelservice_chart_name"],
+            "vllm_modelservice_chart_version",
+            "vllm_modelservice_helm_repository",
+            dry_run=ev["control_dry_run"],
+        )
         if 0 != result:
             exit(result)
-        result = auto_detect_version(ev, ev['vllm_infra_chart_name'], "vllm_infra_chart_version", "vllm_infra_helm_repository")
+        result = auto_detect_version(
+            ev,
+            ev["vllm_infra_chart_name"],
+            "vllm_infra_chart_version",
+            "vllm_infra_helm_repository",
+            dry_run=ev["control_dry_run"],
+        )
         if 0 != result:
             exit(result)

@@ -152,9 +169,14 @@
         helm_base_dir.mkdir(parents=True, exist_ok=True)

         # Create infra values file
-        infra_value_file = Path(helm_base_dir / "infra.yaml" )
-        with open(infra_value_file, 'w') as f:
-            f.write(gateway_values(ev['vllm_modelservice_gateway_class_name'], f"gaie-inference-scheduling-epp.{ev['vllm_common_namespace']}.svc.cluster.local"))
+        infra_value_file = Path(helm_base_dir / "infra.yaml")
+        with open(infra_value_file, "w") as f:
+            f.write(
+                gateway_values(
+                    ev["vllm_modelservice_gateway_class_name"],
+                    f"gaie-inference-scheduling-epp.{ev['vllm_common_namespace']}.svc.cluster.local",
+                )
+            )

         # Process each model
         model_number = 0
@@ -174,16 +196,16 @@

             # Generate helmfile YAML content
             helmfile_content = f"""repositories:
-  - name: {ev['vllm_modelservice_helm_repository']}
-    url: {ev['vllm_modelservice_helm_repository_url']}
-  - name: {ev['vllm_infra_helm_repository']}
-    url: {ev['vllm_infra_helm_repository_url']}
+  - name: {ev["vllm_modelservice_helm_repository"]}
+    url: {ev["vllm_modelservice_helm_repository_url"]}
+  - name: {ev["vllm_infra_helm_repository"]}
+    url: {ev["vllm_infra_helm_repository_url"]}

 releases:
-  - name: infra-{ev['vllm_modelservice_release']}
-    namespace: {ev['vllm_common_namespace']}
-    chart: {ev['vllm_infra_helm_repository']}/{ev['vllm_infra_chart_name']}
-    version: {ev['vllm_infra_chart_version']}
+  - name: infra-{ev["vllm_modelservice_release"]}
+    namespace: {ev["vllm_common_namespace"]}
+    chart: {ev["vllm_infra_helm_repository"]}/{ev["vllm_infra_chart_name"]}
+    version: {ev["vllm_infra_chart_version"]}
     installed: true
     labels:
       type: infrastructure
@@ -192,25 +214,25 @@
       - infra.yaml

   - name: {model_id_label}-ms
-    namespace: {ev['vllm_common_namespace']}
-    chart: {ev['vllm_modelservice_helm_repository']}/{ev['vllm_modelservice_chart_name']}
-    version: {ev['vllm_modelservice_chart_version']}
+    namespace: {ev["vllm_common_namespace"]}
+    chart: {ev["vllm_modelservice_helm_repository"]}/{ev["vllm_modelservice_chart_name"]}
+    version: {ev["vllm_modelservice_chart_version"]}
     installed: true
     needs:
-      - {ev['vllm_common_namespace']}/infra-{ev['vllm_modelservice_release']}
-      - {ev['vllm_common_namespace']}/{model_id_label}-gaie
+      - {ev["vllm_common_namespace"]}/infra-{ev["vllm_modelservice_release"]}
+      - {ev["vllm_common_namespace"]}/{model_id_label}-gaie
     values:
       - {model_num}/ms-values.yaml
     labels:
       kind: inference-stack

   - name: {model_id_label}-gaie
-    namespace: {ev['vllm_common_namespace']}
-    chart: {ev['vllm_gaie_chart_name']}
-    version: {ev['vllm_gaie_chart_version']}
+    namespace: {ev["vllm_common_namespace"]}
+    chart: {ev["vllm_gaie_chart_name"]}
+    version: {ev["vllm_gaie_chart_version"]}
     installed: true
     needs:
-      - {ev['vllm_common_namespace']}/infra-{ev['vllm_modelservice_release']}
+      - {ev["vllm_common_namespace"]}/infra-{ev["vllm_modelservice_release"]}
     values:
       - {model_num}/gaie-values.yaml
     labels:
@@ -219,7 +241,7 @@

             # Write helmfile configuration
             helmfile_path = helm_base_dir / f"helmfile-{model_num}.yaml"
-            with open(helmfile_path, 'w') as f:
+            with open(helmfile_path, "w") as f:
                 f.write(helmfile_content)

             announce(f"📝 Created helmfile configuration for model {model} ({model_num})")
@@ -230,22 +252,24 @@

             model_number += 1

-        announce(f"🚀 Installing helm chart \"infra-{ev['vllm_modelservice_release']}\" via helmfile...")
-        install_cmd=f"helmfile --namespace {ev['vllm_common_namespace']} --kubeconfig {ev['control_work_dir']}/environment/context.ctx --selector name=infra-{ev['vllm_modelservice_release']} apply -f {ev['control_work_dir']}/setup/helm/{ev['vllm_modelservice_release']}/helmfile-00.yaml --skip-diff-on-install"
+        announce(f'🚀 Installing helm chart "infra-{ev["vllm_modelservice_release"]}" via helmfile...')
+        install_cmd = f"helmfile --namespace {ev['vllm_common_namespace']} --kubeconfig {ev['control_work_dir']}/environment/context.ctx --selector name=infra-{ev['vllm_modelservice_release']} apply -f {ev['control_work_dir']}/setup/helm/{ev['vllm_modelservice_release']}/helmfile-00.yaml --skip-diff-on-install"
         result = llmdbench_execute_cmd(
             actual_cmd=install_cmd,
             dry_run=int(ev.get("control_dry_run", 0)),
-            verbose=int(ev.get("control_verbose", 0))
+            verbose=int(ev.get("control_verbose", 0)),
         )
         if result != 0:
-            announce(f"❌ Failed Failed installing chart \"infra-{ev['vllm_modelservice_release']}\" (exit code: {result})")
+            announce(
+                f'❌ Failed installing chart "infra-{ev["vllm_modelservice_release"]}" (exit code: {result})'
+            )
             exit(result)

-        announce(f"✅ chart \"infra-{ev['vllm_modelservice_release']}\" deployed successfully")
+        announce(f'✅ chart "infra-{ev["vllm_modelservice_release"]}" deployed successfully')
         announce("✅ Completed gaie deployment")
     else:
         deploy_methods = ev.get("deploy_methods", "")
-        announce(f"⏭️ Environment types are \"{deploy_methods}\". Skipping this step.")
+        announce(f'⏭️ Environment types are "{deploy_methods}". Skipping this step.')

     return 0
diff --git a/setup/steps/07_deploy_setup.sh b/llm_d_benchmark/setup/steps/07_deploy_setup.sh
similarity index 100%
rename from setup/steps/07_deploy_setup.sh
rename to llm_d_benchmark/setup/steps/07_deploy_setup.sh
diff --git a/setup/steps/08_deploy_gaie.py b/llm_d_benchmark/setup/steps/08_deploy_gaie.py
similarity index 75%
rename from setup/steps/08_deploy_gaie.py
rename to llm_d_benchmark/setup/steps/08_deploy_gaie.py
index 8742a2c9..4c58a36a 100755
--- a/setup/steps/08_deploy_gaie.py
+++ b/llm_d_benchmark/setup/steps/08_deploy_gaie.py
@@ -4,20 +4,14 @@
 import sys
 from pathlib import Path

-# Add project root to Python path
-current_file = Path(__file__).resolve()
-project_root = current_file.parents[1]
-sys.path.insert(0, str(project_root))
-
-# Import from functions.py
-from functions import (
-    environment_variable_to_dict,
+from llm_d_benchmark.setup.utils.functions import (
+    add_config,
     announce,
-    llmdbench_execute_cmd,
-    model_attribute,
+    environment_variable_to_dict,
     extract_environment,
     get_image,
-    add_config,
+    llmdbench_execute_cmd,
+    model_attribute,
 )

@@ -43,9 +37,7 @@ def main():
     model_list = ev.get("deploy_model_list", "").replace(",", " ").split()

     for model in model_list:
-        announce(
-            f"🔄 Processing model {model_number + 1}/{len(model_list)}: {model}"
-        )
+        announce(f"🔄 Processing model {model_number + 1}/{len(model_list)}: {model}")

         # Get model attribute
         model_id_label = model_attribute(model, "modelid_label")
@@ -55,13 +47,7 @@
         model_num = f"{model_number:02d}"

         # Create directory structure
-        helm_dir = (
-            Path(ev["control_work_dir"])
-            / "setup"
-            / "helm"
-            / ev["vllm_modelservice_release"]
-            / model_num
-        )
+        helm_dir = Path(ev["control_work_dir"]) / "setup" / "helm" / ev["vllm_modelservice_release"] / model_num
         helm_dir.mkdir(parents=True, exist_ok=True)

         # A plugin config file is identified by ev["vllm_modelservice_gaie_plugins_configfile"]
@@ -75,9 +61,7 @@
         # look for benchmark provided ev["vllm_modelservice_gaie_plugins_configfile"]
         # expose it as ev["vllm_modelservice_gaie_presets_full_path"]
         if ev["vllm_modelservice_gaie_plugins_configfile"].startswith("/"):
-            ev["vllm_modelservice_gaie_presets_full_path"] = ev[
-                "vllm_modelservice_gaie_plugins_configfile"
-            ]
+            ev["vllm_modelservice_gaie_presets_full_path"] = ev["vllm_modelservice_gaie_plugins_configfile"]
         else:
             configfile = ev["vllm_modelservice_gaie_plugins_configfile"]
             if not configfile.endswith(".yaml"):
@@ -93,21 +77,15 @@
             with open(ev["vllm_modelservice_gaie_presets_full_path"], "r") as f:
                 presets_content = f.read()
                 if "vllm_modelservice_gaie_custom_plugins" not in ev:
-                    plugin_config = (
-                        f'{ev["vllm_modelservice_gaie_plugins_configfile"]}: |\n'
-                        + "\n".join(
-                            f"  {line}" for line in presets_content.splitlines()
-                        )
+                    plugin_config = f"{ev['vllm_modelservice_gaie_plugins_configfile']}: |\n" + "\n".join(
+                        f"  {line}" for line in presets_content.splitlines()
                     )
         except FileNotFoundError:
             # The (benchmark) plugin config file does not exist
             # - use ev["vllm_modelservice_gaie_custom_plugins"] if it is defined
             if "vllm_modelservice_gaie_custom_plugins" in ev:
                 plugin_config = "\n".join(
-                    f"{line}"
-                    for line in ev[
-                        "vllm_modelservice_gaie_custom_plugins"
-                    ].splitlines()
+                    f"{line}" for line in ev["vllm_modelservice_gaie_custom_plugins"].splitlines()
                 )

         # Get image tag
@@ -132,8 +110,8 @@
         gaie_values_content = f"""inferenceExtension:
   replicas: 1
   image:
-    name: {ev['llmd_inferencescheduler_image_name']}
-    hub: {ev['llmd_inferencescheduler_image_registry']}/{ev['llmd_inferencescheduler_image_repo']}
+    name: {ev["llmd_inferencescheduler_image_name"]}
+    hub: {ev["llmd_inferencescheduler_image_registry"]}/{ev["llmd_inferencescheduler_image_repo"]}
     tag: {image_tag}
     pullPolicy: Always
   extProcPort: 9002
@@ -147,10 +125,10 @@
       targetPort: 5557
       protocol: TCP
 {hf_token_env}
-  pluginsConfigFile: "{ev['vllm_modelservice_gaie_plugins_configfile']}"
+  pluginsConfigFile: "{ev["vllm_modelservice_gaie_plugins_configfile"]}"
 {add_config(plugin_config, 4, "pluginsCustomConfig:")}
 inferencePool:
-  targetPortNumber: {ev['vllm_common_inference_port']}
+  targetPortNumber: {ev["vllm_common_inference_port"]}
   modelServerType: vllm
   apiVersion: "inference.networking.x-k8s.io/v1alpha2"
   modelServers:
@@ -158,7 +136,7 @@
       llm-d.ai/inferenceServing: "true"
       llm-d.ai/model: {model_id_label}
 provider:
-  name: {provider(ev['vllm_modelservice_gateway_class_name'])}
+  name: {provider(ev["vllm_modelservice_gateway_class_name"])}
 """
         # Write GAIE values file
         gaie_values_file = helm_dir / "gaie-values.yaml"
@@ -166,9 +144,7 @@
             f.write(gaie_values_content)

         # Deploy helm chart via helmfile
-        announce(
-            f"🚀 Installing helm chart \"gaie-{ev['vllm_modelservice_release']}\" via helmfile..."
-        )
+        announce(f'🚀 Installing helm chart "gaie-{ev["vllm_modelservice_release"]}" via helmfile...')
         helmfile_cmd = (
             f"helmfile --namespace {ev['vllm_common_namespace']} "
             f"--kubeconfig {ev['control_work_dir']}/environment/context.ctx "
@@ -184,13 +160,11 @@
         )
         if result != 0:
             announce(
-                f"❌ Failed installing helm chart \"gaie-{ev['vllm_modelservice_release']}\" via helmfile with \"{helmfile_cmd}\" (exit code: {result})"
+                f'❌ Failed installing helm chart "gaie-{ev["vllm_modelservice_release"]}" via helmfile with "{helmfile_cmd}" (exit code: {result})'
             )
             exit(result)

-        announce(
-            f"✅ {ev['vllm_common_namespace']}-{model_id_label}-gaie helm chart deployed successfully"
-        )
+        announce(f"✅ {ev['vllm_common_namespace']}-{model_id_label}-gaie helm chart deployed successfully")

         # List relevant resources
         resource_list = "deployment,service,pods,secrets,inferencepools"
@@ -198,7 +172,7 @@
             resource_list += ",route"

         announce(
-            f"ℹ️ A snapshot of the relevant (model-specific) resources on namespace \"{ev['vllm_common_namespace']}\":"
+            f'ℹ️ A snapshot of the relevant (model-specific) resources on namespace "{ev["vllm_common_namespace"]}":'
         )

         if int(ev.get("control_dry_run", 0)) == 0:
@@ -211,7 +185,7 @@
             )
             if result != 0:
                 announce(
-                    f"❌ Failed to get a snapshot of the relevant (model-specific) resources on namespace \"{ev['vllm_common_namespace']}\" with \"{kubectl_cmd}\" (exit code: {result})"
+                    f'❌ Failed to get a snapshot of the relevant (model-specific) resources on namespace "{ev["vllm_common_namespace"]}" with "{kubectl_cmd}" (exit code: {result})'
                 )
                 exit(result)
diff --git a/setup/steps/08_deploy_gaie.sh b/llm_d_benchmark/setup/steps/08_deploy_gaie.sh
similarity index 100%
rename from setup/steps/08_deploy_gaie.sh
rename to llm_d_benchmark/setup/steps/08_deploy_gaie.sh
diff --git a/setup/steps/09_deploy_via_modelservice.py b/llm_d_benchmark/setup/steps/09_deploy_via_modelservice.py
similarity index 86%
rename from setup/steps/09_deploy_via_modelservice.py
rename to llm_d_benchmark/setup/steps/09_deploy_via_modelservice.py
index d29fd44e..f5c42200 100644
--- a/setup/steps/09_deploy_via_modelservice.py
+++ b/llm_d_benchmark/setup/steps/09_deploy_via_modelservice.py
@@ -4,20 +4,27 @@
import sys from pathlib import Path -# Add project root to Python path -current_file = Path(__file__).resolve() -project_root = current_file.parents[1] -sys.path.insert(0, str(project_root)) - -# Import from functions.py -from functions import ( - announce, llmdbench_execute_cmd, model_attribute, extract_environment, - check_storage_class, check_affinity, environment_variable_to_dict, - get_image, add_command_line_options, get_accelerator_nr, add_annotations as functions_add_annotations, - add_additional_env_to_yaml as functions_add_additional_env_to_yaml, add_config as functions_add_config +from llm_d_benchmark.setup.utils.functions import ( + add_additional_env_to_yaml as functions_add_additional_env_to_yaml, +) +from llm_d_benchmark.setup.utils.functions import ( + add_annotations as functions_add_annotations, +) +from llm_d_benchmark.setup.utils.functions import ( + add_command_line_options, + announce, + check_affinity, + check_storage_class, + environment_variable_to_dict, + extract_environment, + get_accelerator_nr, + get_image, + llmdbench_execute_cmd, + model_attribute, +) +from llm_d_benchmark.setup.utils.functions import ( + add_config as functions_add_config, ) - - def add_command(model_command: str) -> str: @@ -50,7 +57,7 @@ def conditional_extra_config(extra_config: str, indent: int = 2, label: str = "e # Check if config is empty before processing if not extra_config or extra_config.strip() in ["{}", "[]", "#no____config"]: return "" - + config_result = functions_add_config(extra_config, indent + 2) # Add extra indent for content if config_result.strip(): spaces = " " * indent @@ -66,26 +73,26 @@ def add_config_prep(): # Set defaults for decode extra configs if not os.environ.get("LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_POD_CONFIG"): os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_POD_CONFIG"] = "{}" - + if not os.environ.get("LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_CONTAINER_CONFIG"): os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_CONTAINER_CONFIG"] = "{}" - + if not os.environ.get("LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_VOLUME_MOUNTS"): os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_VOLUME_MOUNTS"] = "[]" - + if not os.environ.get("LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_VOLUMES"): os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_VOLUMES"] = "[]" - + # Set defaults for prefill extra configs if not os.environ.get("LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_POD_CONFIG"): os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_POD_CONFIG"] = "{}" - + if not os.environ.get("LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_CONTAINER_CONFIG"): os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_CONTAINER_CONFIG"] = "{}" - + if not os.environ.get("LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_VOLUME_MOUNTS"): os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_VOLUME_MOUNTS"] = "[]" - + if not os.environ.get("LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_VOLUMES"): os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_EXTRA_VOLUMES"] = "[]" @@ -93,10 +100,6 @@ def add_config_prep(): # Note: add_command_line_options is now imported from functions.py - - - - def filter_empty_resource(resource_name: str, resource_value: str) -> str: """ Filter out empty resource values, mimicking bash behavior with sed. 
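For context, the add_config_prep()/conditional_extra_config() pair above relies on "empty" sentinel values rather than unset variables. A minimal, self-contained sketch of that pattern follows; the helper names default_extra_config and render_extra are hypothetical illustrations, not part of this patch:

import os

def default_extra_config(var: str, empty: str) -> str:
    # Mirrors add_config_prep(): unset or blank extra configs fall back to an
    # "empty" YAML sentinel ("{}" for mappings, "[]" for sequences).
    value = os.environ.get(var) or empty
    os.environ[var] = value
    return value

def render_extra(label: str, config: str, indent: int = 2) -> str:
    # Mirrors conditional_extra_config(): sentinels render to nothing, so the
    # generated ms-values.yaml never carries an empty key.
    if not config or config.strip() in ("{}", "[]"):
        return ""
    pad = " " * indent
    body = "\n".join(f"{pad}  {line}" for line in config.splitlines())
    return f"{pad}{label}:\n{body}"

cfg = default_extra_config("LLMDBENCH_VLLM_MODELSERVICE_DECODE_EXTRA_POD_CONFIG", "{}")
print(repr(render_extra("extraPodConfig", cfg)))  # '' while the variable is unset

Rendering sentinels to an empty string keeps empty extraPodConfig:/extraVolumes: keys out of the values files that Helm consumes.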
@@ -104,45 +107,45 @@ def filter_empty_resource(resource_name: str, resource_value: str) -> str: """ if not resource_name or not resource_value: return "" - return f"{resource_name}: \"{resource_value}\"" + return f'{resource_name}: "{resource_value}"' def generate_ms_values_yaml(ev: dict, mount_model_volume: bool, rules_file: Path) -> str: """ Generate the ms-values.yaml content for Helm chart. Exactly matches the bash script structure from lines 60-239. - + Args: ev: Environment variables dictionary mount_model_volume: Whether to mount model volume rules_file: Path to ms-rules.yaml file to be included - + Returns: YAML content as string """ # Get all required environment variables fullname_override = ev.get("deploy_current_model_id_label", "") multinode = ev.get("vllm_modelservice_multinode", "false") - + # Model artifacts section model_uri = ev.get("vllm_modelservice_uri", "") model_size = ev.get("vllm_common_pvc_model_cache_size", "") model_name = ev.get("deploy_current_model", "") - + # Routing section service_port = ev.get("vllm_common_inference_port", "8000") release = ev.get("vllm_modelservice_release", "") route_enabled = ev.get("vllm_modelservice_route", "false") model_id = ev.get("deploy_current_model_id", "") model_id_label = ev.get("deploy_current_model_id_label", "") - + # Image details image_registry = ev.get("llmd_image_registry", "") image_repo = ev.get("llmd_image_repo", "") image_name = ev.get("llmd_image_name", "") image_tag = ev.get("llmd_image_tag", "") main_image = get_image(image_registry, image_repo, image_name, image_tag, 0) - + # Proxy details proxy_image_registry = ev.get("llmd_routingsidecar_image_registry", "") proxy_image_repo = ev.get("llmd_routingsidecar_image_repo", "") @@ -151,12 +154,12 @@ def generate_ms_values_yaml(ev: dict, mount_model_volume: bool, rules_file: Path proxy_image = get_image(proxy_image_registry, proxy_image_repo, proxy_image_name, proxy_image_tag, 0) proxy_connector = ev.get("llmd_routingsidecar_connector", "") proxy_debug_level = ev.get("llmd_routingsidecar_debug_level", "") - + # EPP and routing configuration inference_model_create = ev.get("vllm_modelservice_inference_model", "true") inference_pool_create = ev.get("vllm_modelservice_inference_pool", "true") epp_create = ev.get("vllm_modelservice_epp", "true") - + # Decode configuration decode_replicas = int(ev.get("vllm_modelservice_decode_replicas", "0")) decode_create = "true" if decode_replicas > 0 else "false" @@ -177,7 +180,7 @@ def generate_ms_values_yaml(ev: dict, mount_model_volume: bool, rules_file: Path prefill_extra_args = ev.get("vllm_modelservice_prefill_extra_args", "") prefill_cpu_mem = ev.get("vllm_modelservice_prefill_cpu_mem", "") or ev.get("vllm_common_cpu_mem", "") prefill_cpu_nr = ev.get("vllm_modelservice_prefill_cpu_nr", "") or ev.get("vllm_common_cpu_nr", "") - + # Resource configuration - handle auto accelerator resource accelerator_resource = os.environ.get("LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE", "") if accelerator_resource == "auto": @@ -185,58 +188,54 @@ def generate_ms_values_yaml(ev: dict, mount_model_volume: bool, rules_file: Path decode_accelerator_nr = ev.get("vllm_modelservice_decode_accelerator_nr", "auto") prefill_accelerator_nr = ev.get("vllm_modelservice_prefill_accelerator_nr", "auto") - + # Calculate actual accelerator numbers decode_accelerator_count = get_accelerator_nr( - decode_accelerator_nr, - decode_tensor_parallelism, - decode_data_parallelism + decode_accelerator_nr, decode_tensor_parallelism, decode_data_parallelism ) 
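    # For orientation, the get_accelerator_nr() contract (the helper is defined
    # in functions.py further down in this patch): an explicit count passes
    # through unchanged, while "auto" resolves to tensor_parallelism * data_parallelism.
    #   get_accelerator_nr("auto", 4, 2)  # -> 8 accelerators (4-way TP x 2 DP ranks)
    #   get_accelerator_nr("6", 4, 2)     # -> 6, the explicit value wins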
prefill_accelerator_count = get_accelerator_nr( - prefill_accelerator_nr, - prefill_tensor_parallelism, - prefill_data_parallelism + prefill_accelerator_nr, prefill_tensor_parallelism, prefill_data_parallelism ) - + ephemeral_storage_resource = ev.get("vllm_common_ephemeral_storage_resource", "") decode_ephemeral_storage_nr = ev.get("vllm_modelservice_decode_ephemeral_storage_nr", "") prefill_ephemeral_storage_nr = ev.get("vllm_modelservice_prefill_ephemeral_storage_nr", "") - + decode_network_resource = ev.get("vllm_modelservice_decode_network_resource", "") decode_network_nr = ev.get("vllm_modelservice_decode_network_nr", "") prefill_network_resource = ev.get("vllm_modelservice_prefill_network_resource", "") prefill_network_nr = ev.get("vllm_modelservice_prefill_network_nr", "") - + # Affinity configuration - get fresh value after check_affinity() call affinity = os.environ.get("LLMDBENCH_VLLM_COMMON_AFFINITY", "") if ":" in affinity: affinity_key, affinity_value = affinity.split(":", 1) else: affinity_key, affinity_value = "", "" - + # Probe configuration initial_delay_probe = ev.get("vllm_common_initial_delay_probe", "30") common_inference_port = ev.get("vllm_common_inference_port", "8000") - + # Extra configurations decode_extra_pod_config = ev.get("vllm_modelservice_decode_extra_pod_config", "") decode_extra_container_config = ev.get("vllm_modelservice_decode_extra_container_config", "") decode_extra_volume_mounts = ev.get("vllm_modelservice_decode_extra_volume_mounts", "") decode_extra_volumes = ev.get("vllm_modelservice_decode_extra_volumes", "") - + prefill_extra_pod_config = ev.get("vllm_modelservice_prefill_extra_pod_config", "") prefill_extra_container_config = ev.get("vllm_modelservice_prefill_extra_container_config", "") prefill_extra_volume_mounts = ev.get("vllm_modelservice_prefill_extra_volume_mounts", "") prefill_extra_volumes = ev.get("vllm_modelservice_prefill_extra_volumes", "") - + # Environment variables to YAML envvars_to_yaml = ev.get("vllm_common_envvars_to_yaml", "") - + # Read the rules file content rules_content = "" if rules_file.exists(): rules_content = rules_file.read_text().rstrip() - + # Build decode resources section cleanly decode_limits_resources = [] decode_requests_resources = [] @@ -245,17 +244,17 @@ def generate_ms_values_yaml(ev: dict, mount_model_volume: bool, rules_file: Path decode_limits_resources.append(f" memory: {decode_cpu_mem}") decode_requests_resources.append(f" memory: {decode_cpu_mem}") if decode_cpu_nr: - decode_limits_resources.append(f" cpu: \"{decode_cpu_nr}\"") - decode_requests_resources.append(f" cpu: \"{decode_cpu_nr}\"") + decode_limits_resources.append(f' cpu: "{decode_cpu_nr}"') + decode_requests_resources.append(f' cpu: "{decode_cpu_nr}"') if ephemeral_storage_resource and decode_ephemeral_storage_nr: - decode_limits_resources.append(f" {ephemeral_storage_resource}: \"{decode_ephemeral_storage_nr}\"") - decode_requests_resources.append(f" {ephemeral_storage_resource}: \"{decode_ephemeral_storage_nr}\"") + decode_limits_resources.append(f' {ephemeral_storage_resource}: "{decode_ephemeral_storage_nr}"') + decode_requests_resources.append(f' {ephemeral_storage_resource}: "{decode_ephemeral_storage_nr}"') if accelerator_resource and decode_accelerator_count and str(decode_accelerator_count) != "0": - decode_limits_resources.append(f" {accelerator_resource}: \"{decode_accelerator_count}\"") - decode_requests_resources.append(f" {accelerator_resource}: \"{decode_accelerator_count}\"") + decode_limits_resources.append(f' 
{accelerator_resource}: "{decode_accelerator_count}"') + decode_requests_resources.append(f' {accelerator_resource}: "{decode_accelerator_count}"') if decode_network_resource and decode_network_nr: - decode_limits_resources.append(f" {decode_network_resource}: \"{decode_network_nr}\"") - decode_requests_resources.append(f" {decode_network_resource}: \"{decode_network_nr}\"") + decode_limits_resources.append(f' {decode_network_resource}: "{decode_network_nr}"') + decode_requests_resources.append(f' {decode_network_resource}: "{decode_network_nr}"') # Build prefill resources section cleanly prefill_limits_resources = [] @@ -265,17 +264,17 @@ def generate_ms_values_yaml(ev: dict, mount_model_volume: bool, rules_file: Path prefill_limits_resources.append(f" memory: {prefill_cpu_mem}") prefill_requests_resources.append(f" memory: {prefill_cpu_mem}") if prefill_cpu_nr: - prefill_limits_resources.append(f" cpu: \"{prefill_cpu_nr}\"") - prefill_requests_resources.append(f" cpu: \"{prefill_cpu_nr}\"") + prefill_limits_resources.append(f' cpu: "{prefill_cpu_nr}"') + prefill_requests_resources.append(f' cpu: "{prefill_cpu_nr}"') if ephemeral_storage_resource and prefill_ephemeral_storage_nr: - prefill_limits_resources.append(f" {ephemeral_storage_resource}: \"{prefill_ephemeral_storage_nr}\"") - prefill_requests_resources.append(f" {ephemeral_storage_resource}: \"{prefill_ephemeral_storage_nr}\"") + prefill_limits_resources.append(f' {ephemeral_storage_resource}: "{prefill_ephemeral_storage_nr}"') + prefill_requests_resources.append(f' {ephemeral_storage_resource}: "{prefill_ephemeral_storage_nr}"') if accelerator_resource and prefill_accelerator_count and str(prefill_accelerator_count) != "0": - prefill_limits_resources.append(f" {accelerator_resource}: \"{prefill_accelerator_count}\"") - prefill_requests_resources.append(f" {accelerator_resource}: \"{prefill_accelerator_count}\"") + prefill_limits_resources.append(f' {accelerator_resource}: "{prefill_accelerator_count}"') + prefill_requests_resources.append(f' {accelerator_resource}: "{prefill_accelerator_count}"') if prefill_network_resource and prefill_network_nr: - prefill_limits_resources.append(f" {prefill_network_resource}: \"{prefill_network_nr}\"") - prefill_requests_resources.append(f" {prefill_network_resource}: \"{prefill_network_nr}\"") + prefill_limits_resources.append(f' {prefill_network_resource}: "{prefill_network_nr}"') + prefill_requests_resources.append(f' {prefill_network_resource}: "{prefill_network_nr}"') # Join resources with newlines decode_limits_str = "\n".join(decode_limits_resources) if decode_limits_resources else " {}" @@ -460,10 +459,8 @@ def generate_ms_values_yaml(ev: dict, mount_model_volume: bool, rules_file: Path {conditional_volume_config(prefill_extra_volume_mounts, "volumeMounts", 4)} {conditional_volume_config(prefill_extra_volumes, "volumes", 2)} """ - - return yaml_content - + return yaml_content def wait_for_pods_creation(ev: dict, component: str, dry_run: bool, verbose: bool) -> int: @@ -473,7 +470,7 @@ def wait_for_pods_creation(ev: dict, component: str, dry_run: bool, verbose: boo namespace = ev.get("vllm_common_namespace", "") model_id_label = ev.get("deploy_current_model_id_label", "") wait_timeout = int(ev.get("control_wait_timeout", "600")) // 2 - + announce(f"⏳ waiting for ({component}) pods serving model to be created...") wait_cmd = f"kubectl --namespace {namespace} wait --timeout={wait_timeout}s --for=create pod -l llm-d.ai/model={model_id_label},llm-d.ai/role={component}" result = 
llmdbench_execute_cmd(wait_cmd, dry_run, verbose, 1, 2) @@ -489,8 +486,8 @@ def wait_for_pods_running(ev: dict, component: str, dry_run: bool, verbose: bool namespace = ev.get("vllm_common_namespace", "") model_id_label = ev.get("deploy_current_model_id_label", "") wait_timeout = ev.get("control_wait_timeout", "600") - - announce(f"⏳ Waiting for ({component}) pods serving model to be in \"Running\" state (timeout={wait_timeout}s)...") + + announce(f'⏳ Waiting for ({component}) pods serving model to be in "Running" state (timeout={wait_timeout}s)...') wait_cmd = f"kubectl --namespace {namespace} wait --timeout={wait_timeout}s --for=jsonpath='{{.status.phase}}'=Running pod -l llm-d.ai/model={model_id_label},llm-d.ai/role={component}" result = llmdbench_execute_cmd(wait_cmd, dry_run, verbose) if result == 0: @@ -505,7 +502,7 @@ def wait_for_pods_ready(ev: dict, component: str, dry_run: bool, verbose: bool) namespace = ev.get("vllm_common_namespace", "") model_id_label = ev.get("deploy_current_model_id_label", "") wait_timeout = ev.get("control_wait_timeout", "600") - + announce(f"⏳ Waiting for ({component}) pods serving model to be Ready (timeout={wait_timeout}s)...") wait_cmd = f"kubectl --namespace {namespace} wait --timeout={wait_timeout}s --for=condition=Ready=True pod -l llm-d.ai/model={model_id_label},llm-d.ai/role={component}" result = llmdbench_execute_cmd(wait_cmd, dry_run, verbose) @@ -521,12 +518,12 @@ def collect_logs(ev: dict, component: str, dry_run: bool, verbose: bool) -> int: namespace = ev.get("vllm_common_namespace", "") model_id_label = ev.get("deploy_current_model_id_label", "") work_dir = ev.get("control_work_dir", "") - + # Create logs directory logs_dir = Path(work_dir) / "setup" / "logs" if not dry_run: logs_dir.mkdir(parents=True, exist_ok=True) - + # Collect logs log_file = logs_dir / f"llm-d-{component}.log" log_cmd = f"kubectl --namespace {namespace} logs --tail=-1 --prefix=true -l llm-d.ai/model={model_id_label},llm-d.ai/role={component} > {log_file}" @@ -535,93 +532,95 @@ def collect_logs(ev: dict, component: str, dry_run: bool, verbose: bool) -> int: def main(): """Main function for step 09 - Deploy via modelservice""" - + # Set current step for functions.py compatibility os.environ["LLMDBENCH_CURRENT_STEP"] = "09" - + # Parse environment variables into ev dictionary ev = {} environment_variable_to_dict(ev) - + # Check if modelservice environment is active if not ev.get("control_environment_type_modelservice_active", False): deploy_methods = ev.get("deploy_methods", "") - announce(f"⏭️ Environment types are \"{deploy_methods}\". Skipping this step.") + announce(f'⏭️ Environment types are "{deploy_methods}". 
Skipping this step.') return 0 - + # Check storage class if not check_storage_class(): announce("❌ Failed to check storage class") return 1 - + # Check affinity if not check_affinity(): announce("❌ Failed to check affinity") return 1 - + # Extract environment for debugging extract_environment() - + # Extract flags dry_run = ev.get("control_dry_run", "false") == "true" verbose = ev.get("control_verbose", "false") == "true" - + # Deploy models model_list = ev.get("deploy_model_list", "").replace(",", " ").split() model_number = 0 - + for model in model_list: if not model.strip(): continue - + # Set current model environment variables current_model = model_attribute(model, "model") current_model_id = model_attribute(model, "modelid") current_model_id_label = model_attribute(model, "modelid_label") - + os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL"] = current_model os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL_ID"] = current_model_id os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL_ID_LABEL"] = current_model_id_label - + # Update ev dictionary with new model info ev["deploy_current_model"] = current_model ev["deploy_current_model_id"] = current_model_id ev["deploy_current_model_id_label"] = current_model_id_label - + # Determine model mounting mount_model_volume = False - if (ev.get("vllm_modelservice_uri_protocol") == "pvc" or - ev.get("control_environment_type_standalone_active", "0") == "1"): + if ( + ev.get("vllm_modelservice_uri_protocol") == "pvc" + or ev.get("control_environment_type_standalone_active", "0") == "1" + ): pvc_name = ev.get("vllm_common_pvc_name", "") os.environ["LLMDBENCH_VLLM_MODELSERVICE_URI"] = f"pvc://{pvc_name}/models/{current_model}" mount_model_volume = True else: os.environ["LLMDBENCH_VLLM_MODELSERVICE_URI"] = f"hf://{current_model}" mount_model_volume = True - + # Check for mount override mount_override = ev.get("vllm_modelservice_mount_model_volume_override") if mount_override: mount_model_volume = mount_override == "true" - + # Update ev with URI ev["vllm_modelservice_uri"] = os.environ["LLMDBENCH_VLLM_MODELSERVICE_URI"] - + # Create directory structure (Do not use "llmdbench_execute_cmd" for these commands) model_num = f"{model_number:02d}" release = ev.get("vllm_modelservice_release", "") work_dir = Path(ev.get("control_work_dir", "")) helm_dir = work_dir / "setup" / "helm" / release / model_num - + # Always create directory structure (even in dry-run) helm_dir.mkdir(parents=True, exist_ok=True) - + # Set proper defaults for empty configurations add_config_prep() - + # Generate ms-rules.yaml content rules_file = helm_dir / "ms-rules.yaml" - + # For single model, write routing rule; otherwise empty if len([m for m in model_list if m.strip()]) == 1: rules_content = f"""- backendRefs: @@ -637,100 +636,103 @@ def main(): rules_file.write_text(rules_content) else: rules_file.write_text("") - - + # Generate ms-values.yaml values_content = generate_ms_values_yaml(ev, mount_model_volume, rules_file) values_file = helm_dir / "ms-values.yaml" values_file.write_text(values_content) - + # Clean up temp file rules_file.unlink() - + # Deploy via helmfile - announce(f"🚀 Installing helm chart \"ms-{release}\" via helmfile...") + announce(f'🚀 Installing helm chart "ms-{release}" via helmfile...') context_path = work_dir / "environment" / "context.ctx" namespace = ev.get("vllm_common_namespace", "") - - helmfile_cmd = (f"helmfile --namespace {namespace} " - f"--kubeconfig {context_path} " - f"--selector name={current_model_id_label}-ms " - f"apply -f 
{work_dir}/setup/helm/{release}/helmfile-{model_num}.yaml --skip-diff-on-install --skip-schema-validation") - + + helmfile_cmd = ( + f"helmfile --namespace {namespace} " + f"--kubeconfig {context_path} " + f"--selector name={current_model_id_label}-ms " + f"apply -f {work_dir}/setup/helm/{release}/helmfile-{model_num}.yaml --skip-diff-on-install --skip-schema-validation" + ) + result = llmdbench_execute_cmd(helmfile_cmd, dry_run, verbose) if result != 0: announce(f"❌ Failed to deploy helm chart for model {current_model}") return result - + announce(f"✅ {namespace}-{current_model_id_label}-ms helm chart deployed successfully") - + # Wait for pods and collect logs exactly like bash script decode_replicas = int(ev.get("vllm_modelservice_decode_replicas", "0")) prefill_replicas = int(ev.get("vllm_modelservice_prefill_replicas", "0")) - + # Wait for decode pods creation if decode_replicas > 0: result = wait_for_pods_creation(ev, "decode", dry_run, verbose) if result != 0: return result - + # Wait for prefill pods creation if prefill_replicas > 0: result = wait_for_pods_creation(ev, "prefill", dry_run, verbose) if result != 0: return result - + # Wait for decode pods to be running if decode_replicas > 0: result = wait_for_pods_running(ev, "decode", dry_run, verbose) if result != 0: return result - + # Wait for prefill pods to be running if prefill_replicas > 0: result = wait_for_pods_running(ev, "prefill", dry_run, verbose) if result != 0: return result - + # Wait for decode pods to be ready if decode_replicas > 0: result = wait_for_pods_ready(ev, "decode", dry_run, verbose) if result != 0: return result - + # Collect decode logs collect_logs(ev, "decode", dry_run, verbose) - + # Wait for prefill pods to be ready if prefill_replicas > 0: result = wait_for_pods_ready(ev, "prefill", dry_run, verbose) if result != 0: return result - + # Collect prefill logs collect_logs(ev, "prefill", dry_run, verbose) - + # Handle OpenShift route creation - if (ev.get("vllm_modelservice_route") == "true" and - ev.get("control_deploy_is_openshift", "0") == "1"): - + if ev.get("vllm_modelservice_route") == "true" and ev.get("control_deploy_is_openshift", "0") == "1": # Check if route exists route_name = f"{release}-inference-gateway-route" - check_route_cmd = f"kubectl --namespace {namespace} get route -o name --ignore-not-found | grep -E \"/{route_name}$\"" - + check_route_cmd = ( + f'kubectl --namespace {namespace} get route -o name --ignore-not-found | grep -E "/{route_name}$"' + ) + result = llmdbench_execute_cmd(check_route_cmd, dry_run, verbose) if result != 0: # Route doesn't exist announce(f"📜 Exposing pods serving model {model} as service...") inference_port = ev.get("vllm_common_inference_port", "8000") - expose_cmd = (f"kubectl --namespace {namespace} expose service/infra-{release}-inference-gateway " - f"--target-port={inference_port} --name={route_name}") - + expose_cmd = ( + f"kubectl --namespace {namespace} expose service/infra-{release}-inference-gateway " + f"--target-port={inference_port} --name={route_name}" + ) + result = llmdbench_execute_cmd(expose_cmd, dry_run, verbose) if result == 0: announce(f"✅ Service for pods service model {model} created") - - announce(f"✅ Model \"{model}\" and associated service deployed.") - + + announce(f'✅ Model "{model}" and associated service deployed.') + # Clean up model environment variables if "LLMDBENCH_DEPLOY_CURRENT_MODEL" in os.environ: del os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL"] @@ -738,12 +740,12 @@ def main(): del 
os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL_ID"] if "LLMDBENCH_DEPLOY_CURRENT_MODEL_ID_LABEL" in os.environ: del os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL_ID_LABEL"] - + model_number += 1 - + announce("✅ modelservice completed model deployment") return 0 if __name__ == "__main__": - sys.exit(main()) \ No newline at end of file + sys.exit(main()) diff --git a/setup/steps/09_deploy_via_modelservice.sh b/llm_d_benchmark/setup/steps/09_deploy_via_modelservice.sh similarity index 100% rename from setup/steps/09_deploy_via_modelservice.sh rename to llm_d_benchmark/setup/steps/09_deploy_via_modelservice.sh diff --git a/setup/steps/10_smoketest.sh b/llm_d_benchmark/setup/steps/10_smoketest.sh similarity index 100% rename from setup/steps/10_smoketest.sh rename to llm_d_benchmark/setup/steps/10_smoketest.sh diff --git a/config_explorer/src/config_explorer/__init__.py b/llm_d_benchmark/setup/steps/__init__.py similarity index 100% rename from config_explorer/src/config_explorer/__init__.py rename to llm_d_benchmark/setup/steps/__init__.py diff --git a/setup/teardown.sh b/llm_d_benchmark/setup/teardown.sh similarity index 98% rename from setup/teardown.sh rename to llm_d_benchmark/setup/teardown.sh index ace58388..e6cdd230 100755 --- a/setup/teardown.sh +++ b/llm_d_benchmark/setup/teardown.sh @@ -119,7 +119,9 @@ export LLMDBENCH_CONTROL_CLI_OPTS_PROCESSED=1 source ${LLMDBENCH_CONTROL_DIR}/env.sh extract_environment -sleep 5 +if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 0 ]]; then + sleep 5 +fi for resource in ${LLMDBENCH_CONTROL_RESOURCE_LIST//,/ }; do has_resource=$($LLMDBENCH_CONTROL_KCMD get ${resource} --no-headers -o name 2>&1 | grep error || true) @@ -231,7 +233,9 @@ fi if [[ $LLMDBENCH_CONTROL_DEEP_CLEANING -eq 1 ]]; then # Optional: delete cloned repos if they exist announce "🧼 Cleaning up local Git clones..." 
- sleep 10 + if [[ $LLMDBENCH_CONTROL_DRY_RUN -eq 0 ]]; then + sleep 10 + fi llmdbench_execute_cmd "rm -rf ${LLMDBENCH_HARNESS_DIR}/fmperf" ${LLMDBENCH_CONTROL_DRY_RUN} ${LLMDBENCH_CONTROL_VERBOSE} fi diff --git a/llm_d_benchmark/setup/utils/__init__.py b/llm_d_benchmark/setup/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/setup/functions.py b/llm_d_benchmark/setup/utils/functions.py similarity index 83% rename from setup/functions.py rename to llm_d_benchmark/setup/utils/functions.py index 5b42d1b2..a909acff 100644 --- a/setup/functions.py +++ b/llm_d_benchmark/setup/utils/functions.py @@ -1,65 +1,56 @@ +import asyncio +import hashlib +import logging +import os import re -from datetime import datetime -from typing import Union +import subprocess import sys -import os import time +from datetime import datetime from pathlib import Path -import subprocess -import requests -import inspect -import pykube -import hashlib -from pykube.exceptions import PyKubeError +import pykube +import requests import yaml - -import kubernetes -from kubernetes import client as k8s_client, config as k8s_config - +from kubernetes import client as k8s_client +from kubernetes import config as k8s_config from kubernetes_asyncio import client as k8s_async_client from kubernetes_asyncio import config as k8s_async_config from kubernetes_asyncio import watch as k8s_async_watch +from pykube.exceptions import PyKubeError -import asyncio -import logging -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s' -) +logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s") logger = logging.getLogger(__name__) -def announce(message: str, logfile : str = None): - work_dir = os.getenv("LLMDBENCH_CONTROL_WORK_DIR", '.') - log_dir = os.path.join(work_dir, 'logs') +def announce(message: str, logfile: str | None = None): + work_dir = os.getenv("LLMDBENCH_CONTROL_WORK_DIR", ".") + log_dir = os.path.join(work_dir, "logs") # ensure logs dir exists os.makedirs(log_dir, exist_ok=True) - if not logfile: - cur_step = os.getenv("CURRENT_STEP_NAME", 'step') - logfile = cur_step + '.log' + cur_step = os.getenv("CURRENT_STEP_NAME", "step") + logfile = cur_step + ".log" logpath = os.path.join(log_dir, logfile) logger.info(message) try: - timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S') + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") log_line = f"{timestamp} : {message}" - with open(logpath, 'a', encoding='utf-8') as f: - f.write(log_line + '\n') + with open(logpath, "a", encoding="utf-8") as f: + f.write(log_line + "\n") except IOError as e: logger.error(f"Could not write to log file '{logpath}'. Reason: {e}") except Exception as e: logger.error(f"An unexpected error occurred with logfile '{logpath}'. 
Reason: {e}") - -def kube_connect(config_path : str = '~/.kube/config'): +def kube_connect(config_path: str = "~/.kube/config"): api = None try: api = pykube.HTTPClient(pykube.KubeConfig.from_file(os.path.expanduser(config_path))) @@ -69,11 +60,13 @@ def kube_connect(config_path : str = '~/.kube/config'): return api + class SecurityContextConstraints(pykube.objects.APIObject): version = "security.openshift.io/v1" endpoint = "securitycontextconstraints" kind = "SecurityContextConstraints" + def is_openshift(api: pykube.HTTPClient) -> bool: try: # the priviledged scc is a standard built in component for oc @@ -91,13 +84,18 @@ def is_openshift(api: pykube.HTTPClient) -> bool: return False # for other errors like 403, we might be on OpenShift but lack permissions # if we cant query sccs we cant modify them either - announce(f'Could not query SCCs due to an API error (perhaps permissions?): {e}. Assuming not OpenShift for SCC operations') + announce( + f"Could not query SCCs due to an API error (perhaps permissions?): {e}. Assuming not OpenShift for SCC operations" + ) return False except Exception as e: # other potential non pykube errors - announce(f'An unexpected error occurred while checking for OpenShift: {e}. Assuming not OpenShift for SCC operations') + announce( + f"An unexpected error occurred while checking for OpenShift: {e}. Assuming not OpenShift for SCC operations" + ) return False + def llmdbench_execute_cmd( actual_cmd: str, dry_run: bool = True, @@ -105,7 +103,7 @@ def llmdbench_execute_cmd( silent: bool = True, attempts: int = 1, fatal: bool = False, - delay: int = 10 + delay: int = 10, ) -> int: work_dir_str = os.getenv("LLMDBENCH_CONTROL_WORK_DIR", ".") log_dir = Path(work_dir_str) / "setup" / "commands" @@ -115,18 +113,18 @@ def llmdbench_execute_cmd( command_tstamp = int(time.time() * 1_000_000_000) if dry_run: - msg = f"---> would have executed the command \"{actual_cmd}\"" + msg = f'---> would have executed the command "{actual_cmd}"' announce(msg) try: - (log_dir / f"{command_tstamp}_command.log").write_text(msg + '\n') + (log_dir / f"{command_tstamp}_command.log").write_text(msg + "\n") except IOError as e: announce(f"Error writing to dry run log: {e}") return 0 if verbose: - msg = f"---> will execute the command \"{actual_cmd}\"" + msg = f'---> will execute the command "{actual_cmd}"' try: - (log_dir / f"{command_tstamp}_command.log").write_text(msg + '\n') + (log_dir / f"{command_tstamp}_command.log").write_text(msg + "\n") except IOError as e: announce(f"Error writing to command log: {e}") @@ -147,8 +145,10 @@ def llmdbench_execute_cmd( # mimics the if/elif/else for verbose/silent if not verbose and silent: # correspon to eval with writing log - with open(stdout_log, 'w') as f_out, open(stderr_log, 'w') as f_err: - result = subprocess.run(actual_cmd, shell=True, executable="/bin/bash", stdout=f_out, stderr=f_err, check=False) + with open(stdout_log, "w") as f_out, open(stderr_log, "w") as f_err: + result = subprocess.run( + actual_cmd, shell=True, executable="/bin/bash", stdout=f_out, stderr=f_err, check=False + ) elif not verbose and not silent: # run with no log result = subprocess.run(actual_cmd, shell=True, executable="/bin/bash", check=False) @@ -171,8 +171,8 @@ def llmdbench_execute_cmd( time.sleep(delay) if ecode != 0: - if not silent : - announce(f"\nERROR while executing command \"{actual_cmd}\"") + if not silent: + announce(f'\nERROR while executing command "{actual_cmd}"') if last_stdout_log and last_stdout_log.exists(): try: @@ -198,11 +198,10 @@ def 
llmdbench_execute_cmd( return ecode - -def environment_variable_to_dict(ev: dict = {}) : +def environment_variable_to_dict(ev: dict = {}): for key in dict(os.environ).keys(): if "LLMDBENCH_" in key: - ev.update({key.split("LLMDBENCH_")[1].lower():os.environ.get(key)}) + ev.update({key.split("LLMDBENCH_")[1].lower(): os.environ.get(key)}) # Convert true/false to boolean values for key, value in ev.items(): @@ -212,14 +211,15 @@ def environment_variable_to_dict(ev: dict = {}) : if value == "false": ev[key] = False - for mandatory_key in [ "control_dry_run", - "control_verbose", - "run_experiment_analyze_locally", - "user_is_admin", - "control_environment_type_standalone_active", - "control_environment_type_modelservice_active", - ] : - if mandatory_key not in ev : + for mandatory_key in [ + "control_dry_run", + "control_verbose", + "run_experiment_analyze_locally", + "user_is_admin", + "control_environment_type_standalone_active", + "control_environment_type_modelservice_active", + ]: + if mandatory_key not in ev: ev[mandatory_key] = 0 ev[mandatory_key] = bool(int(ev[mandatory_key])) @@ -233,6 +233,7 @@ def environment_variable_to_dict(ev: dict = {}) : ev["control_kcmd"] = ev.get("control_kcmd", "kubectl") ev["vllm_modelservice_gateway_class_name"] = ev.get("vllm_modelservice_gateway_class_name", "").lower() + def create_namespace(api: pykube.HTTPClient, namespace_name: str, dry_run: bool = False, verbose: bool = False): if not namespace_name: announce("Error: namespace_name cannot be empty.") @@ -262,11 +263,11 @@ def validate_and_create_pvc( pvc_name: str, pvc_size: str, pvc_class: str, - dry_run: bool = False + dry_run: bool = False, ): announce("Provisioning model storage…") - if '/' not in download_model: + if "/" not in download_model: announce(f"'{download_model}' is not in Hugging Face format /") sys.exit(1) @@ -275,11 +276,13 @@ def validate_and_create_pvc( k8s_config.load_kube_config() storage_v1_api = k8s_client.StorageV1Api() - if pvc_class == "default" : - for x in storage_v1_api.list_storage_class().items : - if x.metadata.annotations and "storageclass.kubernetes.io/is-default-class" in x.metadata.annotations : - if x.metadata.annotations["storageclass.kubernetes.io/is-default-class"] == "true" : - announce(f"ℹ️ Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS automatically set to \"{x.metadata.name}\"") + if pvc_class == "default": + for x in storage_v1_api.list_storage_class().items: + if x.metadata.annotations and "storageclass.kubernetes.io/is-default-class" in x.metadata.annotations: + if x.metadata.annotations["storageclass.kubernetes.io/is-default-class"] == "true": + announce( + f'ℹ️ Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS automatically set to "{x.metadata.name}"' + ) pvc_class = x.metadata.name storage_v1_api.read_storage_class(name=pvc_class) announce(f"StorageClass '{pvc_class}' found.") @@ -306,12 +309,10 @@ def validate_and_create_pvc( }, "spec": { "accessModes": ["ReadWriteMany"], - "resources": { - "requests": {"storage": pvc_size} - }, + "resources": {"requests": {"storage": pvc_size}}, "storageClassName": pvc_class, - "volumeMode": "Filesystem" - } + "volumeMode": "Filesystem", + }, } pvc = pykube.PersistentVolumeClaim(api, pvc_obj) @@ -339,7 +340,6 @@ def launch_download_job( dry_run: bool = False, verbose: bool = False, ): - work_dir_str = os.getenv("LLMDBENCH_CONTROL_WORK_DIR", ".") current_step = os.getenv("LLMDBENCH_CURRENT_STEP", "step") kcmd = os.getenv("LLMDBENCH_CONTROL_KCMD", "kubectl") @@ -360,9 +360,7 @@ def 
launch_download_job( hf_cmds = [] hf_token_env = "" if is_hf_model_gated(os.getenv("LLMDBENCH_DEPLOY_MODEL_LIST")): - if user_has_hf_model_access( - os.getenv("LLMDBENCH_DEPLOY_MODEL_LIST"), os.getenv("LLMDBENCH_HF_TOKEN") - ): + if user_has_hf_model_access(os.getenv("LLMDBENCH_DEPLOY_MODEL_LIST"), os.getenv("LLMDBENCH_HF_TOKEN")): # # Login is only required for GATED models. # https://huggingface.co/docs/hub/models-gated @@ -381,9 +379,7 @@ def launch_download_job( # check this here again since there may be some code path that some how gets here # without first sourcing env.sh and running the precheck there... # - announce( - f"❌ Unauthorized access to gated model {model_path}. Check your HF Token." - ) + announce(f"❌ Unauthorized access to gated model {model_path}. Check your HF Token.") sys.exit(1) hf_cmds.append('hf download "${HF_MODEL_ID}" --local-dir "/cache/${MODEL_PATH}"') base_cmds.extend(hf_cmds) @@ -442,24 +438,18 @@ def launch_download_job( # FIXME (USE PYKUBE) delete_cmd = f"{kcmd} delete job {job_name} -n {namespace} --ignore-not-found=true" - announce( - f"--> Deleting previous job '{job_name}' (if it exists) to prevent conflicts..." - ) - llmdbench_execute_cmd( - actual_cmd=delete_cmd, dry_run=dry_run, verbose=verbose, silent=True - ) + announce(f"--> Deleting previous job '{job_name}' (if it exists) to prevent conflicts...") + llmdbench_execute_cmd(actual_cmd=delete_cmd, dry_run=dry_run, verbose=verbose, silent=True) # FIXME (USE PYKUBE) apply_cmd = f"{kcmd} apply -n {namespace} -f {yaml_file_path}" - llmdbench_execute_cmd( - actual_cmd=apply_cmd, dry_run=dry_run, verbose=verbose, silent=True, attempts=1 - ) + llmdbench_execute_cmd(actual_cmd=apply_cmd, dry_run=dry_run, verbose=verbose, silent=True, attempts=1) async def wait_for_job(job_name, namespace, timeout=7200, dry_run: bool = False): """Wait for the job to complete""" announce(f"Waiting for job {job_name} to complete...") - if dry_run : + if dry_run: return True # use async config loading @@ -467,7 +457,6 @@ async def wait_for_job(job_name, namespace, timeout=7200, dry_run: bool = False) api_client = k8s_async_client.ApiClient() batch_v1_api = k8s_async_client.BatchV1Api(api_client) try: - w = k8s_async_watch.Watch() # sets up connection with kubernetes, async with manages the streams lifecycle @@ -475,11 +464,12 @@ async def wait_for_job(job_name, namespace, timeout=7200, dry_run: bool = False) func=batch_v1_api.list_namespaced_job, namespace=namespace, field_selector=f"metadata.name={job_name}", - timeout_seconds=timeout # replaces the manual timeout check + timeout_seconds=timeout, # replaces the manual timeout check ) as stream: - - async for event in stream: # replaces time.wait since we grab events as they come from stream sasynchronous - job_status = event['object'].status + async for ( + event + ) in stream: # replaces time.wait since we grab events as they come from the stream asynchronously + job_status = event["object"].status if job_status.succeeded: announce(f"Evaluation job {job_name} completed successfully.") return True @@ -497,24 +487,24 @@ async def wait_for_job(job_name, namespace, timeout=7200, dry_run: bool = False) finally: await api_client.close() -def model_attribute(model: str, attribute: str) -> str: - model, modelid = model.split(':', 1) if ':' in model else (model, model) - modelid = modelid.replace('/', '-').replace('.','-') +def model_attribute(model: str, attribute: str) -> str: + model, modelid = model.split(":", 1) if ":" in model else (model, model) + modelid = modelid.replace("/", 
"-").replace(".", "-") # split the model name into provider and rest - provider, model_part = model.split('/', 1) if '/' in model else ("", model) + provider, model_part = model.split("/", 1) if "/" in model else ("", model) ns = os.getenv("LLMDBENCH_VLLM_COMMON_NAMESPACE") hash_object = hashlib.sha256() - hash_object.update(f'{ns}/{modelid}'.encode('utf-8')) + hash_object.update(f"{ns}/{modelid}".encode("utf-8")) digest = hash_object.hexdigest() modelid_label = f"{modelid[:8]}-{digest[:8]}-{modelid[-8:]}" # create a list of components from the model part # equiv to: tr '[:upper:]' '[:lower:]' | sed -e 's^qwen^qwen-^g' -e 's^-^\n^g' model_components_str = model_part.lower().replace("qwen", "qwen-") - model_components = model_components_str.split('-') + model_components = model_components_str.split("-") # get individual attributes using regex type_str = "" @@ -526,8 +516,8 @@ def model_attribute(model: str, attribute: str) -> str: parameters = "" for comp in model_components: if re.search(r"[0-9].*[bm]", comp, re.IGNORECASE): - parameters = re.sub(r'^[a-z]', '', comp, count=1) - parameters = parameters.replace('.', 'p') + parameters = re.sub(r"^[a-z]", "", comp, count=1) + parameters = parameters.replace(".", "p") break major_version = "1" @@ -536,19 +526,19 @@ def model_attribute(model: str, attribute: str) -> str: if comp.isdigit() or (comp and comp[0].isdigit() and not re.search(r"b|m", comp, re.IGNORECASE)): # remove the parameter string from it if present ... for case like like "3.1-8B" version_part = comp.replace(parameters, "") - major_version = version_part.split('.')[0] + major_version = version_part.split(".")[0] break kind = model_components[0] if model_components else "" - as_label = model.lower().replace('/', '-').replace('.', '-') + as_label = model.lower().replace("/", "-").replace(".", "-") # build label and clean it up label_parts = [part for part in [kind, major_version, parameters] if part] - label = '-'.join(label_parts) - label = re.sub(r'-+', '-', label).strip('-') # replace multiple hyphens and strip from ends + label = "-".join(label_parts) + label = re.sub(r"-+", "-", label).strip("-") # replace multiple hyphens and strip from ends - folder = model.lower().replace('/', '_').replace('-', '_') + folder = model.lower().replace("/", "_").replace("-", "_") # storing all attributes in a dictionary attributes = { @@ -574,7 +564,8 @@ def model_attribute(model: str, attribute: str) -> str: else: return result -#FIXME (USE PYKUBE) + +# FIXME (USE PYKUBE) def apply_configmap(yaml_file: Path, kubectl_cmd: str, dry_run: bool, verbose: bool) -> int: """ Apply ConfigMap using kubectl/oc command. 
@@ -590,12 +581,7 @@ def apply_configmap(yaml_file: Path, kubectl_cmd: str, dry_run: bool, verbose: b """ cmd = f"{kubectl_cmd} apply -f {yaml_file}" - return llmdbench_execute_cmd( - actual_cmd=cmd, - dry_run=dry_run, - verbose=verbose, - silent=not verbose - ) + return llmdbench_execute_cmd(actual_cmd=cmd, dry_run=dry_run, verbose=verbose, silent=not verbose) def extract_environment(): @@ -612,7 +598,9 @@ def extract_environment(): # Get environment variables that start with LLMDBENCH, excluding sensitive ones env_vars = [] for key, value in os.environ.items(): - if key.startswith("LLMDBENCH_") and not any(sensitive in key.upper() for sensitive in ["TOKEN", "USER", "PASSWORD", "EMAIL"]): + if key.startswith("LLMDBENCH_") and not any( + sensitive in key.upper() for sensitive in ["TOKEN", "USER", "PASSWORD", "EMAIL"] + ): env_vars.append(f"{key}={value}") env_vars.sort() @@ -664,7 +652,7 @@ def get_image(image_registry: str, image_repo: str, image_name: str, image_tag: try: result = subprocess.run(cmd.split(), capture_output=True, text=True, check=False) if result.returncode == 0: - lines = result.stdout.strip().split('\n') + lines = result.stdout.strip().split("\n") if len(lines) > 0: # Get the last line and extract the tag (second column) last_line = lines[-1] @@ -672,7 +660,7 @@ def get_image(image_registry: str, image_repo: str, image_name: str, image_tag: if len(parts) >= 2: is_latest_tag = parts[1] # The || true part in bash means we don't fail if command fails - except: + except Exception: pass else: # Use skopeo to get latest tag @@ -680,15 +668,16 @@ def get_image(image_registry: str, image_repo: str, image_name: str, image_tag: try: result = subprocess.run(cmd.split(), capture_output=True, text=True, check=True) import json + tags_data = json.loads(result.stdout) if tags_data.get("Tags"): # Use jq -r .Tags[] | tail -1 equivalent is_latest_tag = tags_data["Tags"][-1] - except: + except Exception: is_latest_tag = "" if not is_latest_tag: - announce(f"❌ Unable to find latest tag for image \"{image_full_name}\"") + announce(f'❌ Unable to find latest tag for image "{image_full_name}"') sys.exit(1) if tag_only == "1": @@ -711,7 +700,7 @@ def check_storage_class(): try: # Use pykube to connect to Kubernetes control_work_dir = os.environ.get("LLMDBENCH_CONTROL_WORK_DIR", "/tmp/llm-d-benchmark") - api = kube_connect(f'{control_work_dir}/environment/context.ctx') + api = kube_connect(f"{control_work_dir}/environment/context.ctx") # Create StorageClass object - try pykube-ng first, fallback to custom class try: @@ -739,11 +728,15 @@ class StorageClass(pykube.objects.APIObject): break if default_sc: - announce(f"ℹ️ Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS automatically set to \"{default_sc}\"") + announce( + f'ℹ️ Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS automatically set to "{default_sc}"' + ) os.environ["LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS"] = default_sc storage_class = default_sc else: - announce("❌ ERROR: environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS=default, but unable to find a default storage class") + announce( + "❌ ERROR: environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS=default, but unable to find a default storage class" + ) return False except Exception as e: announce(f"❌ Error checking default storage class: {e}") @@ -755,10 +748,14 @@ class StorageClass(pykube.objects.APIObject): if sc.exists(): return True else: - announce(f"❌ ERROR. 
Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS={storage_class} but could not find such storage class") + announce( + f"❌ ERROR. Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS={storage_class} but could not find such storage class" + ) return False except pykube.exceptions.ObjectDoesNotExist: - announce(f"❌ ERROR. Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS={storage_class} but could not find such storage class") + announce( + f"❌ ERROR. Environment variable LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS={storage_class} but could not find such storage class" + ) return False except Exception as e: announce(f"❌ Error checking storage class: {e}") @@ -784,7 +781,7 @@ def check_affinity(): try: # Use pykube to connect to Kubernetes control_work_dir = os.environ.get("LLMDBENCH_CONTROL_WORK_DIR", "/tmp/llm-d-benchmark") - api = kube_connect(f'{control_work_dir}/environment/context.ctx') + api = kube_connect(f"{control_work_dir}/environment/context.ctx") # Handle auto affinity detection if affinity == "auto": @@ -796,7 +793,7 @@ def check_affinity(): accelerator_patterns = [ "nvidia.com/gpu.product", "gpu.nvidia.com/class", - "cloud.google.com/gke-accelerator" + "cloud.google.com/gke-accelerator", ] found_accelerator = None @@ -815,9 +812,13 @@ def check_affinity(): if found_accelerator: os.environ["LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE"] = "nvidia.com/gpu" os.environ["LLMDBENCH_VLLM_COMMON_AFFINITY"] = found_accelerator - announce(f"ℹ️ Environment variable LLMDBENCH_VLLM_COMMON_AFFINITY automatically set to \"{found_accelerator}\"") + announce( + f'ℹ️ Environment variable LLMDBENCH_VLLM_COMMON_AFFINITY automatically set to "{found_accelerator}"' + ) else: - announce("❌ ERROR: environment variable LLMDBENCH_VLLM_COMMON_AFFINITY=auto, but unable to find an accelerator on any node") + announce( + "❌ ERROR: environment variable LLMDBENCH_VLLM_COMMON_AFFINITY=auto, but unable to find an accelerator on any node" + ) return False except Exception as e: announce(f"❌ Error checking affinity: {e}") @@ -837,7 +838,9 @@ def check_affinity(): break if not found_matching_node: - announce(f"❌ ERROR. There are no nodes on this cluster with the label \"{annotation_key}:{annotation_value}\" (environment variable LLMDBENCH_VLLM_COMMON_AFFINITY)") + announce( + f'❌ ERROR. There are no nodes on this cluster with the label "{annotation_key}:{annotation_value}" (environment variable LLMDBENCH_VLLM_COMMON_AFFINITY)' + ) return False except Exception as e: announce(f"❌ Error validating affinity: {e}") @@ -847,7 +850,9 @@ def check_affinity(): accelerator_resource = os.environ.get("LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE", "") if accelerator_resource == "auto": os.environ["LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE"] = "nvidia.com/gpu" - announce(f"ℹ️ Environment variable LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE automatically set to \"nvidia.com/gpu\"") + announce( + 'ℹ️ Environment variable LLMDBENCH_VLLM_COMMON_ACCELERATOR_RESOURCE automatically set to "nvidia.com/gpu"' + ) return True @@ -855,18 +860,20 @@ def check_affinity(): announce(f"❌ Error connecting to Kubernetes: {e}") return False + def get_accelerator_nr(accelerator_nr, tp, dp) -> int: """ Get the number of accelerator resources needed. Equivalent to the Bash get_accelerator_nr function. 
""" - if accelerator_nr != 'auto': + if accelerator_nr != "auto": return int(accelerator_nr) # Calculate number of accelerators needed return int(tp) * int(dp) + def add_annotations(varname: str) -> str: """ Generate pod annotations YAML. @@ -876,7 +883,7 @@ def add_annotations(varname: str) -> str: if not annotations: return "" - #FIXME (This should be extracted "ev" dictionary) + # FIXME (This should be extracted "ev" dictionary) # Determine indentation based on environment type standalone_active = int(os.environ.get("LLMDBENCH_CONTROL_ENVIRONMENT_TYPE_STANDALONE_ACTIVE", 0)) modelservice_active = int(os.environ.get("LLMDBENCH_CONTROL_ENVIRONMENT_TYPE_MODELSERVICE_ACTIVE", 0)) @@ -884,7 +891,7 @@ def add_annotations(varname: str) -> str: if standalone_active == 1: indent = " " # 8 spaces elif modelservice_active == 1: - indent = " " # 6 spaces + indent = " " # 6 spaces else: indent = " " # default 8 spaces @@ -944,7 +951,7 @@ def render_string(input_string): elif default_value: final_value = default_value else: - announce(f"❌ ERROR: variable \"REPLACE_ENV_{parameter_name}\" not defined!") + announce(f'❌ ERROR: variable "REPLACE_ENV_{parameter_name}" not defined!') sys.exit(1) # Replace in the string @@ -962,7 +969,7 @@ def add_command_line_options(args_string): current_step = os.environ.get("LLMDBENCH_CURRENT_STEP", "") if os.access(args_string, os.R_OK): - with open(args_string, 'r') as fp: + with open(args_string, "r") as fp: fc = fp.read() args_string = fc @@ -999,7 +1006,7 @@ def add_command_line_options(args_string): for arg in args_list: if arg.strip(): # Clean up any trailing artifacts from line continuation - cleaned_arg = arg.rstrip('\\').rstrip('"').strip() + cleaned_arg = arg.rstrip("\\").rstrip('"').strip() if cleaned_arg: # Handle JSON strings and complex arguments with proper quoting if cleaned_arg.startswith("'") and cleaned_arg.endswith("'"): @@ -1007,7 +1014,7 @@ def add_command_line_options(args_string): yaml_list.append(f" - {cleaned_arg}") else: # Regular argument - wrap in double quotes - yaml_list.append(f" - \"{cleaned_arg}\"") + yaml_list.append(f' - "{cleaned_arg}"') return "\n".join(yaml_list) else: processed_args = processed_args.replace("____", " ") @@ -1016,7 +1023,7 @@ def add_command_line_options(args_string): yaml_list = [] for arg in args_list: if arg.strip(): - yaml_list.append(f" - \"{arg}\"") + yaml_list.append(f' - "{arg}"') return "\n".join(yaml_list) else: # Default case @@ -1062,10 +1069,10 @@ def add_additional_env_to_yaml(env_vars_string: str) -> str: if os.access(env_vars_string, os.R_OK): lines = [] - with open(env_vars_string, 'r') as fp: + with open(env_vars_string, "r") as fp: for line in fp: lines.append(name_indent + line.rstrip()) - return '\n'.join(lines) + return "\n".join(lines) # Parse environment variables (comma-separated list) env_lines = [] @@ -1080,7 +1087,7 @@ def add_additional_env_to_yaml(env_vars_string: str) -> str: processed_value = render_string(env_value) if env_value else "" env_lines.append(f"{name_indent}- name: {clean_name}") - env_lines.append(f"{value_indent}value: \"{processed_value}\"") + env_lines.append(f'{value_indent}value: "{processed_value}"') return "\n".join(env_lines) @@ -1092,17 +1099,17 @@ def add_config(obj_or_filename, num_spaces=0, label=""): contents = obj_or_filename - if len(obj_or_filename.split('\n')) == 1 : + if len(obj_or_filename.split("\n")) == 1: try: - with open(obj_or_filename, 'r') as f: + with open(obj_or_filename, "r") as f: contents = f.read() except FileNotFoundError: pass - 
indented_contents = '\n'.join(f"{spaces}{line}" for line in contents.splitlines()) - if indented_contents.strip() not in ["{}", "[]"] : + indented_contents = "\n".join(f"{spaces}{line}" for line in contents.splitlines()) + if indented_contents.strip() not in ["{}", "[]"]: indented_contents = f" {label}\n{indented_contents}" - else : + else: indented_contents = "" return indented_contents @@ -1113,12 +1120,13 @@ def is_standalone_deployment(ev: dict) -> bool: """ return int(ev.get("control_environment_type_standalone_active", 0)) == 1 + def get_accelerator_type(ev: dict) -> str | None: """ Attempts to get the GPU type """ - common_affinity = ev['vllm_common_affinity'] + common_affinity = ev["vllm_common_affinity"] if common_affinity == "auto": return common_affinity else: @@ -1156,7 +1164,7 @@ def is_hf_model_gated(model_id: str) -> bool: response = requests.get(url, headers=headers) response.raise_for_status() data = response.json() - return data.get("gated", False) != False + return data.get("gated", False) except requests.RequestException as e: announce("❌ ERROR - Request failed:", e) return False @@ -1190,9 +1198,7 @@ def user_has_hf_model_access(model_id: str, hf_token: str) -> bool: headers = {"Authorization": f"Bearer {hf_token}"} try: - with requests.get( - url, headers=headers, allow_redirects=True, stream=True - ) as response: + with requests.get(url, headers=headers, allow_redirects=True, stream=True) as response: if response.status_code == 200: return True elif response.status_code in (401, 403): @@ -1201,4 +1207,4 @@ def user_has_hf_model_access(model_id: str, hf_token: str) -> bool: response.raise_for_status() except requests.RequestException as e: announce("❌ ERROR - Request failed:", e) - return False \ No newline at end of file + return False diff --git a/util/audit_secrets.sh b/llm_d_benchmark/util/audit_secrets.sh similarity index 100% rename from util/audit_secrets.sh rename to llm_d_benchmark/util/audit_secrets.sh diff --git a/util/rbac.sh b/llm_d_benchmark/util/rbac.sh similarity index 100% rename from util/rbac.sh rename to llm_d_benchmark/util/rbac.sh diff --git a/util/rbac_audit_report.md b/llm_d_benchmark/util/rbac_audit_report.md similarity index 100% rename from util/rbac_audit_report.md rename to llm_d_benchmark/util/rbac_audit_report.md diff --git a/util/reset_deployment_parameters.sh b/llm_d_benchmark/util/reset_deployment_parameters.sh similarity index 100% rename from util/reset_deployment_parameters.sh rename to llm_d_benchmark/util/reset_deployment_parameters.sh diff --git a/util/setup_precommit.sh b/llm_d_benchmark/util/setup_precommit.sh similarity index 100% rename from util/setup_precommit.sh rename to llm_d_benchmark/util/setup_precommit.sh diff --git a/util/unit_test/add_additional_env_to_yaml.sh b/llm_d_benchmark/util/unit_test/add_additional_env_to_yaml.sh similarity index 100% rename from util/unit_test/add_additional_env_to_yaml.sh rename to llm_d_benchmark/util/unit_test/add_additional_env_to_yaml.sh diff --git a/util/unit_test/add_annotations.sh b/llm_d_benchmark/util/unit_test/add_annotations.sh similarity index 100% rename from util/unit_test/add_annotations.sh rename to llm_d_benchmark/util/unit_test/add_annotations.sh diff --git a/util/unit_test/add_command_line_options.sh b/llm_d_benchmark/util/unit_test/add_command_line_options.sh similarity index 100% rename from util/unit_test/add_command_line_options.sh rename to llm_d_benchmark/util/unit_test/add_command_line_options.sh diff --git 
a/util/unit_test/generate_standup_parameter_scenarios.sh b/llm_d_benchmark/util/unit_test/generate_standup_parameter_scenarios.sh similarity index 100% rename from util/unit_test/generate_standup_parameter_scenarios.sh rename to llm_d_benchmark/util/unit_test/generate_standup_parameter_scenarios.sh diff --git a/util/unit_test/model_attribute_function.sh b/llm_d_benchmark/util/unit_test/model_attribute_function.sh similarity index 100% rename from util/unit_test/model_attribute_function.sh rename to llm_d_benchmark/util/unit_test/model_attribute_function.sh diff --git a/util/unit_test/render_workload_templates.sh b/llm_d_benchmark/util/unit_test/render_workload_templates.sh similarity index 100% rename from util/unit_test/render_workload_templates.sh rename to llm_d_benchmark/util/unit_test/render_workload_templates.sh diff --git a/util/unit_test/test_01_ensure_local_conda.py b/llm_d_benchmark/util/unit_test/test_01_ensure_local_conda.py similarity index 65% rename from util/unit_test/test_01_ensure_local_conda.py rename to llm_d_benchmark/util/unit_test/test_01_ensure_local_conda.py index 9f8d9d25..74a3135a 100644 --- a/util/unit_test/test_01_ensure_local_conda.py +++ b/llm_d_benchmark/util/unit_test/test_01_ensure_local_conda.py @@ -5,353 +5,334 @@ Tests the Python conversion using native Python implementation. """ +import importlib.util import os import sys -import tempfile import unittest from pathlib import Path -from unittest.mock import patch, MagicMock +from unittest.mock import MagicMock, patch + -# Add setup directory to path -current_file = Path(__file__).resolve() -project_root = current_file.parents[2] # Go up 2 levels: util -> llm-d-benchmark -setup_dir = project_root / "setup" +# Save original functions module if it exists +_original_functions = sys.modules.get("functions") # Mock the functions module before any imports to avoid dependency issues -sys.modules['functions'] = MagicMock() -sys.modules['requests'] = MagicMock() +# Note: requests is a real package and should not be mocked +sys.modules["functions"] = MagicMock() -# Import the module under test -sys.path.insert(0, str(setup_dir)) -sys.path.append(str(setup_dir / "steps")) -import importlib.util # Load the Python module dynamically +setup_dir = Path(__file__).resolve().parents[2] / "setup" spec = importlib.util.spec_from_file_location( - "ensure_local_conda_py", - setup_dir / "steps" / "01_ensure_local_conda.py" + "ensure_local_conda_py", setup_dir / "steps" / "01_ensure_local_conda.py" ) module_under_test = importlib.util.module_from_spec(spec) spec.loader.exec_module(module_under_test) +def teardown_module(): + """Restore original modules after all tests in this module complete""" + if _original_functions is not None: + sys.modules["functions"] = _original_functions + elif "functions" in sys.modules: + del sys.modules["functions"] + + class TestEnsureLocalConda(unittest.TestCase): """Test cases for the 01_ensure_local_conda.py module""" - + def setUp(self): """Set up test environment""" - + # Mock announce function self.announce_calls = [] - + def mock_announce(message): print(f"[TEST ANNOUNCE] {message}") self.announce_calls.append(message) - + module_under_test.announce = mock_announce - + # Mock external dependencies self.platform_mock = MagicMock() self.subprocess_mock = MagicMock() self.shutil_mock = MagicMock() self.requests_mock = MagicMock() - + # Set up mocks module_under_test.platform = self.platform_mock module_under_test.subprocess = self.subprocess_mock module_under_test.shutil = self.shutil_mock 
module_under_test.requests = self.requests_mock - + def test_get_platform_info_macos(self): """Test platform detection for macOS""" - self.platform_mock.system.return_value = 'Darwin' - self.platform_mock.machine.return_value = 'arm64' - + self.platform_mock.system.return_value = "Darwin" + self.platform_mock.machine.return_value = "arm64" + result = module_under_test.get_platform_info() - - expected = { - 'system': 'darwin', - 'machine': 'arm64', - 'is_mac': True, - 'is_linux': False - } + + expected = {"system": "darwin", "machine": "arm64", "is_mac": True, "is_linux": False} self.assertEqual(result, expected) - + def test_get_platform_info_linux(self): """Test platform detection for Linux""" - self.platform_mock.system.return_value = 'Linux' - self.platform_mock.machine.return_value = 'x86_64' - + self.platform_mock.system.return_value = "Linux" + self.platform_mock.machine.return_value = "x86_64" + result = module_under_test.get_platform_info() - - expected = { - 'system': 'linux', - 'machine': 'x86_64', - 'is_mac': False, - 'is_linux': True - } + + expected = {"system": "linux", "machine": "x86_64", "is_mac": False, "is_linux": True} self.assertEqual(result, expected) - + def test_is_conda_available_true(self): """Test conda availability check when conda exists""" - self.shutil_mock.which.return_value = '/opt/conda/bin/conda' - + self.shutil_mock.which.return_value = "/opt/conda/bin/conda" + result = module_under_test.is_conda_available() - + self.assertTrue(result) - self.shutil_mock.which.assert_called_once_with('conda') - + self.shutil_mock.which.assert_called_once_with("conda") + def test_is_conda_available_false(self): """Test conda availability check when conda doesn't exist""" self.shutil_mock.which.return_value = None - + result = module_under_test.is_conda_available() - + self.assertFalse(result) - self.shutil_mock.which.assert_called_once_with('conda') - + self.shutil_mock.which.assert_called_once_with("conda") + def test_install_miniforge_macos_dry_run(self): """Test macOS miniforge installation in dry run mode""" - - exit_code, anaconda_path, conda_sh = module_under_test.install_miniforge_macos( - dry_run=True, verbose=True - ) - + + exit_code, anaconda_path, conda_sh = module_under_test.install_miniforge_macos(dry_run=True, verbose=True) + # Verify dry run behavior self.assertEqual(exit_code, 0) self.assertEqual(anaconda_path, 'export PATH="/opt/homebrew/bin/conda:$PATH"') self.assertEqual(conda_sh, Path("/opt/homebrew/Caskroom/miniforge/base/etc/profile.d/conda.sh")) - + # Verify announcements self.assertIn("🛠️ Installing Miniforge for macOS...", self.announce_calls) self.assertIn("---> would execute: brew install --cask miniforge", self.announce_calls) - + def test_install_miniforge_macos_no_brew(self): """Test macOS miniforge installation when brew is not available""" self.shutil_mock.which.return_value = None # No brew - + with self.assertRaises(EnvironmentError) as context: module_under_test.install_miniforge_macos(dry_run=False, verbose=True) - + self.assertIn("Homebrew not found", str(context.exception)) - + def test_install_miniforge_macos_success(self): """Test successful macOS miniforge installation""" - self.shutil_mock.which.return_value = '/opt/homebrew/bin/brew' - + self.shutil_mock.which.return_value = "/opt/homebrew/bin/brew" + # Mock successful subprocess call mock_result = MagicMock() mock_result.returncode = 0 self.subprocess_mock.run.return_value = mock_result - - exit_code, anaconda_path, conda_sh = module_under_test.install_miniforge_macos( - 
dry_run=False, verbose=True - ) - + + exit_code, anaconda_path, conda_sh = module_under_test.install_miniforge_macos(dry_run=False, verbose=True) + # Verify success self.assertEqual(exit_code, 0) - + # Verify subprocess call self.subprocess_mock.run.assert_called_once() call_args = self.subprocess_mock.run.call_args[0][0] - self.assertEqual(call_args, ['brew', 'install', '--cask', 'miniforge']) - + self.assertEqual(call_args, ["brew", "install", "--cask", "miniforge"]) + def test_install_miniforge_linux_dry_run(self): """Test Linux miniforge installation in dry run mode""" - + # Mock platform info - with patch.object(module_under_test, 'get_platform_info') as mock_platform: - mock_platform.return_value = { - 'system': 'linux', - 'machine': 'x86_64', - 'is_mac': False, - 'is_linux': True - } - - exit_code, anaconda_path, conda_sh = module_under_test.install_miniforge_linux( - dry_run=True, verbose=True - ) - + with patch.object(module_under_test, "get_platform_info") as mock_platform: + mock_platform.return_value = {"system": "linux", "machine": "x86_64", "is_mac": False, "is_linux": True} + + exit_code, anaconda_path, conda_sh = module_under_test.install_miniforge_linux(dry_run=True, verbose=True) + # Verify dry run behavior self.assertEqual(exit_code, 0) self.assertEqual(anaconda_path, 'export PATH="/opt/miniconda/bin/conda:$PATH"') self.assertEqual(conda_sh, Path("/opt/miniconda/etc/profile.d/conda.sh")) - + # Verify announcements self.assertIn("🛠️ Installing Miniforge for Linux...", self.announce_calls) self.assertTrue(any("would download and install" in call for call in self.announce_calls)) - + def test_check_conda_environment_exists(self): """Test conda environment checking when environment exists""" - + # Mock successful conda env list output mock_result = MagicMock() mock_result.returncode = 0 mock_result.stdout = "env1\ntest-env\nenv2\n" self.subprocess_mock.run.return_value = mock_result - + result = module_under_test.check_conda_environment("test-env") - + self.assertTrue(result) self.subprocess_mock.run.assert_called_once_with( - ['conda', 'env', 'list'], capture_output=True, text=True, check=True + ["conda", "env", "list"], capture_output=True, text=True, check=True ) - + def test_check_conda_environment_not_exists(self): """Test conda environment checking when environment doesn't exist""" - + # Mock successful conda env list output without target env mock_result = MagicMock() mock_result.returncode = 0 mock_result.stdout = "env1\nother-env\nenv2\n" self.subprocess_mock.run.return_value = mock_result - + result = module_under_test.check_conda_environment("test-env") - + self.assertFalse(result) - + def test_early_exit_when_not_running_locally(self): """Test early exit when LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY=0""" - + result = module_under_test.ensure_local_conda( - run_locally=False, - host_os="mac", - host_shell="zsh", - env_name="test-env", - dry_run=True, - verbose=True + run_locally=False, host_os="mac", host_shell="zsh", env_name="test-env", dry_run=True, verbose=True ) - + # Verify early exit self.assertEqual(result, 0) self.assertTrue(any("skipping local setup" in call for call in self.announce_calls)) - + def test_main_function_environment_parsing(self): """Test the main function's environment variable parsing""" - + # Mock environment variables test_env = { - 'LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY': '1', - 'LLMDBENCH_CONTROL_DEPLOY_HOST_OS': 'mac', - 'LLMDBENCH_CONTROL_DEPLOY_HOST_SHELL': 'zsh', - 'LLMDBENCH_HARNESS_CONDA_ENV_NAME': 'test-env', - 
'LLMDBENCH_CONTROL_DRY_RUN': '1', - 'LLMDBENCH_CONTROL_VERBOSE': '1' + "LLMDBENCH_RUN_EXPERIMENT_ANALYZE_LOCALLY": "1", + "LLMDBENCH_CONTROL_DEPLOY_HOST_OS": "mac", + "LLMDBENCH_CONTROL_DEPLOY_HOST_SHELL": "zsh", + "LLMDBENCH_HARNESS_CONDA_ENV_NAME": "test-env", + "LLMDBENCH_CONTROL_DRY_RUN": "1", + "LLMDBENCH_CONTROL_VERBOSE": "1", } - + + # Mock environment_variable_to_dict to properly populate the ev dict + def mock_env_to_dict(ev): + ev.update( + { + "run_experiment_analyze_locally": True, + "control_deploy_host_os": "mac", + "control_deploy_host_shell": "zsh", + "harness_conda_env_name": "test-env", + "control_dry_run": True, + "control_verbose": True, + } + ) + with patch.dict(os.environ, test_env): - with patch.object(module_under_test, 'ensure_local_conda') as mock_ensure: - mock_ensure.return_value = 0 - - result = module_under_test.main() - - # Verify the function was called with correct parameters - mock_ensure.assert_called_once_with( - run_locally=True, - host_os='mac', - host_shell='zsh', - env_name='test-env', - dry_run=True, - verbose=True - ) - - self.assertEqual(result, 0) + with patch.object(module_under_test, "environment_variable_to_dict", side_effect=mock_env_to_dict): + with patch.object(module_under_test, "ensure_local_conda") as mock_ensure: + mock_ensure.return_value = 0 + + result = module_under_test.main() + + # Verify the function was called with correct parameters + mock_ensure.assert_called_once_with( + run_locally=True, + host_os="mac", + host_shell="zsh", + env_name="test-env", + dry_run=True, + verbose=True, + ) + + self.assertEqual(result, 0) class TestCondaWorkflows(unittest.TestCase): """Test complete conda setup workflows""" - + def setUp(self): """Set up test environment""" - + # Mock announce function self.announce_calls = [] - + def mock_announce(message): self.announce_calls.append(message) - + module_under_test.announce = mock_announce - + def test_macos_workflow_no_conda(self): """Test complete macOS workflow when conda is not installed""" - + with patch.multiple( module_under_test, - get_platform_info=MagicMock(return_value={'is_mac': True, 'is_linux': False, 'system': 'darwin'}), + get_platform_info=MagicMock(return_value={"is_mac": True, "is_linux": False, "system": "darwin"}), is_conda_available=MagicMock(return_value=False), - install_miniforge_macos=MagicMock(return_value=(0, 'anaconda_path', Path('/conda.sh'))), + install_miniforge_macos=MagicMock(return_value=(0, "anaconda_path", Path("/conda.sh"))), update_shell_rc_file=MagicMock(return_value=True), source_conda_script=MagicMock(return_value=0), - create_conda_environment=MagicMock(return_value=0) + create_conda_environment=MagicMock(return_value=0), ): result = module_under_test.ensure_local_conda( - run_locally=True, - host_os='mac', - host_shell='zsh', - env_name='test-env', - dry_run=True, - verbose=True + run_locally=True, host_os="mac", host_shell="zsh", env_name="test-env", dry_run=True, verbose=True ) - + # Verify success self.assertEqual(result, 0) - + # Verify workflow calls module_under_test.install_miniforge_macos.assert_called_once() module_under_test.update_shell_rc_file.assert_called_once() module_under_test.source_conda_script.assert_called_once() module_under_test.create_conda_environment.assert_called_once() - + def test_linux_workflow_no_conda(self): """Test complete Linux workflow when conda is not installed""" - + with patch.multiple( module_under_test, - get_platform_info=MagicMock(return_value={'is_mac': False, 'is_linux': True, 'system': 'linux'}), + 
get_platform_info=MagicMock(return_value={"is_mac": False, "is_linux": True, "system": "linux"}), is_conda_available=MagicMock(return_value=False), - install_miniforge_linux=MagicMock(return_value=(0, 'anaconda_path', Path('/conda.sh'))), + install_miniforge_linux=MagicMock(return_value=(0, "anaconda_path", Path("/conda.sh"))), update_shell_rc_file=MagicMock(return_value=True), source_conda_script=MagicMock(return_value=0), - create_conda_environment=MagicMock(return_value=0) + create_conda_environment=MagicMock(return_value=0), ): result = module_under_test.ensure_local_conda( - run_locally=True, - host_os='linux', - host_shell='bash', - env_name='test-env', - dry_run=True, - verbose=True + run_locally=True, host_os="linux", host_shell="bash", env_name="test-env", dry_run=True, verbose=True ) - + # Verify success self.assertEqual(result, 0) - + # Verify workflow calls module_under_test.install_miniforge_linux.assert_called_once() module_under_test.update_shell_rc_file.assert_called_once() module_under_test.source_conda_script.assert_called_once() module_under_test.create_conda_environment.assert_called_once() - + def test_source_conda_script_dry_run(self): """Test that source_conda_script works in dry run mode without file existence check""" - + # Mock announce function announce_calls = [] + def mock_announce(message): announce_calls.append(message) + module_under_test.announce = mock_announce - + # Test dry run mode - should not check file existence result = module_under_test.source_conda_script( - conda_sh=Path("/nonexistent/conda.sh"), - dry_run=True, - verbose=True + conda_sh=Path("/nonexistent/conda.sh"), dry_run=True, verbose=True ) - + # Verify success and correct announcement self.assertEqual(result, 0) self.assertTrue(any("would source" in call for call in announce_calls)) -if __name__ == '__main__': - unittest.main() \ No newline at end of file +if __name__ == "__main__": + unittest.main() diff --git a/util/unit_test/validate_step_01_conversion.sh b/llm_d_benchmark/util/unit_test/validate_step_01_conversion.sh similarity index 100% rename from util/unit_test/validate_step_01_conversion.sh rename to llm_d_benchmark/util/unit_test/validate_step_01_conversion.sh diff --git a/workload/harnesses/fmperf-llm-d-benchmark.py b/llm_d_benchmark/workload/harnesses/fmperf-llm-d-benchmark.py similarity index 74% rename from workload/harnesses/fmperf-llm-d-benchmark.py rename to llm_d_benchmark/workload/harnesses/fmperf-llm-d-benchmark.py index 2ce1c084..58c0984f 100755 --- a/workload/harnesses/fmperf-llm-d-benchmark.py +++ b/llm_d_benchmark/workload/harnesses/fmperf-llm-d-benchmark.py @@ -6,71 +6,66 @@ provided by the job configuration. 
""" -import os -import subprocess -import urllib3 -import yaml +import asyncio import logging -import json +import os import shutil -from datetime import datetime -import sys -import time +import subprocess from pathlib import Path import kubernetes +import urllib3 +from fmperf import LMBenchmarkWorkload +from fmperf.Cluster import Cluster +from fmperf.StackSpec import StackSpec +from fmperf.utils import run_benchmark from kubernetes import client from kubernetes_asyncio import client as k8s_async_client from kubernetes_asyncio import config as k8s_async_config from kubernetes_asyncio import watch as k8s_async_watch -import asyncio - -from fmperf.Cluster import Cluster -from fmperf import LMBenchmarkWorkload -from fmperf.StackSpec import StackSpec -from fmperf.utils import run_benchmark logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) -formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s') +formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") # Disable SSL warnings urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) + def update_workload_config(workload_spec, env_vars): """Update workload configuration with environment variables if provided.""" logger.info("Updating workload configuration from environment variables") - if 'LLMDBENCH_FMPERF_BATCH_SIZE' in env_vars: - workload_spec.batch_size = int(env_vars['LLMDBENCH_FMPERF_BATCH_SIZE']) + if "LLMDBENCH_FMPERF_BATCH_SIZE" in env_vars: + workload_spec.batch_size = int(env_vars["LLMDBENCH_FMPERF_BATCH_SIZE"]) logger.info(f"Set batch_size to {workload_spec.batch_size}") - if 'LLMDBENCH_FMPERF_SEQUENCE_LENGTH' in env_vars: - workload_spec.sequence_length = int(env_vars['LLMDBENCH_FMPERF_SEQUENCE_LENGTH']) + if "LLMDBENCH_FMPERF_SEQUENCE_LENGTH" in env_vars: + workload_spec.sequence_length = int(env_vars["LLMDBENCH_FMPERF_SEQUENCE_LENGTH"]) logger.info(f"Set sequence_length to {workload_spec.sequence_length}") - if 'LLMDBENCH_FMPERF_MAX_TOKENS' in env_vars: - workload_spec.max_tokens = int(env_vars['LLMDBENCH_FMPERF_MAX_TOKENS']) + if "LLMDBENCH_FMPERF_MAX_TOKENS" in env_vars: + workload_spec.max_tokens = int(env_vars["LLMDBENCH_FMPERF_MAX_TOKENS"]) logger.info(f"Set max_tokens to {workload_spec.max_tokens}") - if 'LLMDBENCH_FMPERF_NUM_USERS_WARMUP' in env_vars: - workload_spec.num_users_warmup = int(env_vars['LLMDBENCH_FMPERF_NUM_USERS_WARMUP']) + if "LLMDBENCH_FMPERF_NUM_USERS_WARMUP" in env_vars: + workload_spec.num_users_warmup = int(env_vars["LLMDBENCH_FMPERF_NUM_USERS_WARMUP"]) logger.info(f"Set num_users_warmup to {workload_spec.num_users_warmup}") - if 'LLMDBENCH_FMPERF_NUM_USERS' in env_vars: - workload_spec.num_users = int(env_vars['LLMDBENCH_FMPERF_NUM_USERS']) + if "LLMDBENCH_FMPERF_NUM_USERS" in env_vars: + workload_spec.num_users = int(env_vars["LLMDBENCH_FMPERF_NUM_USERS"]) logger.info(f"Set num_users to {workload_spec.num_users}") - if 'LLMDBENCH_FMPERF_NUM_ROUNDS' in env_vars: - workload_spec.num_rounds = int(env_vars['LLMDBENCH_FMPERF_NUM_ROUNDS']) + if "LLMDBENCH_FMPERF_NUM_ROUNDS" in env_vars: + workload_spec.num_rounds = int(env_vars["LLMDBENCH_FMPERF_NUM_ROUNDS"]) logger.info(f"Set num_rounds to {workload_spec.num_rounds}") - if 'LLMDBENCH_FMPERF_SYSTEM_PROMPT' in env_vars: - workload_spec.system_prompt = int(env_vars['LLMDBENCH_FMPERF_SYSTEM_PROMPT']) + if "LLMDBENCH_FMPERF_SYSTEM_PROMPT" in env_vars: + workload_spec.system_prompt = int(env_vars["LLMDBENCH_FMPERF_SYSTEM_PROMPT"]) logger.info(f"Set system_prompt to {workload_spec.system_prompt}") - 
-    if 'LLMDBENCH_FMPERF_CHAT_HISTORY' in env_vars:
-        workload_spec.chat_history = int(env_vars['LLMDBENCH_FMPERF_CHAT_HISTORY'])
+    if "LLMDBENCH_FMPERF_CHAT_HISTORY" in env_vars:
+        workload_spec.chat_history = int(env_vars["LLMDBENCH_FMPERF_CHAT_HISTORY"])
         logger.info(f"Set chat_history to {workload_spec.chat_history}")
-    if 'LLMDBENCH_FMPERF_ANSWER_LEN' in env_vars:
-        workload_spec.answer_len = int(env_vars['LLMDBENCH_FMPERF_ANSWER_LEN'])
+    if "LLMDBENCH_FMPERF_ANSWER_LEN" in env_vars:
+        workload_spec.answer_len = int(env_vars["LLMDBENCH_FMPERF_ANSWER_LEN"])
         logger.info(f"Set answer_len to {workload_spec.answer_len}")
-    if 'LLMDBENCH_FMPERF_TEST_DURATION' in env_vars:
-        workload_spec.test_duration = int(env_vars['LLMDBENCH_FMPERF_TEST_DURATION'])
+    if "LLMDBENCH_FMPERF_TEST_DURATION" in env_vars:
+        workload_spec.test_duration = int(env_vars["LLMDBENCH_FMPERF_TEST_DURATION"])
         logger.info(f"Set test_duration to {workload_spec.test_duration}")
 
     return workload_spec
@@ -92,11 +87,12 @@ async def wait_for_job(job_name, namespace, timeout=7200):
             func=batch_v1_api.list_namespaced_job,
             namespace=namespace,
             field_selector=f"metadata.name={job_name}",
-            timeout_seconds=timeout # replaces the manual timeout check
+            timeout_seconds=timeout,  # replaces the manual timeout check
         ) as stream:
-
-            async for event in stream: # replaces time.wait since we grab events as they come from stream sasynchronous
-                job_status = event['object'].status
+            async for (
+                event
+            ) in stream:  # replaces the manual wait loop, since we consume events from the stream asynchronously
+                job_status = event["object"].status
                 if job_status.succeeded:
                     logger.info(f"Evaluation job {job_name} completed successfully.")
                     return True
@@ -105,7 +101,6 @@ async def wait_for_job(job_name, namespace, timeout=7200):
                     logger.error(f"Evaluation job {job_name} failed")
                     return False
 
-
     except asyncio.TimeoutError:
         logger.info(f"Timeout waiting for evaluation job {job_name} after {timeout} seconds.")
         return False
@@ -115,20 +110,17 @@ async def wait_for_job(job_name, namespace, timeout=7200):
         await api_client.close()
 
 
-def capture_pod_logs(job_name, namespace, output_file : str):
+def capture_pod_logs(job_name, namespace, output_file: str):
     """Capture logs from pods created by a job
-    Not specific to fmperf, as the pod logs are based on the job,
-    rather than fmperf specifically
+    Not specific to fmperf, as the pod logs are based on the job,
+    rather than fmperf specifically
     """
     try:
         v1 = client.CoreV1Api()
 
         # get pods created by the job using label selector
        label_selector = f"job-name={job_name}"
-        pods = v1.list_namespaced_pod(
-            namespace=namespace,
-            label_selector=label_selector
-        )
+        pods = v1.list_namespaced_pod(namespace=namespace, label_selector=label_selector)
 
         if not pods.items:
             logger.error(f"No pods found for job {job_name}")
@@ -140,15 +132,11 @@
 
         logger.info(f"Capturing logs from pod: {pod_name}")
 
-        logs = v1.read_namespaced_pod_log(
-            name=pod_name,
-            namespace=namespace,
-            pretty=True
-        )
+        logs = v1.read_namespaced_pod_log(name=pod_name, namespace=namespace, pretty=True)
 
         # create dir if parent path doesn't exist
         Path(output_file).parent.mkdir(parents=True, exist_ok=True)
-        with open(output_file, 'w') as f:
+        with open(output_file, "w") as f:
             f.write(logs)
 
         logger.info(f"Wrote logs to: {output_file}")
@@ -162,8 +150,8 @@ def move_data_result(capture_log_file, data_dir):
     """Move the data result from the file mentioned in the log
to the specified data directory.""" - sed_cmd = 's/^.*Finished benchmarking, dumping summary to \\(.*.csv\\).*$/\\1/p' - os_command = [ 'sed', '-n', sed_cmd, capture_log_file ] + sed_cmd = "s/^.*Finished benchmarking, dumping summary to \\(.*.csv\\).*$/\\1/p" + os_command = ["sed", "-n", sed_cmd, capture_log_file] result = subprocess.run(os_command, capture_output=True, text=True) if result.returncode != 0: logger.error(f"Error finding result data: {result.stderr}") @@ -187,7 +175,7 @@ def move_data_result(capture_log_file, data_dir): data_file = data_file.strip() if not os.path.exists(data_file): logger.error(f"Data file does not exist: {data_file}") - continue # ignore the missing temp warm up files + continue # ignore the missing temp warm up files try: destination = os.path.join(data_dir, os.path.basename(data_file)) @@ -211,29 +199,29 @@ def convert_data_result(capture_dir: str) -> None: """ if not os.path.isdir(capture_dir): - logger.error(f'Invalid directory: {capture_dir}') + logger.error(f"Invalid directory: {capture_dir}") return for data_file in os.listdir(capture_dir): - if data_file.lower()[-4:] != '.csv': + if data_file.lower()[-4:] != ".csv": continue data_file_full_path = os.path.join(capture_dir, data_file) - logger.info(f'Converting file to benchmark report: {data_file_full_path}') + logger.info(f"Converting file to benchmark report: {data_file_full_path}") os_command = [ - 'convert.py', + "convert.py", data_file_full_path, - os.path.join(capture_dir, f'benchmark_report,_{data_file}.yaml'), - '-w', - 'fmperf', - '-f', + os.path.join(capture_dir, f"benchmark_report,_{data_file}.yaml"), + "-w", + "fmperf", + "-f", ] result = subprocess.run(os_command, capture_output=True, text=True) if result.returncode != 0: # Report error, but do not quit - logger.error(f'Error converting result data: {result.stderr}') + logger.error(f"Error converting result data: {result.stderr}") -def main(): +def main(): env_vars = os.environ # Get results directory for configuration @@ -263,7 +251,7 @@ def main(): namespace = env_vars.get("LLMDBENCH_HARNESS_NAMESPACE", "llmdbench") job_id = env_vars.get("LLMDBENCH_FMPERF_JOB_ID", f"{stack_name}-{experiment_id}") - logger.info(f"Using configuration:") + logger.info("Using configuration:") logger.info(f" Stack name: {stack_name}") logger.info(f" Stack type: {stack_type}") logger.info(f" Endpoint URL: {endpoint_url}") @@ -283,25 +271,18 @@ def main(): workload_spec = update_workload_config(workload_spec, env_vars) logger.info("Creating stack specification") - stack_spec = StackSpec( - name=stack_name, - stack_type=stack_type, - refresh_interval=300, - endpoint_url=endpoint_url - ) + stack_spec = StackSpec(name=stack_name, stack_type=stack_type, refresh_interval=300, endpoint_url=endpoint_url) logger.info("Initializing Kubernetes client") kubernetes.config.load_incluster_config() apiclient = client.ApiClient() cluster = Cluster(name="in-cluster", apiclient=apiclient, namespace=namespace) - run_id = datetime.now().strftime("%Y%m%d_%H%M%S") - logger.info("Starting benchmark run") try: # run benchmark which will create the evaluation job - results = run_benchmark( + run_benchmark( cluster=cluster, stack_spec=stack_spec, workload_spec=workload_spec, @@ -317,10 +298,9 @@ def main(): stem = "/eval-pod-lod.log" eval_path = results_dir - if eval_path == "/requests": # customize eval path if default dir is /requests + if eval_path == "/requests": # customize eval path if default dir is /requests eval_path = 
f"{results_dir}/{harness_name}_{experiment_id}_{stack_name}" eval_log_file = eval_path + stem - eval_data_dir = f"{eval_path}/analysis/data/" job_name = f"lmbenchmark-evaluate-{job_id}" logger.info(f"Waiting for evaluation job {job_name} to complete...") @@ -328,17 +308,17 @@ def main(): # Wait for the evaluation job to complete asyncio.run(wait_for_job(job_name, namespace)) - logs = capture_pod_logs(job_name, namespace, eval_log_file) + capture_pod_logs(job_name, namespace, eval_log_file) if move_data_result(eval_log_file, eval_path): logger.info(f"Data moved to {eval_path}") # Create benchmark report - logger.info(f"Performing benchmark report conversion") + logger.info("Performing benchmark report conversion") convert_data_result(eval_path) - except Exception as e: logger.error(f"Benchmark run failed: {str(e)}") raise + if __name__ == "__main__": main() diff --git a/workload/harnesses/guidellm-llm-d-benchmark.sh b/llm_d_benchmark/workload/harnesses/guidellm-llm-d-benchmark.sh similarity index 100% rename from workload/harnesses/guidellm-llm-d-benchmark.sh rename to llm_d_benchmark/workload/harnesses/guidellm-llm-d-benchmark.sh diff --git a/workload/harnesses/inference-perf-llm-d-benchmark.sh b/llm_d_benchmark/workload/harnesses/inference-perf-llm-d-benchmark.sh similarity index 100% rename from workload/harnesses/inference-perf-llm-d-benchmark.sh rename to llm_d_benchmark/workload/harnesses/inference-perf-llm-d-benchmark.sh diff --git a/workload/harnesses/nop-llm-d-benchmark.py b/llm_d_benchmark/workload/harnesses/nop-llm-d-benchmark.py similarity index 88% rename from workload/harnesses/nop-llm-d-benchmark.py rename to llm_d_benchmark/workload/harnesses/nop-llm-d-benchmark.py index 97950884..5446ac17 100755 --- a/workload/harnesses/nop-llm-d-benchmark.py +++ b/llm_d_benchmark/workload/harnesses/nop-llm-d-benchmark.py @@ -5,25 +5,27 @@ """ from __future__ import annotations + import ast -from dataclasses import dataclass, field, fields -from datetime import datetime -from enum import StrEnum import io import json +import logging import os import re import subprocess import time -import logging +from dataclasses import dataclass, field, fields +from datetime import datetime +from enum import StrEnum +from pathlib import Path from typing import Any from urllib.parse import urljoin, urlparse -from pathlib import Path + import requests import yaml - from kubernetes import client, config + # Configure logging logger = logging.getLogger(__name__) logger.setLevel(logging.DEBUG) @@ -203,21 +205,15 @@ def dump(self, include_not_defined: bool = False) -> list[dict[str, Any]]: return BenchmarkCategory._dump(self, include_not_defined) @staticmethod - def _dump( - benchmark_category: BenchmarkCategory, include_not_defined: bool - ) -> list[dict[str, Any]]: + def _dump(benchmark_category: BenchmarkCategory, include_not_defined: bool) -> list[dict[str, Any]]: categories = [] category = benchmark_category while category is not None: if category.defined or include_not_defined: dump_dict = {"title": category.title} procs = [ - "" - if category.start.log_line is None - else category.start.log_line.process_desc(), - "" - if category.end.log_line is None - else category.end.log_line.process_desc(), + "" if category.start.log_line is None else category.start.log_line.process_desc(), + "" if category.end.log_line is None else category.end.log_line.process_desc(), ] if procs[0] != procs[1]: raise ValueError( @@ -226,10 +222,7 @@ def _dump( f"the same as end process '{procs[1]}'" ) - if ( - 
category.start.log_line is not None - and category.start.log_line.process is not None - ): + if category.start.log_line is not None and category.start.log_line.process is not None: dump_dict["process"] = category.start.log_line.process.dump() dump_dict["elapsed"] = 0.0 if ( @@ -238,14 +231,10 @@ def _dump( and category.start.log_line.time is not None and category.end.log_line.time is not None ): - dump_dict["elapsed"] = ( - category.end.log_line.time - category.start.log_line.time - ).total_seconds() + dump_dict["elapsed"] = (category.end.log_line.time - category.start.log_line.time).total_seconds() if category.root_child is not None: - dump_dict["categories"] = BenchmarkCategory._dump( - category.root_child, include_not_defined - ) + dump_dict["categories"] = BenchmarkCategory._dump(category.root_child, include_not_defined) categories.append(dump_dict) category = category.next @@ -362,11 +351,7 @@ def dump(self) -> dict[str, Any]: dump_dict = {} for f in fields(self): value = getattr(self, f.name) - dump_dict[f.name] = ( - value.dump() - if hasattr(value, "dump") and callable(value.dump) - else value - ) + dump_dict[f.name] = value.dump() if hasattr(value, "dump") and callable(value.dump) else value return dump_dict @@ -389,11 +374,7 @@ def dump(self) -> dict[str, Any]: dump_dict = {} for f in fields(self): value = getattr(self, f.name) - dump_dict[f.name] = ( - value.dump() - if hasattr(value, "dump") and callable(value.dump) - else value - ) + dump_dict[f.name] = value.dump() if hasattr(value, "dump") and callable(value.dump) else value dump_dict["duration"] = self.stop - self.start return dump_dict @@ -429,11 +410,7 @@ def dump(self) -> dict[str, Any]: if f.name in ["load_cached_compiled_graph", "compile_graph"] and value == 0: continue - dump_dict[f.name] = ( - value.dump() - if hasattr(value, "dump") and callable(value.dump) - else value - ) + dump_dict[f.name] = value.dump() if hasattr(value, "dump") and callable(value.dump) else value transfer_rate = 0.0 if self.load_time != 0.0: transfer_rate = self.size / self.load_time @@ -461,11 +438,7 @@ def dump(self) -> dict[str, Any]: dump_dict = {} for f in fields(self): value = getattr(self, f.name) - dump_dict[f.name] = ( - value.dump() - if hasattr(value, "dump") and callable(value.dump) - else value - ) + dump_dict[f.name] = value.dump() if hasattr(value, "dump") and callable(value.dump) else value return dump_dict @@ -548,9 +521,7 @@ def sleep(base_url: str, level: int, timeout: float): url = urljoin(base_url, "sleep") response = requests.post(url, params={"level": str(level)}, timeout=timeout) if response.status_code != 200: - raise RuntimeError( - f"sleep level {level} url {url} error code {response.status_code}." - ) + raise RuntimeError(f"sleep level {level} url {url} error code {response.status_code}.") sleeping = False start = time.perf_counter() @@ -595,23 +566,17 @@ def wake(base_url: str, timeout: float): raise RuntimeError(f"Server failed sleeping status after {elapsed} secs.") -def get_vllm_pod_info( - v1: client.CoreV1Api, namespace: str, deployment_name: str -) -> dict[str, str]: +def get_vllm_pod_info(v1: client.CoreV1Api, namespace: str, deployment_name: str) -> dict[str, str]: """get vllm pod name""" selectors = get_deployment_selectors(namespace, deployment_name) if len(selectors) == 0: - raise RuntimeError( - f"No deployment selectors for deployment {deployment_name} on namespace {namespace}." 
- ) + raise RuntimeError(f"No deployment selectors for deployment {deployment_name} on namespace {namespace}.") selector = selectors[0] pod_infos = get_pod_infos(v1, namespace, selector) if len(pod_infos) == 0: - raise RuntimeError( - f"No pods found on namespace {namespace} with selector 'app={selector}'." - ) + raise RuntimeError(f"No pods found on namespace {namespace} with selector 'app={selector}'.") return pod_infos[0] @@ -619,13 +584,9 @@ def get_vllm_pod_info( def get_deployment_selectors(namespace: str, name: str) -> list[str]: """get deployment label selectors based on prefix""" - deployment = client.AppsV1Api().read_namespaced_deployment( - name=name, namespace=namespace - ) + deployment = client.AppsV1Api().read_namespaced_deployment(name=name, namespace=namespace) if deployment is None: - raise RuntimeError( - f"No deployment found with name {name} on namespace {namespace}." - ) + raise RuntimeError(f"No deployment found with name {name} on namespace {namespace}.") selectors = [] if deployment.spec.selector and deployment.spec.selector.match_labels: dict_selector = deployment.spec.selector.match_labels @@ -635,14 +596,10 @@ def get_deployment_selectors(namespace: str, name: str) -> list[str]: return selectors -def get_pod_infos( - v1: client.CoreV1Api, namespace: str, selector: str -) -> list[dict[str, str]]: +def get_pod_infos(v1: client.CoreV1Api, namespace: str, selector: str) -> list[dict[str, str]]: """get pods by selector""" - pod_list = v1.list_namespaced_pod( - namespace=namespace, label_selector=f"app={selector}" - ) + pod_list = v1.list_namespaced_pod(namespace=namespace, label_selector=f"app={selector}") pod_infos = [] for pod in pod_list.items: image = pod.spec.containers[0].image @@ -655,9 +612,7 @@ def get_pod_infos( def get_pod_logs(v1: client.CoreV1Api, namespace: str, pod_name: str) -> bytes: """get pod logs""" - response = v1.read_namespaced_pod_log( - name=pod_name, namespace=namespace, pretty=False, _preload_content=False - ) + response = v1.read_namespaced_pod_log(name=pod_name, namespace=namespace, pretty=False, _preload_content=False) return response.data @@ -684,9 +639,7 @@ def extract_datetime(log_line: str) -> datetime | None: return None -def initialize_benchmark_categories( - defined_categories: list[Any], parent: BenchmarkCategory -) -> BenchmarkCategory: +def initialize_benchmark_categories(defined_categories: list[Any], parent: BenchmarkCategory) -> BenchmarkCategory: """initialize categories""" root_benchmark_category = None prev_benchmark_category = None @@ -700,15 +653,10 @@ def initialize_benchmark_categories( benchmark_category.title = defined_category.get("title") benchmark_category.defined = True - benchmark_category.start.pattern = re.compile( - rf"{defined_category.get('start')}" - ) + benchmark_category.start.pattern = re.compile(rf"{defined_category.get('start')}") benchmark_category.end.pattern = re.compile(rf"{defined_category.get('end')}") benchmark_category.parent = parent - if ( - benchmark_category.parent is not None - and benchmark_category.parent.root_child is None - ): + if benchmark_category.parent is not None and benchmark_category.parent.root_child is None: benchmark_category.parent.root_child = benchmark_category defined_children = defined_category.get("children") @@ -733,9 +681,7 @@ def get_log_list(logs: str) -> list[LogLine]: return log_list -def get_log_list_per_process( - vllm_model: str, log_list: list[LogLine] -) -> dict[BenchmarkProcess, list[LogLine]]: +def get_log_list_per_process(vllm_model: str, log_list: 
list[LogLine]) -> dict[BenchmarkProcess, list[LogLine]]: """get log list divided by Process""" tensorizer_serialization_end = f"End model {vllm_model} serialization" @@ -786,12 +732,10 @@ def populate_benchmark_categories( ): """populate categories from log lines""" - for _, log_list_process in log_list_per_process.items(): + for log_list_process in log_list_per_process.values(): index = 0 while index < len(log_list_process): - index = populate_benchmark_category( - index, log_list_process, root_benchmark_category - ) + index = populate_benchmark_category(index, log_list_process, root_benchmark_category) index += 1 @@ -826,9 +770,7 @@ def add_uncategorized_categories(benchmark_category: BenchmarkCategory): category = category.next -def populate_benchmark_category( - index: int, log_list: list[LogLine], benchmark_category: BenchmarkCategory -) -> int: +def populate_benchmark_category(index: int, log_list: list[LogLine], benchmark_category: BenchmarkCategory) -> int: """populate category from log line""" category = benchmark_category @@ -904,8 +846,7 @@ def parse_logs(logs: str) -> BenchmarkResult: and benchmark_result.metrics.gpu_in_use != 0 and benchmark_result.metrics.wake != 0 and ( - benchmark_result.metrics.load_cached_compiled_graph != 0 - or benchmark_result.metrics.compile_graph != 0 + benchmark_result.metrics.load_cached_compiled_graph != 0 or benchmark_result.metrics.compile_graph != 0 ) ): break @@ -918,9 +859,7 @@ def parse_logs(logs: str) -> BenchmarkResult: start_index += len(server_non_default_args) args = line[start_index:].strip() try: - benchmark_result.scenario.platform.engine.args = ast.literal_eval( - args - ) + benchmark_result.scenario.platform.engine.args = ast.literal_eval(args) except Exception: logger.exception( "log args dict parsing returned error converting: %s", @@ -945,9 +884,7 @@ def parse_logs(logs: str) -> BenchmarkResult: end_index = line.find(",", start_index) if end_index >= 0: format_value = line[start_index:end_index].strip() - benchmark_result.scenario.load_format = ( - LoadFormat.loadformat_from_value(format_value) - ) + benchmark_result.scenario.load_format = LoadFormat.loadformat_from_value(format_value) if benchmark_result.metrics.load_time == 0: floats = find_floats_in_line(model_load_string, line) @@ -975,10 +912,7 @@ def parse_logs(logs: str) -> BenchmarkResult: benchmark_result.metrics.wake = floats[0] continue - if ( - benchmark_result.metrics.load_cached_compiled_graph == 0 - and benchmark_result.metrics.compile_graph == 0 - ): + if benchmark_result.metrics.load_cached_compiled_graph == 0 and benchmark_result.metrics.compile_graph == 0: floats = find_floats_in_line(cached_compiled_graph, line) if len(floats) > 0: benchmark_result.metrics.load_cached_compiled_graph = floats[0] @@ -1036,9 +970,7 @@ def convert_result(result_filepath: str, output_filepath: str) -> tuple[str, str logger.exception("convert.py returned error converting: %s", result_filepath) -def write_benchmark_categories_to_log( - level: int, benchmark_category: BenchmarkCategory, file: io.BufferedWriter -): +def write_benchmark_categories_to_log(level: int, benchmark_category: BenchmarkCategory, file: io.BufferedWriter): """write benchmark category tree log""" blank_string = " " * level if level > 0 else "" category = benchmark_category @@ -1059,22 +991,18 @@ def write_benchmark_categories_to_log( time_format = "%m-%d %H:%M:%S.%f" date_str = ( category.start.log_line.time.strftime(time_format)[:-3] - if category.start.log_line is not None - and category.start.log_line.time is 
not None + if category.start.log_line is not None and category.start.log_line.time is not None else "" ) file.write(f"{blank_string} Start date : '{date_str}'\n") date_str = ( category.end.log_line.time.strftime(time_format)[:-3] - if category.end.log_line is not None - and category.end.log_line.time is not None + if category.end.log_line is not None and category.end.log_line.time is not None else "" ) file.write(f"{blank_string} End date : '{date_str}'\n") file.write(f"{blank_string} Elapsed : {elapsed}\n") - file.write( - f"{blank_string} Start pattern: '{category.start.pattern_desc()}'\n" - ) + file.write(f"{blank_string} Start pattern: '{category.start.pattern_desc()}'\n") file.write(f"{blank_string} End pattern : '{category.end.pattern_desc()}'\n") if category.start.log_line is None: file.write(f"{blank_string} Start line :\n") @@ -1087,8 +1015,7 @@ def write_benchmark_categories_to_log( file.write(f"{blank_string} End line :\n") else: file.write( - f"{blank_string} End line : " - f"{category.end.log_line.line_number} '{category.end.log_line.line}'\n" + f"{blank_string} End line : {category.end.log_line.line_number} '{category.end.log_line.line}'\n" ) if category.root_child is not None: write_benchmark_categories_to_log(level + 1, category.root_child, file) @@ -1148,9 +1075,7 @@ def main(): pod_info["image"], ) except Exception as e: - logger.info( - "Skipping harness because vLLM standalone pod not found: %s", str(e) - ) + logger.info("Skipping harness because vLLM standalone pod not found: %s", str(e)) return vllm_version = get_vllm_version(endpoint_url, REQUEST_TIMEOUT) @@ -1199,12 +1124,8 @@ def main(): # write log categories log file log_categories_filepath = os.path.join(requests_dir, "categories.log") with open(log_categories_filepath, "w", encoding="utf-8", newline="") as file: - write_benchmark_categories_to_log( - 0, benchmark_result.metrics.root_category, file - ) - logger.info( - "benchmark categories log file saved to path: %s", log_categories_filepath - ) + write_benchmark_categories_to_log(0, benchmark_result.metrics.root_category, file) + logger.info("benchmark categories log file saved to path: %s", log_categories_filepath) benchmark_result.metrics.time.start = start_time benchmark_result.metrics.time.stop = datetime.now().timestamp() diff --git a/workload/harnesses/vllm-benchmark-llm-d-benchmark.sh b/llm_d_benchmark/workload/harnesses/vllm-benchmark-llm-d-benchmark.sh similarity index 100% rename from workload/harnesses/vllm-benchmark-llm-d-benchmark.sh rename to llm_d_benchmark/workload/harnesses/vllm-benchmark-llm-d-benchmark.sh diff --git a/workload/profiles/fmperf/large_model_long_input.yaml.in b/llm_d_benchmark/workload/profiles/fmperf/large_model_long_input.yaml.in similarity index 100% rename from workload/profiles/fmperf/large_model_long_input.yaml.in rename to llm_d_benchmark/workload/profiles/fmperf/large_model_long_input.yaml.in diff --git a/workload/profiles/fmperf/medium_model_long_input.yaml.in b/llm_d_benchmark/workload/profiles/fmperf/medium_model_long_input.yaml.in similarity index 100% rename from workload/profiles/fmperf/medium_model_long_input.yaml.in rename to llm_d_benchmark/workload/profiles/fmperf/medium_model_long_input.yaml.in diff --git a/workload/profiles/fmperf/sanity_long-input.yaml.in b/llm_d_benchmark/workload/profiles/fmperf/sanity_long-input.yaml.in similarity index 100% rename from workload/profiles/fmperf/sanity_long-input.yaml.in rename to llm_d_benchmark/workload/profiles/fmperf/sanity_long-input.yaml.in diff --git 
a/workload/profiles/fmperf/sanity_sharegpt.yaml.in b/llm_d_benchmark/workload/profiles/fmperf/sanity_sharegpt.yaml.in
similarity index 100%
rename from workload/profiles/fmperf/sanity_sharegpt.yaml.in
rename to llm_d_benchmark/workload/profiles/fmperf/sanity_sharegpt.yaml.in
diff --git a/workload/profiles/fmperf/sanity_short-input.yaml.in b/llm_d_benchmark/workload/profiles/fmperf/sanity_short-input.yaml.in
similarity index 100%
rename from workload/profiles/fmperf/sanity_short-input.yaml.in
rename to llm_d_benchmark/workload/profiles/fmperf/sanity_short-input.yaml.in
diff --git a/workload/profiles/fmperf/small_model_long_input.yaml.in b/llm_d_benchmark/workload/profiles/fmperf/small_model_long_input.yaml.in
similarity index 100%
rename from workload/profiles/fmperf/small_model_long_input.yaml.in
rename to llm_d_benchmark/workload/profiles/fmperf/small_model_long_input.yaml.in
diff --git a/workload/profiles/guidellm/sanity_concurrent.yaml.in b/llm_d_benchmark/workload/profiles/guidellm/sanity_concurrent.yaml.in
similarity index 100%
rename from workload/profiles/guidellm/sanity_concurrent.yaml.in
rename to llm_d_benchmark/workload/profiles/guidellm/sanity_concurrent.yaml.in
diff --git a/workload/profiles/inference-perf/chatbot_sharegpt.yaml.in b/llm_d_benchmark/workload/profiles/inference-perf/chatbot_sharegpt.yaml.in
similarity index 100%
rename from workload/profiles/inference-perf/chatbot_sharegpt.yaml.in
rename to llm_d_benchmark/workload/profiles/inference-perf/chatbot_sharegpt.yaml.in
diff --git a/workload/profiles/inference-perf/chatbot_synthetic.yaml.in b/llm_d_benchmark/workload/profiles/inference-perf/chatbot_synthetic.yaml.in
similarity index 100%
rename from workload/profiles/inference-perf/chatbot_synthetic.yaml.in
rename to llm_d_benchmark/workload/profiles/inference-perf/chatbot_synthetic.yaml.in
diff --git a/workload/profiles/inference-perf/code_completion_synthetic.yaml.in b/llm_d_benchmark/workload/profiles/inference-perf/code_completion_synthetic.yaml.in
similarity index 100%
rename from workload/profiles/inference-perf/code_completion_synthetic.yaml.in
rename to llm_d_benchmark/workload/profiles/inference-perf/code_completion_synthetic.yaml.in
diff --git a/workload/profiles/inference-perf/sanity_random.yaml.in b/llm_d_benchmark/workload/profiles/inference-perf/sanity_random.yaml.in
similarity index 100%
rename from workload/profiles/inference-perf/sanity_random.yaml.in
rename to llm_d_benchmark/workload/profiles/inference-perf/sanity_random.yaml.in
diff --git a/workload/profiles/inference-perf/shared_prefix_synthetic.yaml.in b/llm_d_benchmark/workload/profiles/inference-perf/shared_prefix_synthetic.yaml.in
similarity index 100%
rename from workload/profiles/inference-perf/shared_prefix_synthetic.yaml.in
rename to llm_d_benchmark/workload/profiles/inference-perf/shared_prefix_synthetic.yaml.in
diff --git a/workload/profiles/inference-perf/shared_prefix_synthetic_short.yaml.in b/llm_d_benchmark/workload/profiles/inference-perf/shared_prefix_synthetic_short.yaml.in
similarity index 100%
rename from workload/profiles/inference-perf/shared_prefix_synthetic_short.yaml.in
rename to llm_d_benchmark/workload/profiles/inference-perf/shared_prefix_synthetic_short.yaml.in
diff --git a/workload/profiles/inference-perf/summarization_synthetic.yaml.in b/llm_d_benchmark/workload/profiles/inference-perf/summarization_synthetic.yaml.in
similarity index 100%
rename from workload/profiles/inference-perf/summarization_synthetic.yaml.in
rename to llm_d_benchmark/workload/profiles/inference-perf/summarization_synthetic.yaml.in
diff --git a/workload/profiles/nop/nop.yaml.in b/llm_d_benchmark/workload/profiles/nop/nop.yaml.in
similarity index 100%
rename from workload/profiles/nop/nop.yaml.in
rename to llm_d_benchmark/workload/profiles/nop/nop.yaml.in
diff --git a/workload/profiles/vllm-benchmark/random_concurrent.yaml.in b/llm_d_benchmark/workload/profiles/vllm-benchmark/random_concurrent.yaml.in
similarity index 100%
rename from workload/profiles/vllm-benchmark/random_concurrent.yaml.in
rename to llm_d_benchmark/workload/profiles/vllm-benchmark/random_concurrent.yaml.in
diff --git a/workload/profiles/vllm-benchmark/sanity_random.yaml.in b/llm_d_benchmark/workload/profiles/vllm-benchmark/sanity_random.yaml.in
similarity index 100%
rename from workload/profiles/vllm-benchmark/sanity_random.yaml.in
rename to llm_d_benchmark/workload/profiles/vllm-benchmark/sanity_random.yaml.in
diff --git a/workload/report/README.md b/llm_d_benchmark/workload/report/README.md
similarity index 100%
rename from workload/report/README.md
rename to llm_d_benchmark/workload/report/README.md
diff --git a/llm_d_benchmark/workload/report/convert.py b/llm_d_benchmark/workload/report/convert.py
new file mode 100755
index 00000000..dd3b2142
--- /dev/null
+++ b/llm_d_benchmark/workload/report/convert.py
@@ -0,0 +1,1184 @@
+#!/usr/bin/env python3
+
+# This script imports data from a benchmark run in llm-d-benchmark using any
+# supported harness, and converts the results into a data file with a standard
+# benchmark report format. This format can then be used for post processing
+# that is not specialized to a particular harness.
+
+import argparse
+import base64
+import datetime
+import os
+import re
+import sys
+from typing import Any
+
+import numpy as np
+import yaml
+from schema import BenchmarkReport, Units, WorkloadGenerator
+from scipy import stats
+
+
+def check_file(file_path: str) -> None:
+    """Make sure regular file exists.
+
+    Args:
+        file_path (str): File to check.
+    """
+    if not os.path.exists(file_path):
+        sys.stderr.write("File does not exist: %s\n" % file_path)
+        exit(2)
+    if not os.path.isfile(file_path):
+        sys.stderr.write("Not a regular file: %s\n" % file_path)
+        exit(2)
+
+
+def import_yaml(file_path: str) -> dict[Any, Any]:
+    """Import a JSON/YAML file as a dict.
+
+    Args:
+        file_path (str): Path to JSON/YAML file.
+
+    Returns:
+        dict: Imported data.
+    """
+    check_file(file_path)
+    with open(file_path, "r", encoding="UTF-8") as file:
+        data = yaml.safe_load(file)
+    return data
+
+
+def import_csv_with_header(file_path: str) -> dict[str, list[Any]]:
+    """Import a CSV file where the first line is a header.
+
+    Args:
+        file_path (str): Path to CSV file.
+
+    Returns:
+        dict: Imported data where the header provides key names.
+ """ + check_file(file_path) + with open(file_path, "r", encoding="UTF-8") as file: + for ii, line in enumerate(file): + if ii == 0: + headers: list[str] = list(map(str.strip, line.split(","))) + data: dict[str, list[Any]] = {} + for hdr in headers: + data[hdr] = [] + continue + row_vals = list(map(str.strip, line.split(","))) + if len(row_vals) != len(headers): + sys.stderr.write( + 'Warning: line %d of "%s" does not match header length, skipping: %d != %d\n' + % (ii + 1, file_path, len(row_vals), len(headers)) + ) + continue + for jj, val in enumerate(row_vals): + # Try converting the value to an int or float + try: + val = int(val) + except ValueError: + try: + val = float(val) + except ValueError: + pass + data[headers[jj]].append(val) + # Convert lists of ints or floats to numpy arrays + for hdr in headers: + if isinstance(data[hdr][0], int) or isinstance(data[hdr][0], float): + data[hdr] = np.array(data[hdr]) + return data + + +def update_dict(dest: dict[Any, Any], source: dict[Any, Any]) -> None: + """Deep update a dict using values from another dict. If a value is a dict, + then update that dict, otherwise overwrite with the new value. + + Args: + dest (dict): dict to update. + source (dict): dict with new values to add to dest. + """ + for key, val in source.items(): + if key in dest and isinstance(dest[key], dict): + if not val: + # Do not "update" with null values + continue + if not isinstance(val, dict): + raise Exception("Cannot update dict type with non-dict: %s" % val) + update_dict(dest[key], val) + else: + dest[key] = val + + +def _get_llmd_benchmark_envars() -> dict: + """Get information from environment variables for the benchmark report. + + Returns: + dict: Imported data about scenario following schema of BenchmarkReport. + """ + # We make the assumption that if the environment variable + # LLMDBENCH_MAGIC_ENVAR is defined, then we are inside a harness pod. 
+ if "LLMDBENCH_MAGIC_ENVAR" not in os.environ: + # We are not in a harness pod + return {} + + if "LLMDBENCH_DEPLOY_METHODS" not in os.environ: + sys.stderr.write("Warning: LLMDBENCH_DEPLOY_METHODS undefined, cannot determine deployment method.") + return {} + + if os.environ["LLMDBENCH_DEPLOY_METHODS"] == "standalone": + # Given a 'standalone' deployment, we expect the following environment + # variables to be available + return { + "scenario": { + "model": {"name": os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL"]}, + "host": { + "type": ["replica"] * int(os.environ["LLMDBENCH_VLLM_COMMON_REPLICAS"]), + "accelerator": [ + { + "model": os.environ["LLMDBENCH_VLLM_COMMON_AFFINITY"].split(":", 1)[-1], + "count": int(os.environ["LLMDBENCH_VLLM_COMMON_TENSOR_PARALLELISM"]) + * int(os.environ["LLMDBENCH_VLLM_COMMON_DATA_PARALLELISM"]), + "parallelism": { + "tp": int(os.environ["LLMDBENCH_VLLM_COMMON_TENSOR_PARALLELISM"]), + "dp": int(os.environ["LLMDBENCH_VLLM_COMMON_DATA_PARALLELISM"]), + }, + } + ] + * int(os.environ["LLMDBENCH_VLLM_COMMON_REPLICAS"]), + }, + "platform": { + "engine": [ + { + "name": os.environ["LLMDBENCH_VLLM_STANDALONE_IMAGE_REGISTRY"] + + "/" + + os.environ["LLMDBENCH_VLLM_STANDALONE_IMAGE_REPO"] + + "/" + + os.environ["LLMDBENCH_VLLM_STANDALONE_IMAGE_NAME"] + + ":" + + os.environ["LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG"], + } + ] + * int(os.environ["LLMDBENCH_VLLM_COMMON_REPLICAS"]) + }, + "metadata": { + "load_format": os.environ["LLMDBENCH_VLLM_STANDALONE_VLLM_LOAD_FORMAT"], + "logging_level": os.environ["LLMDBENCH_VLLM_STANDALONE_VLLM_LOGGING_LEVEL"], + "vllm_server_dev_mode": os.environ["LLMDBENCH_VLLM_STANDALONE_VLLM_SERVER_DEV_MODE"], + "preprocess": os.environ["LLMDBENCH_VLLM_STANDALONE_PREPROCESS"], + }, + }, + } + + if os.environ["LLMDBENCH_DEPLOY_METHODS"] == "modelservice": + # Given a 'modelservice' deployment, we expect the following environment + # variables to be available + + # Get EPP configuration + epp_config = {} + epp_config_content = os.getenv("LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_CONFIG", "") + if epp_config_content == "": + sys.stderr.write("Warning: LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_CONFIG empty.") + else: + epp_config_content = base64.b64decode(epp_config_content).decode("utf-8") + epp_config = yaml.safe_load(epp_config_content) + + # Insert default parameter values for scorers if left undefined + for ii, plugin in enumerate(epp_config["plugins"]): + if plugin["type"] == "prefix-cache-scorer": + if "parameters" not in plugin: + plugin["parameters"] = {} + + parameters = plugin["parameters"] + if "blockSize" not in parameters: + parameters["blockSize"] = 16 + if "maxPrefixBlocksToMatch" not in parameters: + parameters["maxPrefixBlocksToMatch"] = 256 + if "lruCapacityPerServer" not in parameters: + parameters["lruCapacityPerServer"] = 31250 + + epp_config["plugins"][ii]["parameters"] = parameters + + return { + "scenario": { + "model": {"name": os.environ["LLMDBENCH_DEPLOY_CURRENT_MODEL"]}, + "host": { + "type": ["prefill"] * int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS"]) + + ["decode"] * int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS"]), + "accelerator": [ + { + "model": os.environ["LLMDBENCH_VLLM_COMMON_AFFINITY"].split(":", 1)[-1], + "count": int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM"]) + * int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM"]), + "parallelism": { + "tp": int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM"]), + "dp": 
int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM"]), + }, + } + ] + * int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS"]) + + [ + { + "model": os.environ["LLMDBENCH_VLLM_COMMON_AFFINITY"].split(":", 1)[-1], + "count": int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM"]) + * int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM"]), + "parallelism": { + "tp": int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM"]), + "dp": int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM"]), + }, + } + ] + * int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS"]), + }, + "platform": { + "metadata": { + "inferenceScheduler": epp_config, + }, + "engine": [ + { + "name": os.environ["LLMDBENCH_LLMD_IMAGE_REGISTRY"] + + "/" + + os.environ["LLMDBENCH_LLMD_IMAGE_REPO"] + + "/" + + os.environ["LLMDBENCH_LLMD_IMAGE_NAME"] + + ":" + + os.environ["LLMDBENCH_LLMD_IMAGE_TAG"], + } + ] + * ( + int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS"]) + + int(os.environ["LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS"]) + ), + }, + }, + } + + # Pre-existing deployment, cannot extract details about unknown inference + # service environment + sys.stderr.write( + 'Warning: LLMDBENCH_DEPLOY_METHODS is not "modelservice" or "standalone", cannot extract environmental details.' + ) + return {} + + +def import_benchmark_report(br_file: str) -> BenchmarkReport: + """Import benchmark report, and supplement with additional data from llm-d-benchmark run. + + Args: + br_file (str): Benchmark report file to import. + + Returns: + BenchmarkReport: Imported benchmark report supplemented with run data. + """ + check_file(br_file) + + # Import benchmark report as a dict following the schema of BenchmarkReport + br_dict = import_yaml(br_file) + + return BenchmarkReport(**br_dict) + + +def _vllm_timestamp_to_epoch(date_str: str) -> int: + """Convert timestamp from vLLM benchmark into seconds from Unix epoch. + + String format is YYYYMMDD-HHMMSS in UTC. + + Args: + date_str (str): Timestamp from vLLM benchmark. + + Returns: + int: Seconds from Unix epoch. + """ + date_str = date_str.strip() + if not re.search("[0-9]{8}-[0-9]{6}", date_str): + raise Exception("Invalid date format: %s" % date_str) + year = int(date_str[0:4]) + month = int(date_str[4:6]) + day = int(date_str[6:8]) + hour = int(date_str[9:11]) + minute = int(date_str[11:13]) + second = int(date_str[13:15]) + return datetime.datetime(year, month, day, hour, minute, second).timestamp() + + +def import_vllm_benchmark(results_file: str) -> BenchmarkReport: + """Import data from a vLLM benchmark run as a BenchmarkReport. + + Args: + results_file (str): Results file to import. + + Returns: + BenchmarkReport: Imported data. + """ + check_file(results_file) + + # Import results file from vLLM benchmark + results = import_yaml(results_file) + + # Get environment variables from llm-d-benchmark run as a dict following the + # schema of BenchmarkReport + br_dict = _get_llmd_benchmark_envars() + # Append to that dict the data from vLLM benchmark. + # This section assumes metric-percentiles contains at least the values + # "0.1,1,5,10,25,75,90,95,99,99.9". If any of these values are missing, we + # will crash with a KeyError. 
+ update_dict( + br_dict, + { + "scenario": { + "model": {"name": results["model_id"]}, + "load": { + "name": WorkloadGenerator.VLLM_BENCHMARK, + "args": { + "num_prompts": results["num_prompts"], + "request_rate": results["request_rate"], + "burstiness": results["burstiness"], + "max_concurrency": results["max_concurrency"], + }, + }, + }, + "metrics": { + "time": { + "duration": results["duration"], + "start": _vllm_timestamp_to_epoch(results["date"]), + }, + "requests": { + "total": results["completed"], + "input_length": { + "units": Units.COUNT, + "mean": results["total_input_tokens"] / results["completed"], + }, + "output_length": { + "units": Units.COUNT, + "mean": results["total_output_tokens"] / results["completed"], + }, + }, + "latency": { + "time_to_first_token": { + "units": Units.MS, + "mean": results["mean_ttft_ms"], + "stddev": results["std_ttft_ms"], + "p0p1": results["p0.1_ttft_ms"], + "p1": results["p1_ttft_ms"], + "p5": results["p5_ttft_ms"], + "p10": results["p10_ttft_ms"], + "p25": results["p25_ttft_ms"], + "p50": results["median_ttft_ms"], + "p75": results["p75_ttft_ms"], + "p90": results["p90_ttft_ms"], + "p95": results["p95_ttft_ms"], + "p99": results["p99_ttft_ms"], + "p99p9": results["p99.9_ttft_ms"], + }, + "time_per_output_token": { + "units": Units.MS_PER_TOKEN, + "mean": results["mean_tpot_ms"], + "stddev": results["std_tpot_ms"], + "p0p1": results["p0.1_tpot_ms"], + "p1": results["p1_tpot_ms"], + "p5": results["p5_tpot_ms"], + "p10": results["p10_tpot_ms"], + "p25": results["p25_tpot_ms"], + "p50": results["median_tpot_ms"], + "p75": results["p75_tpot_ms"], + "p90": results["p90_tpot_ms"], + "p95": results["p95_tpot_ms"], + "p99": results["p99_tpot_ms"], + "p99p9": results["p99.9_tpot_ms"], + }, + "inter_token_latency": { + "units": Units.MS_PER_TOKEN, + "mean": results["mean_itl_ms"], + "stddev": results["std_itl_ms"], + "p0p1": results["p0.1_itl_ms"], + "p1": results["p1_itl_ms"], + "p5": results["p5_itl_ms"], + "p10": results["p10_itl_ms"], + "p25": results["p25_itl_ms"], + "p50": results["median_itl_ms"], + "p75": results["p75_itl_ms"], + "p90": results["p90_itl_ms"], + "p95": results["p95_itl_ms"], + "p99": results["p99_itl_ms"], + "p99p9": results["p99.9_itl_ms"], + }, + "request_latency": { + "units": Units.MS, + "mean": results["mean_e2el_ms"], + "stddev": results["std_e2el_ms"], + "p0p1": results["p0.1_e2el_ms"], + "p1": results["p1_e2el_ms"], + "p5": results["p5_e2el_ms"], + "p10": results["p10_e2el_ms"], + "p25": results["p25_e2el_ms"], + "p50": results["median_e2el_ms"], + "p75": results["p75_e2el_ms"], + "p90": results["p90_e2el_ms"], + "p95": results["p95_e2el_ms"], + "p99": results["p99_e2el_ms"], + "p99p9": results["p99.9_e2el_ms"], + }, + }, + "throughput": { + "output_tokens_per_sec": results["output_throughput"], + "total_tokens_per_sec": results["total_token_throughput"], + "requests_per_sec": results["request_throughput"], + }, + }, + }, + ) + + return BenchmarkReport(**br_dict) + + +def import_guidellm(results_file: str) -> BenchmarkReport: + """Import data from a GuideLLM run as a BenchmarkReport. + + Args: + results_file (str): Results file to import. + + Returns: + BenchmarkReport: Imported data.
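For reference, a quick sanity check of the timestamp helper used in the block above, assuming the UTC interpretation its docstring specifies:

```python
import datetime

# "20250101-120000" means 2025-01-01T12:00:00Z in the vLLM benchmark output.
epoch = _vllm_timestamp_to_epoch("20250101-120000")
assert epoch == int(datetime.datetime(2025, 1, 1, 12, tzinfo=datetime.timezone.utc).timestamp())
```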
+ """ + check_file(results_file) + + # Everything falls under ['benchmarks'][0], so just grab that part + results = import_yaml(results_file)["benchmarks"][0] + + # Get environment variables from llm-d-benchmark run as a dict following the + # schema of BenchmarkReport + br_dict = _get_llmd_benchmark_envars() + # Append to that dict the data from GuideLLM + update_dict( + br_dict, + { + "scenario": { + "model": {"name": results["worker"]["backend_model"]}, + "load": { + "name": WorkloadGenerator.GUIDELLM, + "args": results["args"], + }, + }, + "metrics": { + "time": { + "duration": results["duration"], + "start": results["start_time"], + "stop": results["end_time"], + }, + "requests": { + "total": results["request_totals"]["total"], + "failures": results["request_totals"]["errored"], + "incomplete": results["request_totals"]["incomplete"], + "input_length": { + "units": Units.COUNT, + "mean": results["metrics"]["prompt_token_count"]["successful"]["mean"], + "mode": results["metrics"]["prompt_token_count"]["successful"]["mode"], + "stddev": results["metrics"]["prompt_token_count"]["successful"]["std_dev"], + "min": results["metrics"]["prompt_token_count"]["successful"]["min"], + "p0p1": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p001"], + "p1": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p01"], + "p5": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p05"], + "p10": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p10"], + "p25": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p25"], + "p50": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p50"], + "p75": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p75"], + "p90": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p90"], + "p95": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p95"], + "p99": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p99"], + "p99p9": results["metrics"]["prompt_token_count"]["successful"]["percentiles"]["p999"], + "max": results["metrics"]["prompt_token_count"]["successful"]["max"], + }, + "output_length": { + "units": Units.COUNT, + "mean": results["metrics"]["output_token_count"]["successful"]["mean"], + "mode": results["metrics"]["output_token_count"]["successful"]["mode"], + "stddev": results["metrics"]["output_token_count"]["successful"]["std_dev"], + "min": results["metrics"]["output_token_count"]["successful"]["min"], + "p0p1": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p001"], + "p1": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p01"], + "p5": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p05"], + "p10": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p10"], + "p25": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p25"], + "p50": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p50"], + "p75": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p75"], + "p90": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p90"], + "p95": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p95"], + "p99": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p99"], + "p99p9": results["metrics"]["output_token_count"]["successful"]["percentiles"]["p999"], + "max": 
results["metrics"]["output_token_count"]["successful"]["max"], + }, + }, + "latency": { + "time_to_first_token": { + "units": Units.MS, + "mean": results["metrics"]["time_to_first_token_ms"]["successful"]["mean"], + "mode": results["metrics"]["time_to_first_token_ms"]["successful"]["mode"], + "stddev": results["metrics"]["time_to_first_token_ms"]["successful"]["std_dev"], + "min": results["metrics"]["time_to_first_token_ms"]["successful"]["min"], + "p0p1": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p001"], + "p1": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p01"], + "p5": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p05"], + "p10": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p10"], + "p25": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p25"], + "p50": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p50"], + "p75": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p75"], + "p90": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p90"], + "p95": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p95"], + "p99": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p99"], + "p99p9": results["metrics"]["time_to_first_token_ms"]["successful"]["percentiles"]["p999"], + "max": results["metrics"]["time_to_first_token_ms"]["successful"]["max"], + }, + "time_per_output_token": { + "units": Units.MS_PER_TOKEN, + "mean": results["metrics"]["time_per_output_token_ms"]["successful"]["mean"], + "mode": results["metrics"]["time_per_output_token_ms"]["successful"]["mode"], + "stddev": results["metrics"]["time_per_output_token_ms"]["successful"]["std_dev"], + "min": results["metrics"]["time_per_output_token_ms"]["successful"]["min"], + "p0p1": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p001"], + "p1": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p01"], + "p5": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p05"], + "p10": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p10"], + "p25": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p25"], + "p50": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p50"], + "p75": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p75"], + "p90": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p90"], + "p95": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p95"], + "p99": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p99"], + "p99p9": results["metrics"]["time_per_output_token_ms"]["successful"]["percentiles"]["p999"], + "max": results["metrics"]["time_per_output_token_ms"]["successful"]["max"], + }, + "inter_token_latency": { + "units": Units.MS_PER_TOKEN, + "mean": results["metrics"]["inter_token_latency_ms"]["successful"]["mean"], + "mode": results["metrics"]["inter_token_latency_ms"]["successful"]["mode"], + "stddev": results["metrics"]["inter_token_latency_ms"]["successful"]["std_dev"], + "min": results["metrics"]["inter_token_latency_ms"]["successful"]["min"], + "p0p1": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p001"], + "p1": 
results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p01"], + "p5": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p05"], + "p10": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p10"], + "p25": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p25"], + "p50": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p50"], + "p75": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p75"], + "p90": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p90"], + "p95": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p95"], + "p99": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p99"], + "p99p9": results["metrics"]["inter_token_latency_ms"]["successful"]["percentiles"]["p999"], + "max": results["metrics"]["inter_token_latency_ms"]["successful"]["max"], + }, + "request_latency": { + "units": Units.MS, + "mean": results["metrics"]["request_latency"]["successful"]["mean"], + "mode": results["metrics"]["request_latency"]["successful"]["mode"], + "stddev": results["metrics"]["request_latency"]["successful"]["std_dev"], + "min": results["metrics"]["request_latency"]["successful"]["min"], + "p0p1": results["metrics"]["request_latency"]["successful"]["percentiles"]["p001"], + "p1": results["metrics"]["request_latency"]["successful"]["percentiles"]["p01"], + "p5": results["metrics"]["request_latency"]["successful"]["percentiles"]["p05"], + "p10": results["metrics"]["request_latency"]["successful"]["percentiles"]["p10"], + "p25": results["metrics"]["request_latency"]["successful"]["percentiles"]["p25"], + "p50": results["metrics"]["request_latency"]["successful"]["percentiles"]["p50"], + "p75": results["metrics"]["request_latency"]["successful"]["percentiles"]["p75"], + "p90": results["metrics"]["request_latency"]["successful"]["percentiles"]["p90"], + "p95": results["metrics"]["request_latency"]["successful"]["percentiles"]["p95"], + "p99": results["metrics"]["request_latency"]["successful"]["percentiles"]["p99"], + "p99p9": results["metrics"]["request_latency"]["successful"]["percentiles"]["p999"], + "max": results["metrics"]["request_latency"]["successful"]["max"], + }, + }, + "throughput": { + "output_tokens_per_sec": results["metrics"]["output_tokens_per_second"]["successful"]["mean"], + "total_tokens_per_sec": results["metrics"]["tokens_per_second"]["successful"]["mean"], + "requests_per_sec": results["metrics"]["requests_per_second"]["successful"]["mean"], + }, + }, + }, + ) + + return BenchmarkReport(**br_dict) + + +def import_fmperf(results_file: str) -> BenchmarkReport: + """Import data from a fmperf run as a BenchmarkReport. + + Args: + results_file (str): Results file to import. + + Returns: + BenchmarkReport: Imported data. 
+ """ + check_file(results_file) + + results = import_csv_with_header(results_file) + + # Get environment variables from llm-d-benchmark run as a dict following the + # schema of BenchmarkReport + br_dict = _get_llmd_benchmark_envars() + if br_dict: + model_name = br_dict["scenario"]["model"]["name"] + else: + model_name = "unknown" + # Append to that dict the data from fmperf + duration = results["finish_time"][-1] - results["launch_time"][0] + req_latency = results["finish_time"] - results["launch_time"] + tpot = (req_latency - results["ttft"]) / (results["generation_tokens"] - 1) + itl = tpot + update_dict( + br_dict, + { + "scenario": { + "model": {"name": model_name}, + "load": { + "name": WorkloadGenerator.FMPERF, + }, + }, + "metrics": { + "time": { + "duration": duration, + "start": results["launch_time"][0], + "stop": results["finish_time"][-1], + }, + "requests": { + "total": len(results["prompt_tokens"]), + "input_length": { + "units": Units.COUNT, + "mean": results["prompt_tokens"].mean(), + "mode": stats.mode(results["prompt_tokens"])[0], + "stddev": results["prompt_tokens"].std(), + "min": results["prompt_tokens"].min(), + "p0p1": np.percentile(results["prompt_tokens"], 0.1), + "p1": np.percentile(results["prompt_tokens"], 1), + "p5": np.percentile(results["prompt_tokens"], 5), + "p10": np.percentile(results["prompt_tokens"], 10), + "p25": np.percentile(results["prompt_tokens"], 25), + "p50": np.percentile(results["prompt_tokens"], 50), + "p75": np.percentile(results["prompt_tokens"], 75), + "p90": np.percentile(results["prompt_tokens"], 90), + "p95": np.percentile(results["prompt_tokens"], 95), + "p99": np.percentile(results["prompt_tokens"], 99), + "p99p9": np.percentile(results["prompt_tokens"], 99.9), + "max": results["prompt_tokens"].max(), + }, + "output_length": { + "units": Units.COUNT, + "mean": results["generation_tokens"].mean(), + "mode": stats.mode(results["generation_tokens"])[0], + "stddev": results["generation_tokens"].std(), + "min": results["generation_tokens"].min(), + "p0p1": np.percentile(results["generation_tokens"], 0.1), + "p1": np.percentile(results["generation_tokens"], 1), + "p5": np.percentile(results["generation_tokens"], 5), + "p10": np.percentile(results["generation_tokens"], 10), + "p25": np.percentile(results["generation_tokens"], 25), + "p50": np.percentile(results["generation_tokens"], 50), + "p75": np.percentile(results["generation_tokens"], 75), + "p90": np.percentile(results["generation_tokens"], 90), + "p95": np.percentile(results["generation_tokens"], 95), + "p99": np.percentile(results["generation_tokens"], 99), + "p99p9": np.percentile(results["generation_tokens"], 99.9), + "max": results["generation_tokens"].max(), + }, + }, + "latency": { + "time_to_first_token": { + "units": Units.MS, + "mean": results["ttft"].mean(), + "mode": stats.mode(results["ttft"])[0], + "stddev": results["ttft"].std(), + "min": results["ttft"].min(), + "p0p1": np.percentile(results["ttft"], 0.1), + "p1": np.percentile(results["ttft"], 1), + "p5": np.percentile(results["ttft"], 5), + "p10": np.percentile(results["ttft"], 10), + "p25": np.percentile(results["ttft"], 25), + "p50": np.percentile(results["ttft"], 50), + "p75": np.percentile(results["ttft"], 75), + "p90": np.percentile(results["ttft"], 90), + "p95": np.percentile(results["ttft"], 95), + "p99": np.percentile(results["ttft"], 99), + "p99p9": np.percentile(results["ttft"], 99.9), + "max": results["ttft"].max(), + }, + "time_per_output_token": { + "units": Units.MS_PER_TOKEN, + "mean": tpot.mean(), + 
"mode": stats.mode(tpot)[0], + "stddev": tpot.std(), + "min": tpot.min(), + "p0p1": np.percentile(tpot, 0.1), + "p1": np.percentile(tpot, 1), + "p5": np.percentile(tpot, 5), + "p10": np.percentile(tpot, 10), + "p25": np.percentile(tpot, 25), + "p50": np.percentile(tpot, 50), + "p75": np.percentile(tpot, 75), + "p90": np.percentile(tpot, 90), + "p95": np.percentile(tpot, 95), + "p99": np.percentile(tpot, 99), + "p99p9": np.percentile(tpot, 99.9), + "max": tpot.max(), + }, + "inter_token_latency": { + "units": Units.MS_PER_TOKEN, + "mean": itl.mean(), + "mode": stats.mode(itl)[0], + "stddev": itl.std(), + "min": itl.min(), + "p0p1": np.percentile(itl, 0.1), + "p1": np.percentile(itl, 1), + "p5": np.percentile(itl, 5), + "p10": np.percentile(itl, 10), + "p25": np.percentile(itl, 25), + "p50": np.percentile(itl, 50), + "p75": np.percentile(itl, 75), + "p90": np.percentile(itl, 90), + "p95": np.percentile(itl, 95), + "p99": np.percentile(itl, 99), + "p99p9": np.percentile(itl, 99.9), + "max": itl.max(), + }, + "request_latency": { + "units": Units.MS, + "mean": req_latency.mean(), + "mode": stats.mode(req_latency)[0], + "stddev": req_latency.std(), + "min": req_latency.min(), + "p0p1": np.percentile(req_latency, 0.1), + "p1": np.percentile(req_latency, 1), + "p5": np.percentile(req_latency, 5), + "p10": np.percentile(req_latency, 10), + "p25": np.percentile(req_latency, 25), + "p50": np.percentile(req_latency, 50), + "p75": np.percentile(req_latency, 75), + "p90": np.percentile(req_latency, 90), + "p95": np.percentile(req_latency, 95), + "p99": np.percentile(req_latency, 99), + "p99p9": np.percentile(req_latency, 99.9), + "max": req_latency.max(), + }, + }, + "throughput": { + "output_tokens_per_sec": results["generation_tokens"].sum() / duration, + "total_tokens_per_sec": (results["prompt_tokens"].sum() + results["generation_tokens"].sum()) + / duration, + "requests_per_sec": len(results["prompt_tokens"]) / duration, + }, + }, + }, + ) + + return BenchmarkReport(**br_dict) + + +def import_inference_perf(results_file: str) -> BenchmarkReport: + """Import data from a Inference Perf run as a BenchmarkReport. + + Args: + results_file (str): Results file to import. + + Returns: + BenchmarkReport: Imported data. 
+ """ + check_file(results_file) + + # Import results from Inference Perf + results = import_yaml(results_file) + + # Get stage number from metrics filename + stage = int(results_file.rsplit("stage_")[-1].split("_", 1)[0]) + + # Import Inference Perf config file + config_file = os.path.join(os.path.dirname(results_file), "config.yaml") + if os.path.isfile(config_file): + config = import_yaml(config_file) + else: + config = {} + + # Get environment variables from llm-d-benchmark run as a dict following the + # schema of BenchmarkReport + br_dict = _get_llmd_benchmark_envars() + if br_dict: + model_name = br_dict["scenario"]["model"]["name"] + else: + model_name = "unknown" + # Append to that dict the data from Inference Perf + update_dict( + br_dict, + { + "scenario": { + "model": {"name": model_name}, + "load": { + "name": WorkloadGenerator.INFERENCE_PERF, + "args": config, + "metadata": { + "stage": stage, + }, + }, + }, + "metrics": { + "time": { + "duration": results["load_summary"][ + "send_duration" + ], # TODO this isn't exactly what we need, we may need to pull apart per_request_lifecycle_metrics.json + }, + "requests": { + "total": results["load_summary"]["count"], + "failures": results["failures"]["count"], + "input_length": { + "units": Units.COUNT, + "mean": results["successes"]["prompt_len"]["mean"], + "min": results["successes"]["prompt_len"]["min"], + "p0p1": results["successes"]["prompt_len"]["p0.1"], + "p1": results["successes"]["prompt_len"]["p1"], + "p5": results["successes"]["prompt_len"]["p5"], + "p10": results["successes"]["prompt_len"]["p10"], + "p25": results["successes"]["prompt_len"]["p25"], + "p50": results["successes"]["prompt_len"]["median"], + "p75": results["successes"]["prompt_len"]["p75"], + "p90": results["successes"]["prompt_len"]["p90"], + "p95": results["successes"]["prompt_len"]["p95"], + "p99": results["successes"]["prompt_len"]["p99"], + "p99p9": results["successes"]["prompt_len"]["p99.9"], + "max": results["successes"]["prompt_len"]["max"], + }, + "output_length": { + "units": Units.COUNT, + "mean": results["successes"]["output_len"]["mean"], + "min": results["successes"]["output_len"]["min"], + "p0p1": results["successes"]["output_len"]["p0.1"], + "p1": results["successes"]["output_len"]["p1"], + "p5": results["successes"]["output_len"]["p5"], + "p10": results["successes"]["output_len"]["p10"], + "p25": results["successes"]["output_len"]["p25"], + "p50": results["successes"]["output_len"]["median"], + "p75": results["successes"]["output_len"]["p75"], + "p90": results["successes"]["output_len"]["p90"], + "p95": results["successes"]["output_len"]["p95"], + "p99": results["successes"]["output_len"]["p99"], + "p99p9": results["successes"]["output_len"]["p99.9"], + "max": results["successes"]["output_len"]["max"], + }, + }, + "latency": { + "time_to_first_token": { + "units": Units.S, + "mean": results["successes"]["latency"]["time_to_first_token"]["mean"], + "min": results["successes"]["latency"]["time_to_first_token"]["min"], + "p0p1": results["successes"]["latency"]["time_to_first_token"]["p0.1"], + "p1": results["successes"]["latency"]["time_to_first_token"]["p1"], + "p5": results["successes"]["latency"]["time_to_first_token"]["p5"], + "p10": results["successes"]["latency"]["time_to_first_token"]["p10"], + "p25": results["successes"]["latency"]["time_to_first_token"]["p25"], + "p50": results["successes"]["latency"]["time_to_first_token"]["median"], + "p75": results["successes"]["latency"]["time_to_first_token"]["p75"], + "p90": 
results["successes"]["latency"]["time_to_first_token"]["p90"], + "p95": results["successes"]["latency"]["time_to_first_token"]["p95"], + "p99": results["successes"]["latency"]["time_to_first_token"]["p99"], + "p99p9": results["successes"]["latency"]["time_to_first_token"]["p99.9"], + "max": results["successes"]["latency"]["time_to_first_token"]["max"], + }, + "normalized_time_per_output_token": { + "units": Units.S_PER_TOKEN, + "mean": results["successes"]["latency"]["normalized_time_per_output_token"]["mean"], + "min": results["successes"]["latency"]["normalized_time_per_output_token"]["min"], + "p0p1": results["successes"]["latency"]["normalized_time_per_output_token"]["p0.1"], + "p1": results["successes"]["latency"]["normalized_time_per_output_token"]["p1"], + "p5": results["successes"]["latency"]["normalized_time_per_output_token"]["p5"], + "p10": results["successes"]["latency"]["normalized_time_per_output_token"]["p10"], + "p25": results["successes"]["latency"]["normalized_time_per_output_token"]["p25"], + "p50": results["successes"]["latency"]["normalized_time_per_output_token"]["median"], + "p75": results["successes"]["latency"]["normalized_time_per_output_token"]["p75"], + "p90": results["successes"]["latency"]["normalized_time_per_output_token"]["p90"], + "p95": results["successes"]["latency"]["normalized_time_per_output_token"]["p95"], + "p99": results["successes"]["latency"]["normalized_time_per_output_token"]["p99"], + "p99p9": results["successes"]["latency"]["normalized_time_per_output_token"]["p99.9"], + "max": results["successes"]["latency"]["normalized_time_per_output_token"]["max"], + }, + "time_per_output_token": { + "units": Units.S_PER_TOKEN, + "mean": results["successes"]["latency"]["time_per_output_token"]["mean"], + "min": results["successes"]["latency"]["time_per_output_token"]["min"], + "p0p1": results["successes"]["latency"]["time_per_output_token"]["p0.1"], + "p1": results["successes"]["latency"]["time_per_output_token"]["p1"], + "p5": results["successes"]["latency"]["time_per_output_token"]["p5"], + "p10": results["successes"]["latency"]["time_per_output_token"]["p10"], + "p25": results["successes"]["latency"]["time_per_output_token"]["p25"], + "p50": results["successes"]["latency"]["time_per_output_token"]["median"], + "p75": results["successes"]["latency"]["time_per_output_token"]["p75"], + "p90": results["successes"]["latency"]["time_per_output_token"]["p90"], + "p95": results["successes"]["latency"]["time_per_output_token"]["p95"], + "p99": results["successes"]["latency"]["time_per_output_token"]["p99"], + "p99p9": results["successes"]["latency"]["time_per_output_token"]["p99.9"], + "max": results["successes"]["latency"]["time_per_output_token"]["max"], + }, + "inter_token_latency": { + "units": Units.S_PER_TOKEN, + "mean": results["successes"]["latency"]["inter_token_latency"]["mean"], + "min": results["successes"]["latency"]["inter_token_latency"]["min"], + "p0p1": results["successes"]["latency"]["inter_token_latency"]["p0.1"], + "p1": results["successes"]["latency"]["inter_token_latency"]["p1"], + "p5": results["successes"]["latency"]["inter_token_latency"]["p5"], + "p10": results["successes"]["latency"]["inter_token_latency"]["p10"], + "p25": results["successes"]["latency"]["inter_token_latency"]["p25"], + "p50": results["successes"]["latency"]["inter_token_latency"]["median"], + "p75": results["successes"]["latency"]["inter_token_latency"]["p75"], + "p90": results["successes"]["latency"]["inter_token_latency"]["p90"], + "p95": 
results["successes"]["latency"]["inter_token_latency"]["p95"], + "p99": results["successes"]["latency"]["inter_token_latency"]["p99"], + "p99p9": results["successes"]["latency"]["inter_token_latency"]["p99.9"], + "max": results["successes"]["latency"]["inter_token_latency"]["max"], + }, + "request_latency": { + "units": Units.S, + "mean": results["successes"]["latency"]["request_latency"]["mean"], + "min": results["successes"]["latency"]["request_latency"]["min"], + "p0p1": results["successes"]["latency"]["request_latency"]["p0.1"], + "p1": results["successes"]["latency"]["request_latency"]["p1"], + "p5": results["successes"]["latency"]["request_latency"]["p5"], + "p10": results["successes"]["latency"]["request_latency"]["p10"], + "p25": results["successes"]["latency"]["request_latency"]["p25"], + "p50": results["successes"]["latency"]["request_latency"]["median"], + "p75": results["successes"]["latency"]["request_latency"]["p75"], + "p90": results["successes"]["latency"]["request_latency"]["p90"], + "p95": results["successes"]["latency"]["request_latency"]["p95"], + "p99": results["successes"]["latency"]["request_latency"]["p99"], + "p99p9": results["successes"]["latency"]["request_latency"]["p99.9"], + "max": results["successes"]["latency"]["request_latency"]["max"], + }, + }, + "throughput": { + "output_tokens_per_sec": results["successes"]["throughput"]["output_tokens_per_sec"], + "total_tokens_per_sec": results["successes"]["throughput"]["total_tokens_per_sec"], + "requests_per_sec": results["successes"]["throughput"]["requests_per_sec"], + }, + }, + }, + ) + + return BenchmarkReport(**br_dict) + + +def import_nop(results_file: str) -> BenchmarkReport: + """Import data from a nop run as a BenchmarkReport. + + Args: + results_file (str): Results file to import. + + Returns: + BenchmarkReport: Imported data. 
+ """ + check_file(results_file) + + results = import_yaml(results_file) + + def _import_categories(cat_list: list[dict[str, Any]]) -> list[dict[str, Any]]: + new_cat_list = [] + for cat in cat_list: + cat_dict = {} + cat_dict["title"] = cat["title"] + process = cat.get("process") + if process is not None: + cat_dict["process"] = process["name"] + cat_dict["elapsed"] = { + "units": Units.S, + "value": cat["elapsed"], + } + categories = cat.get("categories") + if categories is not None: + cat_dict["categories"] = _import_categories(categories) + + new_cat_list.append(cat_dict) + + return new_cat_list + + categories = _import_categories(results["metrics"]["categories"]) + + # Get environment variables from llm-d-benchmark run as a dict following the + # schema of BenchmarkReport + br_dict = _get_llmd_benchmark_envars() + + results_dict = { + "scenario": { + "model": {"name": results["scenario"]["model"]["name"]}, + "load": { + "name": WorkloadGenerator.NOP, + }, + "platform": {"engine": [results["scenario"]["platform"]["engine"]]}, + "metadata": { + "load_format": results["scenario"]["load_format"], + "sleep_mode": results["scenario"]["sleep_mode"], + }, + }, + "metrics": { + "metadata": { + "load_time": { + "units": Units.S, + "value": results["metrics"]["load_time"], + }, + "size": { + "units": Units.GIB, + "value": results["metrics"]["size"], + }, + "transfer_rate": { + "units": Units.GIB_PER_S, + "value": results["metrics"]["transfer_rate"], + }, + "sleep": { + "units": Units.S, + "value": results["metrics"]["sleep"], + }, + "gpu_freed": { + "units": Units.GIB, + "value": results["metrics"]["gpu_freed"], + }, + "gpu_in_use": { + "units": Units.GIB, + "value": results["metrics"]["gpu_in_use"], + }, + "wake": { + "units": Units.S, + "value": results["metrics"]["wake"], + }, + "categories": categories, + }, + "time": { + "duration": results["metrics"]["time"]["duration"], + "start": results["metrics"]["time"]["start"], + "stop": results["metrics"]["time"]["stop"], + }, + "requests": { + "total": 0, + "failures": 0, + "input_length": { + "units": Units.COUNT, + "mean": 0, + "min": 0, + "p10": 0, + "p50": 0, + "p90": 0, + "max": 0, + }, + "output_length": { + "units": Units.COUNT, + "mean": 0, + "min": 0, + "p10": 0, + "p50": 0, + "p90": 0, + "max": 0, + }, + }, + "latency": { + "time_to_first_token": { + "units": Units.MS, + "mean": 0, + "min": 0, + "p10": 0, + "p50": 0, + "p90": 0, + "max": 0, + }, + "normalized_time_per_output_token": { + "units": Units.MS_PER_TOKEN, + "mean": 0, + "min": 0, + "p10": 0, + "p50": 0, + "p90": 0, + "max": 0, + }, + "time_per_output_token": { + "units": Units.MS_PER_TOKEN, + "mean": 0, + "min": 0, + "p10": 0, + "p50": 0, + "p90": 0, + "max": 0, + }, + "inter_token_latency": { + "units": Units.MS_PER_TOKEN, + "mean": 0, + "min": 0, + "p10": 0, + "p50": 0, + "p90": 0, + "max": 0, + }, + "request_latency": { + "units": Units.MS, + "mean": 0, + "min": 0, + "p10": 0, + "p50": 0, + "p90": 0, + "max": 0, + }, + }, + "throughput": { + "output_tokens_per_sec": 0, + "total_tokens_per_sec": 0, + "requests_per_sec": 0, + }, + }, + } + + for name in ["load_cached_compiled_graph", "compile_graph"]: + value = results["metrics"].get(name) + if value is not None: + results_dict["metrics"]["metadata"][name] = { + "units": Units.S, + "value": value, + } + + update_dict(br_dict, results_dict) + + return BenchmarkReport(**br_dict) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Convert benchmark run data to standard benchmark report format.") + 
parser.add_argument("results_file", type=str, help="Results file to convert.") + parser.add_argument("output_file", type=str, default=None, nargs="?", help="Output file for benchark report.") + parser.add_argument( + "-f", "--force", action=argparse.BooleanOptionalAction, help="Write to output file even if it already exists." + ) + parser.add_argument( + "-w", + "--workload-generator", + type=str, + default=WorkloadGenerator.VLLM_BENCHMARK, + help="Workload generator used.", + ) + + args = parser.parse_args() + if args.output_file and os.path.exists(args.output_file) and not args.force: + sys.stderr.write("Output file already exists: %s\n" % args.output_file) + sys.exit(1) + + match args.workload_generator: + case WorkloadGenerator.FMPERF: + if args.output_file: + import_fmperf(args.results_file).export_yaml(args.output_file) + else: + import_fmperf(args.results_file).print_yaml() + case WorkloadGenerator.GUIDELLM: + if args.output_file: + import_guidellm(args.results_file).export_yaml(args.output_file) + else: + import_guidellm(args.results_file).print_yaml() + case WorkloadGenerator.INFERENCE_PERF: + if args.output_file: + import_inference_perf(args.results_file).export_yaml(args.output_file) + else: + import_inference_perf(args.results_file).print_yaml() + case WorkloadGenerator.VLLM_BENCHMARK: + if args.output_file: + import_vllm_benchmark(args.results_file).export_yaml(args.output_file) + else: + import_vllm_benchmark(args.results_file).print_yaml() + case WorkloadGenerator.NOP: + if args.output_file: + import_nop(args.results_file).export_yaml(args.output_file) + else: + import_nop(args.results_file).print_yaml() + case _: + sys.stderr.write("Unsupported workload generator: %s\n" % args.workload_generator) + sys.stderr.write("Must be one of: %s\n" % str([wg.value for wg in WorkloadGenerator])[1:-1]) + sys.exit(1) diff --git a/workload/report/report_json_schema.json b/llm_d_benchmark/workload/report/report_json_schema.json similarity index 100% rename from workload/report/report_json_schema.json rename to llm_d_benchmark/workload/report/report_json_schema.json diff --git a/workload/report/schema.py b/llm_d_benchmark/workload/report/schema.py similarity index 85% rename from workload/report/schema.py rename to llm_d_benchmark/workload/report/schema.py index 0c9b093d..5f2562ab 100755 --- a/workload/report/schema.py +++ b/llm_d_benchmark/workload/report/schema.py @@ -1,16 +1,17 @@ #!/usr/bin/env python3 -from enum import StrEnum, auto import json +from enum import StrEnum, auto from operator import attrgetter -from typing import Optional, Any +from typing import Any, Optional -from pydantic import BaseModel, model_validator import yaml +from pydantic import BaseModel, model_validator # BenchmarkReport schema version -VERSION = '0.1' +VERSION = "0.1" + class Parallelism(BaseModel): """Accelerator parallelism details.""" @@ -64,7 +65,7 @@ class Host(BaseModel): type: list[HostType] metadata: Optional[Any] = None - @model_validator(mode='after') + @model_validator(mode="after") def check_types(self): """Types must be either all 'replica' or a mix of 'prefill' and 'decode'.""" if len(self.type) <= 1: @@ -73,12 +74,12 @@ def check_types(self): type_ref = self.type[0] if type_ref == HostType.REPLICA: if HostType.DECODE in self.type: - raise ValueError(f'Cannot mix "replica" with "prefill"/"decode" types.') + raise ValueError('Cannot mix "replica" with "prefill"/"decode" types.') if HostType.PREFILL in self.type: - raise ValueError(f'Cannot mix "replica" with "prefill"/"decode" types.') + raise 
ValueError('Cannot mix "replica" with "prefill"/"decode" types.') else: if HostType.REPLICA in self.type: - raise ValueError(f'Cannot mix "replica" with "prefill"/"decode" types.') + raise ValueError('Cannot mix "replica" with "prefill"/"decode" types.') return self @@ -101,6 +102,7 @@ class Platform(BaseModel): class Model(BaseModel): """AI model details.""" + name: str quantization: Optional[str] = None adapters: Optional[list[dict[str, str]]] = None @@ -126,9 +128,9 @@ class WorkloadGenerator(StrEnum): FMPERF = auto() GUIDELLM = auto() - INFERENCE_PERF = 'inference-perf' - VLLM_BENCHMARK = 'vllm-benchmark' - NOP = 'nop' + INFERENCE_PERF = "inference-perf" + VLLM_BENCHMARK = "vllm-benchmark" + NOP = "nop" class Load(BaseModel): @@ -218,33 +220,41 @@ class Units(StrEnum): MS = auto() S = auto() # Memory - MB = 'MB' - GB = 'GB' - TB = 'TB' - MIB = 'MiB' - GIB = 'GiB' - TIB = 'TiB' + MB = "MB" + GB = "GB" + TB = "TB" + MIB = "MiB" + GIB = "GiB" + TIB = "TiB" # Bandwidth - MBIT_PER_S = 'Mbit/s' - GBIT_PER_S = 'Gbit/s' - TBIT_PER_S = 'Tbit/s' + MBIT_PER_S = "Mbit/s" + GBIT_PER_S = "Gbit/s" + TBIT_PER_S = "Tbit/s" GIB_PER_S = "GiB/s" - MB_PER_S = 'MB/s' - GB_PER_S = 'GB/s' - TB_PER_S = 'TB/s' + MB_PER_S = "MB/s" + GB_PER_S = "GB/s" + TB_PER_S = "TB/s" # Generation latency - MS_PER_TOKEN = 'ms/token' - S_PER_TOKEN = 's/token' + MS_PER_TOKEN = "ms/token" + S_PER_TOKEN = "s/token" # Power WATTS = "Watts" + # Lists of compatible units units_quantity = [Units.COUNT] units_portion = [Units.PERCENT, Units.FRACTION] units_time = [Units.MS, Units.S] units_memory = [Units.MB, Units.GB, Units.TB, Units.MIB, Units.GIB, Units.TIB] -units_bandwidth = [Units.MBIT_PER_S, Units.GBIT_PER_S, Units.TBIT_PER_S, Units.MB_PER_S, Units.GB_PER_S, Units.TB_PER_S] +units_bandwidth = [ + Units.MBIT_PER_S, + Units.GBIT_PER_S, + Units.TBIT_PER_S, + Units.MB_PER_S, + Units.GB_PER_S, + Units.TB_PER_S, +] units_gen_latency = [Units.MS_PER_TOKEN, Units.S_PER_TOKEN] units_power = [Units.WATTS] @@ -262,7 +272,7 @@ class Statistics(BaseModel): p5: Optional[float | int] = None p10: Optional[float | int] = None p25: Optional[float | int] = None - p50: Optional[float | int] = None # This is the same as median + p50: Optional[float | int] = None # This is the same as median p75: Optional[float | int] = None p90: Optional[float | int] = None p95: Optional[float | int] = None @@ -285,7 +295,7 @@ class Requests(BaseModel): output_length: Statistics """Output sequence length.""" - @model_validator(mode='after') + @model_validator(mode="after") def check_units(self): if self.input_length.units not in units_quantity: raise ValueError(f'Invalid units "{self.input_length.units}", must be one of: {" ".join(units_quantity)}') @@ -323,16 +333,27 @@ class Latency(BaseModel): request_latency: Optional[Statistics] = None """End-to-end request latency.""" - @model_validator(mode='after') + @model_validator(mode="after") def check_units(self): if self.time_to_first_token.units not in units_time: - raise ValueError(f'Invalid units "{self.time_to_first_token.units}", must be one of: {" ".join(units_time)}') - if self.normalized_time_per_output_token and self.normalized_time_per_output_token.units not in units_gen_latency: - raise ValueError(f'Invalid units "{self.normalized_time_per_output_token.units}", must be one of: {" ".join(units_gen_latency)}') + raise ValueError( + f'Invalid units "{self.time_to_first_token.units}", must be one of: {" ".join(units_time)}' + ) + if ( + self.normalized_time_per_output_token + and 
self.normalized_time_per_output_token.units not in units_gen_latency + ): + raise ValueError( + f'Invalid units "{self.normalized_time_per_output_token.units}", must be one of: {" ".join(units_gen_latency)}' + ) if self.time_per_output_token and self.time_per_output_token.units not in units_gen_latency: - raise ValueError(f'Invalid units "{self.time_per_output_token.units}", must be one of: {" ".join(units_gen_latency)}') + raise ValueError( + f'Invalid units "{self.time_per_output_token.units}", must be one of: {" ".join(units_gen_latency)}' + ) if self.inter_token_latency and self.inter_token_latency.units not in units_gen_latency: - raise ValueError(f'Invalid units "{self.inter_token_latency.units}", must be one of: {" ".join(units_gen_latency)}') + raise ValueError( + f'Invalid units "{self.inter_token_latency.units}", must be one of: {" ".join(units_gen_latency)}' + ) if self.request_latency and self.request_latency.units not in units_time: raise ValueError(f'Invalid units "{self.request_latency.units}", must be one of: {" ".join(units_time)}') return self @@ -354,7 +375,7 @@ class Service(BaseModel): queue_size: Optional[Statistics] = None kv_cache_size: Optional[Statistics] = None - @model_validator(mode='after') + @model_validator(mode="after") def check_units(self): if self.batch_size and self.batch_size.units not in units_quantity: raise ValueError(f'Invalid units "{self.batch_size.units}", must be one of: {" ".join(units_quantity)}') @@ -372,7 +393,7 @@ class MemoryMetrics(BaseModel): utilization: Optional[Statistics] = None bandwidth: Optional[Statistics] = None - @model_validator(mode='after') + @model_validator(mode="after") def check_units(self): if self.consumption and self.consumption.units not in units_memory: raise ValueError(f'Invalid units "{self.consumption.units}", must be one of: {" ".join(units_memory)}') @@ -388,7 +409,7 @@ class ComputeMetrics(BaseModel): utilization: Optional[Statistics] = None - @model_validator(mode='after') + @model_validator(mode="after") def check_units(self): if self.utilization.units not in units_portion: raise ValueError(f'Invalid units "{self.utilization.units}", must be one of: {" ".join(units_portion)}') @@ -402,7 +423,7 @@ class AcceleratorMetrics(BaseModel): compute: Optional[ComputeMetrics] = None power: Optional[Statistics] = None - @model_validator(mode='after') + @model_validator(mode="after") def check_units(self): if self.power and self.power.units not in units_power: raise ValueError(f'Invalid units "{self.power.units}", must be one of: {" ".join(units_power)}') @@ -438,20 +459,20 @@ class BenchmarkReport(BaseModel): metrics: Metrics metadata: Optional[Any] = None - @model_validator(mode='after') + @model_validator(mode="after") def check_version(self): """Ensure version is compatible.""" if self.version != VERSION: raise ValueError(f'Invalid version "{self.version}", must be "{VERSION}".') return self - @model_validator(mode='after') + @model_validator(mode="after") def check_corresponding_lengths(self): """Ensure the lengths of the following match (if present): - - scenario.host.accelerator - - scenario.host.type - - scenario.platform.engine - - metrics.resources.accelerator + - scenario.host.accelerator + - scenario.host.type + - scenario.platform.engine + - metrics.resources.accelerator """ entity_lengths = { "scenario.host.accelerator": None, @@ -477,9 +498,7 @@ def check_corresponding_lengths(self): length_ref = entity_lengths.pop(entity_ref) for entity, length in entity_lengths.items(): if length != length_ref: - raise 
ValueError( - f'Length of "{entity}" ({length}) must match "{entity_ref}" ({length_ref})' - ) + raise ValueError(f'Length of "{entity}" ({length}) must match "{entity_ref}" ({length_ref})') return self def dump(self) -> dict[str, Any]: @@ -500,7 +519,7 @@ def export_json(self, filename) -> None: Args: filename: File to save BenchmarkReport to. """ - with open(filename, 'w') as file: + with open(filename, "w") as file: json.dump(self.dump(), file, indent=2) def export_yaml(self, filename) -> None: @@ -509,20 +528,16 @@ def export_yaml(self, filename) -> None: Args: filename: File to save BenchmarkReport to. """ - with open(filename, 'w') as file: + with open(filename, "w") as file: yaml.dump(self.dump(), file, indent=2) def print_json(self) -> None: """Print BenchmarkReport as JSON.""" - print( - json.dumps(self.dump(), indent=2) - ) + print(json.dumps(self.dump(), indent=2)) def print_yaml(self) -> None: """Print BenchmarkReport as YAML.""" - print( - yaml.dump(self.dump(), indent=2) - ) + print(yaml.dump(self.dump(), indent=2)) def make_json_schema() -> str: @@ -547,6 +562,7 @@ def create_from_str(yaml_str: str) -> BenchmarkReport: """ return BenchmarkReport(**yaml.safe_load(yaml_str)) + # If this is executed directly, print JSON schema. if __name__ == "__main__": print(make_json_schema()) diff --git a/pdm.lock b/pdm.lock new file mode 100644 index 00000000..954db78c --- /dev/null +++ b/pdm.lock @@ -0,0 +1,2447 @@ +# This file is @generated by PDM. +# It is not intended for manual editing. + +[metadata] +groups = ["default", "dev"] +strategy = ["inherit_metadata"] +lock_version = "4.5.0" +content_hash = "sha256:d04b455ac095087a66434d0c14da31e92e63118d31b01b735c54fcd3056b97bf" + +[[metadata.targets]] +requires_python = ">=3.12" + +[[package]] +name = "aiohappyeyeballs" +version = "2.6.1" +requires_python = ">=3.9" +summary = "Happy Eyeballs for asyncio" +groups = ["default"] +files = [ + {file = "aiohappyeyeballs-2.6.1-py3-none-any.whl", hash = "sha256:f349ba8f4b75cb25c99c5c2d84e997e485204d2902a9597802b0371f09331fb8"}, + {file = "aiohappyeyeballs-2.6.1.tar.gz", hash = "sha256:c3f9d0113123803ccadfdf3f0faa505bc78e6a72d1cc4806cbd719826e943558"}, +] + +[[package]] +name = "aiohttp" +version = "3.12.15" +requires_python = ">=3.9" +summary = "Async http client/server framework (asyncio)" +groups = ["default"] +dependencies = [ + "aiohappyeyeballs>=2.5.0", + "aiosignal>=1.4.0", + "async-timeout<6.0,>=4.0; python_version < \"3.11\"", + "attrs>=17.3.0", + "frozenlist>=1.1.1", + "multidict<7.0,>=4.5", + "propcache>=0.2.0", + "yarl<2.0,>=1.17.0", +] +files = [ + {file = "aiohttp-3.12.15-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:802d3868f5776e28f7bf69d349c26fc0efadb81676d0afa88ed00d98a26340b7"}, + {file = "aiohttp-3.12.15-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2800614cd560287be05e33a679638e586a2d7401f4ddf99e304d98878c29444"}, + {file = "aiohttp-3.12.15-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8466151554b593909d30a0a125d638b4e5f3836e5aecde85b66b80ded1cb5b0d"}, + {file = "aiohttp-3.12.15-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e5a495cb1be69dae4b08f35a6c4579c539e9b5706f606632102c0f855bcba7c"}, + {file = "aiohttp-3.12.15-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6404dfc8cdde35c69aaa489bb3542fb86ef215fc70277c892be8af540e5e21c0"}, + {file = "aiohttp-3.12.15-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:3ead1c00f8521a5c9070fcb88f02967b1d8a0544e6d85c253f6968b785e1a2ab"}, + {file = "aiohttp-3.12.15-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6990ef617f14450bc6b34941dba4f12d5613cbf4e33805932f853fbd1cf18bfb"}, + {file = "aiohttp-3.12.15-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd736ed420f4db2b8148b52b46b88ed038d0354255f9a73196b7bbce3ea97545"}, + {file = "aiohttp-3.12.15-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c5092ce14361a73086b90c6efb3948ffa5be2f5b6fbcf52e8d8c8b8848bb97c"}, + {file = "aiohttp-3.12.15-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:aaa2234bb60c4dbf82893e934d8ee8dea30446f0647e024074237a56a08c01bd"}, + {file = "aiohttp-3.12.15-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:6d86a2fbdd14192e2f234a92d3b494dd4457e683ba07e5905a0b3ee25389ac9f"}, + {file = "aiohttp-3.12.15-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:a041e7e2612041a6ddf1c6a33b883be6a421247c7afd47e885969ee4cc58bd8d"}, + {file = "aiohttp-3.12.15-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5015082477abeafad7203757ae44299a610e89ee82a1503e3d4184e6bafdd519"}, + {file = "aiohttp-3.12.15-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:56822ff5ddfd1b745534e658faba944012346184fbfe732e0d6134b744516eea"}, + {file = "aiohttp-3.12.15-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b2acbbfff69019d9014508c4ba0401822e8bae5a5fdc3b6814285b71231b60f3"}, + {file = "aiohttp-3.12.15-cp312-cp312-win32.whl", hash = "sha256:d849b0901b50f2185874b9a232f38e26b9b3d4810095a7572eacea939132d4e1"}, + {file = "aiohttp-3.12.15-cp312-cp312-win_amd64.whl", hash = "sha256:b390ef5f62bb508a9d67cb3bba9b8356e23b3996da7062f1a57ce1a79d2b3d34"}, + {file = "aiohttp-3.12.15-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:9f922ffd05034d439dde1c77a20461cf4a1b0831e6caa26151fe7aa8aaebc315"}, + {file = "aiohttp-3.12.15-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2ee8a8ac39ce45f3e55663891d4b1d15598c157b4d494a4613e704c8b43112cd"}, + {file = "aiohttp-3.12.15-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3eae49032c29d356b94eee45a3f39fdf4b0814b397638c2f718e96cfadf4c4e4"}, + {file = "aiohttp-3.12.15-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b97752ff12cc12f46a9b20327104448042fce5c33a624f88c18f66f9368091c7"}, + {file = "aiohttp-3.12.15-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:894261472691d6fe76ebb7fcf2e5870a2ac284c7406ddc95823c8598a1390f0d"}, + {file = "aiohttp-3.12.15-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5fa5d9eb82ce98959fc1031c28198b431b4d9396894f385cb63f1e2f3f20ca6b"}, + {file = "aiohttp-3.12.15-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0fa751efb11a541f57db59c1dd821bec09031e01452b2b6217319b3a1f34f3d"}, + {file = "aiohttp-3.12.15-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5346b93e62ab51ee2a9d68e8f73c7cf96ffb73568a23e683f931e52450e4148d"}, + {file = "aiohttp-3.12.15-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:049ec0360f939cd164ecbfd2873eaa432613d5e77d6b04535e3d1fbae5a9e645"}, + {file = "aiohttp-3.12.15-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b52dcf013b57464b6d1e51b627adfd69a8053e84b7103a7cd49c030f9ca44461"}, + {file = "aiohttp-3.12.15-cp313-cp313-musllinux_1_2_armv7l.whl", hash = 
"sha256:9b2af240143dd2765e0fb661fd0361a1b469cab235039ea57663cda087250ea9"}, + {file = "aiohttp-3.12.15-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:ac77f709a2cde2cc71257ab2d8c74dd157c67a0558a0d2799d5d571b4c63d44d"}, + {file = "aiohttp-3.12.15-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:47f6b962246f0a774fbd3b6b7be25d59b06fdb2f164cf2513097998fc6a29693"}, + {file = "aiohttp-3.12.15-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:760fb7db442f284996e39cf9915a94492e1896baac44f06ae551974907922b64"}, + {file = "aiohttp-3.12.15-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:ad702e57dc385cae679c39d318def49aef754455f237499d5b99bea4ef582e51"}, + {file = "aiohttp-3.12.15-cp313-cp313-win32.whl", hash = "sha256:f813c3e9032331024de2eb2e32a88d86afb69291fbc37a3a3ae81cc9917fb3d0"}, + {file = "aiohttp-3.12.15-cp313-cp313-win_amd64.whl", hash = "sha256:1a649001580bdb37c6fdb1bebbd7e3bc688e8ec2b5c6f52edbb664662b17dc84"}, + {file = "aiohttp-3.12.15.tar.gz", hash = "sha256:4fc61385e9c98d72fcdf47e6dd81833f47b2f77c114c29cd64a361be57a763a2"}, +] + +[[package]] +name = "aiosignal" +version = "1.4.0" +requires_python = ">=3.9" +summary = "aiosignal: a list of registered asynchronous callbacks" +groups = ["default"] +dependencies = [ + "frozenlist>=1.1.0", + "typing-extensions>=4.2; python_version < \"3.13\"", +] +files = [ + {file = "aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e"}, + {file = "aiosignal-1.4.0.tar.gz", hash = "sha256:f47eecd9468083c2029cc99945502cb7708b082c232f9aca65da147157b251c7"}, +] + +[[package]] +name = "altair" +version = "5.5.0" +requires_python = ">=3.9" +summary = "Vega-Altair: A declarative statistical visualization library for Python." +groups = ["default"] +dependencies = [ + "jinja2", + "jsonschema>=3.0", + "narwhals>=1.14.2", + "packaging", + "typing-extensions>=4.10.0; python_version < \"3.14\"", +] +files = [ + {file = "altair-5.5.0-py3-none-any.whl", hash = "sha256:91a310b926508d560fe0148d02a194f38b824122641ef528113d029fcd129f8c"}, + {file = "altair-5.5.0.tar.gz", hash = "sha256:d960ebe6178c56de3855a68c47b516be38640b73fb3b5111c2a9ca90546dd73d"}, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +requires_python = ">=3.8" +summary = "Reusable constraint types to use with typing.Annotated" +groups = ["default"] +dependencies = [ + "typing-extensions>=4.0.0; python_version < \"3.9\"", +] +files = [ + {file = "annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53"}, + {file = "annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89"}, +] + +[[package]] +name = "attrs" +version = "25.3.0" +requires_python = ">=3.8" +summary = "Classes Without Boilerplate" +groups = ["default"] +files = [ + {file = "attrs-25.3.0-py3-none-any.whl", hash = "sha256:427318ce031701fea540783410126f03899a97ffc6f61596ad581ac2e40e3bc3"}, + {file = "attrs-25.3.0.tar.gz", hash = "sha256:75d7cefc7fb576747b2c81b4442d4d4a1ce0900973527c011d1030fd3bf4af1b"}, +] + +[[package]] +name = "blinker" +version = "1.9.0" +requires_python = ">=3.9" +summary = "Fast, simple object-to-object and broadcast signaling" +groups = ["default"] +files = [ + {file = "blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc"}, + {file = "blinker-1.9.0.tar.gz", hash = "sha256:b4ce2265a7abece45e7cc896e98dbebe6cead56bcf805a3d23136d145f5445bf"}, +] + +[[package]] 
+name = "boxsdk" +version = "10.0.0" +summary = "Official Box Python SDK" +groups = ["default"] +dependencies = [ + "requests", + "requests-toolbelt", +] +files = [ + {file = "boxsdk-10.0.0-py3-none-any.whl", hash = "sha256:533c38eae1015cac7e42822c5206777f301e6b02ab2f9a9e73604b09e2742581"}, + {file = "boxsdk-10.0.0.tar.gz", hash = "sha256:5236dd7559226f846eb3acadfe324dc27fbc48a0ad796749d920271d5df8ef8c"}, +] + +[[package]] +name = "cachetools" +version = "6.2.0" +requires_python = ">=3.9" +summary = "Extensible memoizing collections and decorators" +groups = ["default"] +files = [ + {file = "cachetools-6.2.0-py3-none-any.whl", hash = "sha256:1c76a8960c0041fcc21097e357f882197c79da0dbff766e7317890a65d7d8ba6"}, + {file = "cachetools-6.2.0.tar.gz", hash = "sha256:38b328c0889450f05f5e120f56ab68c8abaf424e1275522b138ffc93253f7e32"}, +] + +[[package]] +name = "certifi" +version = "2025.8.3" +requires_python = ">=3.7" +summary = "Python package for providing Mozilla's CA Bundle." +groups = ["default"] +files = [ + {file = "certifi-2025.8.3-py3-none-any.whl", hash = "sha256:f6c12493cfb1b06ba2ff328595af9350c65d6644968e5d3a2ffd78699af217a5"}, + {file = "certifi-2025.8.3.tar.gz", hash = "sha256:e564105f78ded564e3ae7c923924435e1daa7463faeab5bb932bc53ffae63407"}, +] + +[[package]] +name = "cfgv" +version = "3.4.0" +requires_python = ">=3.8" +summary = "Validate configuration and produce human readable error messages." +groups = ["dev"] +files = [ + {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"}, + {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.4.3" +requires_python = ">=3.7" +summary = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+groups = ["default"] +files = [ + {file = "charset_normalizer-3.4.3-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e28e334d3ff134e88989d90ba04b47d84382a828c061d0d1027b1b12a62b39b1"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0cacf8f7297b0c4fcb74227692ca46b4a5852f8f4f24b3c766dd94a1075c4884"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c6fd51128a41297f5409deab284fecbe5305ebd7e5a1f959bee1c054622b7018"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:3cfb2aad70f2c6debfbcb717f23b7eb55febc0bb23dcffc0f076009da10c6392"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1606f4a55c0fd363d754049cdf400175ee96c992b1f8018b993941f221221c5f"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:027b776c26d38b7f15b26a5da1044f376455fb3766df8fc38563b4efbc515154"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:42e5088973e56e31e4fa58eb6bd709e42fc03799c11c42929592889a2e54c491"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:cc34f233c9e71701040d772aa7490318673aa7164a0efe3172b2981218c26d93"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:320e8e66157cc4e247d9ddca8e21f427efc7a04bbd0ac8a9faf56583fa543f9f"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-win32.whl", hash = "sha256:fb6fecfd65564f208cbf0fba07f107fb661bcd1a7c389edbced3f7a493f70e37"}, + {file = "charset_normalizer-3.4.3-cp312-cp312-win_amd64.whl", hash = "sha256:86df271bf921c2ee3818f0522e9a5b8092ca2ad8b065ece5d7d9d0e9f4849bcc"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:14c2a87c65b351109f6abfc424cab3927b3bdece6f706e4d12faaf3d52ee5efe"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41d1fc408ff5fdfb910200ec0e74abc40387bccb3252f3f27c0676731df2b2c8"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:1bb60174149316da1c35fa5233681f7c0f9f514509b8e399ab70fea5f17e45c9"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:30d006f98569de3459c2fc1f2acde170b7b2bd265dc1943e87e1a4efe1b67c31"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:416175faf02e4b0810f1f38bcb54682878a4af94059a1cd63b8747244420801f"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6aab0f181c486f973bc7262a97f5aca3ee7e1437011ef0c2ec04b5a11d16c927"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:fdabf8315679312cfa71302f9bd509ded4f2f263fb5b765cf1433b39106c3cc9"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:bd28b817ea8c70215401f657edef3a8aa83c29d447fb0b622c35403780ba11d5"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:18343b2d246dc6761a249ba1fb13f9ee9a2bcd95decc767319506056ea4ad4dc"}, + {file = 
"charset_normalizer-3.4.3-cp313-cp313-win32.whl", hash = "sha256:6fb70de56f1859a3f71261cbe41005f56a7842cc348d3aeb26237560bfa5e0ce"}, + {file = "charset_normalizer-3.4.3-cp313-cp313-win_amd64.whl", hash = "sha256:cf1ebb7d78e1ad8ec2a8c4732c7be2e736f6e5123a4146c5b89c9d1f585f8cef"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:3cd35b7e8aedeb9e34c41385fda4f73ba609e561faedfae0a9e75e44ac558a15"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b89bc04de1d83006373429975f8ef9e7932534b8cc9ca582e4db7d20d91816db"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2001a39612b241dae17b4687898843f254f8748b796a2e16f1051a17078d991d"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8dcfc373f888e4fb39a7bc57e93e3b845e7f462dacc008d9749568b1c4ece096"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:18b97b8404387b96cdbd30ad660f6407799126d26a39ca65729162fd810a99aa"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ccf600859c183d70eb47e05a44cd80a4ce77394d1ac0f79dbd2dd90a69a3a049"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:53cd68b185d98dde4ad8990e56a58dea83a4162161b1ea9272e5c9182ce415e0"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:30a96e1e1f865f78b030d65241c1ee850cdf422d869e9028e2fc1d5e4db73b92"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d716a916938e03231e86e43782ca7878fb602a125a91e7acb8b5112e2e96ac16"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-win32.whl", hash = "sha256:c6dbd0ccdda3a2ba7c2ecd9d77b37f3b5831687d8dc1b6ca5f56a4880cc7b7ce"}, + {file = "charset_normalizer-3.4.3-cp314-cp314-win_amd64.whl", hash = "sha256:73dc19b562516fc9bcf6e5d6e596df0b4eb98d87e4f79f3ae71840e6ed21361c"}, + {file = "charset_normalizer-3.4.3-py3-none-any.whl", hash = "sha256:ce571ab16d890d23b5c278547ba694193a45011ff86a9162a71307ed9f86759a"}, + {file = "charset_normalizer-3.4.3.tar.gz", hash = "sha256:6fce4b8500244f6fcb71465d4a4930d132ba9ab8e71a7859e6a5d59851068d14"}, +] + +[[package]] +name = "click" +version = "8.3.0" +requires_python = ">=3.10" +summary = "Composable command line interface toolkit" +groups = ["default"] +dependencies = [ + "colorama; platform_system == \"Windows\"", +] +files = [ + {file = "click-8.3.0-py3-none-any.whl", hash = "sha256:9b9f285302c6e3064f4330c05f05b81945b2a39544279343e6e7c5f27a9baddc"}, + {file = "click-8.3.0.tar.gz", hash = "sha256:e7b8232224eba16f4ebe410c25ced9f7875cb5f3263ffc93cc3e8da705e229c4"}, +] + +[[package]] +name = "colorama" +version = "0.4.6" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +summary = "Cross-platform colored terminal text." 
+groups = ["default", "dev"] +marker = "sys_platform == \"win32\" or platform_system == \"Windows\"" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "contourpy" +version = "1.3.3" +requires_python = ">=3.11" +summary = "Python library for calculating contours of 2D quadrilateral grids" +groups = ["default"] +dependencies = [ + "numpy>=1.25", +] +files = [ + {file = "contourpy-1.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b08a32ea2f8e42cf1d4be3169a98dd4be32bafe4f22b6c4cb4ba810fa9e5d2cb"}, + {file = "contourpy-1.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:556dba8fb6f5d8742f2923fe9457dbdd51e1049c4a43fd3986a0b14a1d815fc6"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92d9abc807cf7d0e047b95ca5d957cf4792fcd04e920ca70d48add15c1a90ea7"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:b2e8faa0ed68cb29af51edd8e24798bb661eac3bd9f65420c1887b6ca89987c8"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:626d60935cf668e70a5ce6ff184fd713e9683fb458898e4249b63be9e28286ea"}, + {file = "contourpy-1.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4d00e655fcef08aba35ec9610536bfe90267d7ab5ba944f7032549c55a146da1"}, + {file = "contourpy-1.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:451e71b5a7d597379ef572de31eeb909a87246974d960049a9848c3bc6c41bf7"}, + {file = "contourpy-1.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:459c1f020cd59fcfe6650180678a9993932d80d44ccde1fa1868977438f0b411"}, + {file = "contourpy-1.3.3-cp312-cp312-win32.whl", hash = "sha256:023b44101dfe49d7d53932be418477dba359649246075c996866106da069af69"}, + {file = "contourpy-1.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:8153b8bfc11e1e4d75bcb0bff1db232f9e10b274e0929de9d608027e0d34ff8b"}, + {file = "contourpy-1.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:07ce5ed73ecdc4a03ffe3e1b3e3c1166db35ae7584be76f65dbbe28a7791b0cc"}, + {file = "contourpy-1.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:177fb367556747a686509d6fef71d221a4b198a3905fe824430e5ea0fda54eb5"}, + {file = "contourpy-1.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d002b6f00d73d69333dac9d0b8d5e84d9724ff9ef044fd63c5986e62b7c9e1b1"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:348ac1f5d4f1d66d3322420f01d42e43122f43616e0f194fc1c9f5d830c5b286"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:655456777ff65c2c548b7c454af9c6f33f16c8884f11083244b5819cc214f1b5"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:644a6853d15b2512d67881586bd03f462c7ab755db95f16f14d7e238f2852c67"}, + {file = "contourpy-1.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4debd64f124ca62069f313a9cb86656ff087786016d76927ae2cf37846b006c9"}, + {file = "contourpy-1.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a15459b0f4615b00bbd1e91f1b9e19b7e63aea7483d03d804186f278c0af2659"}, + {file = "contourpy-1.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:ca0fdcd73925568ca027e0b17ab07aad764be4706d0a925b89227e447d9737b7"}, + {file = "contourpy-1.3.3-cp313-cp313-win32.whl", hash = "sha256:b20c7c9a3bf701366556e1b1984ed2d0cedf999903c51311417cf5f591d8c78d"}, + {file = "contourpy-1.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:1cadd8b8969f060ba45ed7c1b714fe69185812ab43bd6b86a9123fe8f99c3263"}, + {file = "contourpy-1.3.3-cp313-cp313-win_arm64.whl", hash = "sha256:fd914713266421b7536de2bfa8181aa8c699432b6763a0ea64195ebe28bff6a9"}, + {file = "contourpy-1.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:88df9880d507169449d434c293467418b9f6cbe82edd19284aa0409e7fdb933d"}, + {file = "contourpy-1.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:d06bb1f751ba5d417047db62bca3c8fde202b8c11fb50742ab3ab962c81e8216"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e4e6b05a45525357e382909a4c1600444e2a45b4795163d3b22669285591c1ae"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ab3074b48c4e2cf1a960e6bbeb7f04566bf36b1861d5c9d4d8ac04b82e38ba20"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:6c3d53c796f8647d6deb1abe867daeb66dcc8a97e8455efa729516b997b8ed99"}, + {file = "contourpy-1.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:50ed930df7289ff2a8d7afeb9603f8289e5704755c7e5c3bbd929c90c817164b"}, + {file = "contourpy-1.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:4feffb6537d64b84877da813a5c30f1422ea5739566abf0bd18065ac040e120a"}, + {file = "contourpy-1.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:2b7e9480ffe2b0cd2e787e4df64270e3a0440d9db8dc823312e2c940c167df7e"}, + {file = "contourpy-1.3.3-cp313-cp313t-win32.whl", hash = "sha256:283edd842a01e3dcd435b1c5116798d661378d83d36d337b8dde1d16a5fc9ba3"}, + {file = "contourpy-1.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:87acf5963fc2b34825e5b6b048f40e3635dd547f590b04d2ab317c2619ef7ae8"}, + {file = "contourpy-1.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:3c30273eb2a55024ff31ba7d052dde990d7d8e5450f4bbb6e913558b3d6c2301"}, + {file = "contourpy-1.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fde6c716d51c04b1c25d0b90364d0be954624a0ee9d60e23e850e8d48353d07a"}, + {file = "contourpy-1.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:cbedb772ed74ff5be440fa8eee9bd49f64f6e3fc09436d9c7d8f1c287b121d77"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:22e9b1bd7a9b1d652cd77388465dc358dafcd2e217d35552424aa4f996f524f5"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a22738912262aa3e254e4f3cb079a95a67132fc5a063890e224393596902f5a4"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:afe5a512f31ee6bd7d0dda52ec9864c984ca3d66664444f2d72e0dc4eb832e36"}, + {file = "contourpy-1.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f64836de09927cba6f79dcd00fdd7d5329f3fccc633468507079c829ca4db4e3"}, + {file = "contourpy-1.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:1fd43c3be4c8e5fd6e4f2baeae35ae18176cf2e5cced681cca908addf1cdd53b"}, + {file = "contourpy-1.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6afc576f7b33cf00996e5c1102dc2a8f7cc89e39c0b55df93a0b78c1bd992b36"}, + {file = "contourpy-1.3.3-cp314-cp314-win32.whl", hash = 
"sha256:66c8a43a4f7b8df8b71ee1840e4211a3c8d93b214b213f590e18a1beca458f7d"}, + {file = "contourpy-1.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:cf9022ef053f2694e31d630feaacb21ea24224be1c3ad0520b13d844274614fd"}, + {file = "contourpy-1.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:95b181891b4c71de4bb404c6621e7e2390745f887f2a026b2d99e92c17892339"}, + {file = "contourpy-1.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:33c82d0138c0a062380332c861387650c82e4cf1747aaa6938b9b6516762e772"}, + {file = "contourpy-1.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ea37e7b45949df430fe649e5de8351c423430046a2af20b1c1961cae3afcda77"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d304906ecc71672e9c89e87c4675dc5c2645e1f4269a5063b99b0bb29f232d13"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:ca658cd1a680a5c9ea96dc61cdbae1e85c8f25849843aa799dfd3cb370ad4fbe"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_26_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ab2fd90904c503739a75b7c8c5c01160130ba67944a7b77bbf36ef8054576e7f"}, + {file = "contourpy-1.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b7301b89040075c30e5768810bc96a8e8d78085b47d8be6e4c3f5a0b4ed478a0"}, + {file = "contourpy-1.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:2a2a8b627d5cc6b7c41a4beff6c5ad5eb848c88255fda4a8745f7e901b32d8e4"}, + {file = "contourpy-1.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:fd6ec6be509c787f1caf6b247f0b1ca598bef13f4ddeaa126b7658215529ba0f"}, + {file = "contourpy-1.3.3-cp314-cp314t-win32.whl", hash = "sha256:e74a9a0f5e3fff48fb5a7f2fd2b9b70a3fe014a67522f79b7cca4c0c7e43c9ae"}, + {file = "contourpy-1.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:13b68d6a62db8eafaebb8039218921399baf6e47bf85006fd8529f2a08ef33fc"}, + {file = "contourpy-1.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:b7448cb5a725bb1e35ce88771b86fba35ef418952474492cf7c764059933ff8b"}, + {file = "contourpy-1.3.3.tar.gz", hash = "sha256:083e12155b210502d0bca491432bb04d56dc3432f95a979b429f2848c3dbe880"}, +] + +[[package]] +name = "coverage" +version = "7.10.7" +requires_python = ">=3.9" +summary = "Code coverage measurement for Python" +groups = ["dev"] +files = [ + {file = "coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417"}, + {file = "coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b"}, + {file = 
"coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1"}, + {file = "coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256"}, + {file = "coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba"}, + {file = "coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf"}, + {file = "coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d"}, + {file = "coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f"}, + {file = "coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698"}, + {file = "coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843"}, + {file = "coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546"}, + {file = "coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c"}, + {file = "coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = 
"sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2"}, + {file = "coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a"}, + {file = "coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb"}, + {file = "coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb"}, + {file = "coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520"}, + {file = "coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd"}, + {file = "coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2"}, + {file = "coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681"}, + {file = "coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880"}, + {file = 
"coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63"}, + {file = "coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399"}, + {file = "coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235"}, + {file = "coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d"}, + {file = "coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a"}, + {file = "coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260"}, + {file = "coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239"}, +] + +[[package]] +name = "coverage" +version = "7.10.7" +extras = ["toml"] +requires_python = ">=3.9" +summary = "Code coverage measurement for Python" +groups = ["dev"] +dependencies = [ + "coverage==7.10.7", + "tomli; python_full_version <= \"3.11.0a6\"", +] +files = [ + {file = "coverage-7.10.7-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7bb3b9ddb87ef7725056572368040c32775036472d5a033679d1fa6c8dc08417"}, + {file = "coverage-7.10.7-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:18afb24843cbc175687225cab1138c95d262337f5473512010e46831aa0c2973"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:399a0b6347bcd3822be369392932884b8216d0944049ae22925631a9b3d4ba4c"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:314f2c326ded3f4b09be11bc282eb2fc861184bc95748ae67b360ac962770be7"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c41e71c9cfb854789dee6fc51e46743a6d138b1803fab6cb860af43265b42ea6"}, + {file = "coverage-7.10.7-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = 
"sha256:bc01f57ca26269c2c706e838f6422e2a8788e41b3e3c65e2f41148212e57cd59"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a6442c59a8ac8b85812ce33bc4d05bde3fb22321fa8294e2a5b487c3505f611b"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:78a384e49f46b80fb4c901d52d92abe098e78768ed829c673fbb53c498bef73a"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:5e1e9802121405ede4b0133aa4340ad8186a1d2526de5b7c3eca519db7bb89fb"}, + {file = "coverage-7.10.7-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:d41213ea25a86f69efd1575073d34ea11aabe075604ddf3d148ecfec9e1e96a1"}, + {file = "coverage-7.10.7-cp312-cp312-win32.whl", hash = "sha256:77eb4c747061a6af8d0f7bdb31f1e108d172762ef579166ec84542f711d90256"}, + {file = "coverage-7.10.7-cp312-cp312-win_amd64.whl", hash = "sha256:f51328ffe987aecf6d09f3cd9d979face89a617eacdaea43e7b3080777f647ba"}, + {file = "coverage-7.10.7-cp312-cp312-win_arm64.whl", hash = "sha256:bda5e34f8a75721c96085903c6f2197dc398c20ffd98df33f866a9c8fd95f4bf"}, + {file = "coverage-7.10.7-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:981a651f543f2854abd3b5fcb3263aac581b18209be49863ba575de6edf4c14d"}, + {file = "coverage-7.10.7-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:73ab1601f84dc804f7812dc297e93cd99381162da39c47040a827d4e8dafe63b"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:a8b6f03672aa6734e700bbcd65ff050fd19cddfec4b031cc8cf1c6967de5a68e"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:10b6ba00ab1132a0ce4428ff68cf50a25efd6840a42cdf4239c9b99aad83be8b"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c79124f70465a150e89340de5963f936ee97097d2ef76c869708c4248c63ca49"}, + {file = "coverage-7.10.7-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:69212fbccdbd5b0e39eac4067e20a4a5256609e209547d86f740d68ad4f04911"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:7ea7c6c9d0d286d04ed3541747e6597cbe4971f22648b68248f7ddcd329207f0"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b9be91986841a75042b3e3243d0b3cb0b2434252b977baaf0cd56e960fe1e46f"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:b281d5eca50189325cfe1f365fafade89b14b4a78d9b40b05ddd1fc7d2a10a9c"}, + {file = "coverage-7.10.7-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:99e4aa63097ab1118e75a848a28e40d68b08a5e19ce587891ab7fd04475e780f"}, + {file = "coverage-7.10.7-cp313-cp313-win32.whl", hash = "sha256:dc7c389dce432500273eaf48f410b37886be9208b2dd5710aaf7c57fd442c698"}, + {file = "coverage-7.10.7-cp313-cp313-win_amd64.whl", hash = "sha256:cac0fdca17b036af3881a9d2729a850b76553f3f716ccb0360ad4dbc06b3b843"}, + {file = "coverage-7.10.7-cp313-cp313-win_arm64.whl", hash = "sha256:4b6f236edf6e2f9ae8fcd1332da4e791c1b6ba0dc16a2dc94590ceccb482e546"}, + {file = "coverage-7.10.7-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:a0ec07fd264d0745ee396b666d47cef20875f4ff2375d7c4f58235886cc1ef0c"}, + {file = "coverage-7.10.7-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:dd5e856ebb7bfb7672b0086846db5afb4567a7b9714b8a0ebafd211ec7ce6a15"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = 
"sha256:f57b2a3c8353d3e04acf75b3fed57ba41f5c0646bbf1d10c7c282291c97936b4"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:1ef2319dd15a0b009667301a3f84452a4dc6fddfd06b0c5c53ea472d3989fbf0"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:83082a57783239717ceb0ad584de3c69cf581b2a95ed6bf81ea66034f00401c0"}, + {file = "coverage-7.10.7-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:50aa94fb1fb9a397eaa19c0d5ec15a5edd03a47bf1a3a6111a16b36e190cff65"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2120043f147bebb41c85b97ac45dd173595ff14f2a584f2963891cbcc3091541"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:2fafd773231dd0378fdba66d339f84904a8e57a262f583530f4f156ab83863e6"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:0b944ee8459f515f28b851728ad224fa2d068f1513ef6b7ff1efafeb2185f999"}, + {file = "coverage-7.10.7-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4b583b97ab2e3efe1b3e75248a9b333bd3f8b0b1b8e5b45578e05e5850dfb2c2"}, + {file = "coverage-7.10.7-cp313-cp313t-win32.whl", hash = "sha256:2a78cd46550081a7909b3329e2266204d584866e8d97b898cd7fb5ac8d888b1a"}, + {file = "coverage-7.10.7-cp313-cp313t-win_amd64.whl", hash = "sha256:33a5e6396ab684cb43dc7befa386258acb2d7fae7f67330ebb85ba4ea27938eb"}, + {file = "coverage-7.10.7-cp313-cp313t-win_arm64.whl", hash = "sha256:86b0e7308289ddde73d863b7683f596d8d21c7d8664ce1dee061d0bcf3fbb4bb"}, + {file = "coverage-7.10.7-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:b06f260b16ead11643a5a9f955bd4b5fd76c1a4c6796aeade8520095b75de520"}, + {file = "coverage-7.10.7-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:212f8f2e0612778f09c55dd4872cb1f64a1f2b074393d139278ce902064d5b32"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3445258bcded7d4aa630ab8296dea4d3f15a255588dd535f980c193ab6b95f3f"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:bb45474711ba385c46a0bfe696c695a929ae69ac636cda8f532be9e8c93d720a"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:813922f35bd800dca9994c5971883cbc0d291128a5de6b167c7aa697fcf59360"}, + {file = "coverage-7.10.7-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:93c1b03552081b2a4423091d6fb3787265b8f86af404cff98d1b5342713bdd69"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:cc87dd1b6eaf0b848eebb1c86469b9f72a1891cb42ac7adcfbce75eadb13dd14"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:39508ffda4f343c35f3236fe8d1a6634a51f4581226a1262769d7f970e73bffe"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:925a1edf3d810537c5a3abe78ec5530160c5f9a26b1f4270b40e62cc79304a1e"}, + {file = "coverage-7.10.7-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2c8b9a0636f94c43cd3576811e05b89aa9bc2d0a85137affc544ae5cb0e4bfbd"}, + {file = "coverage-7.10.7-cp314-cp314-win32.whl", hash = "sha256:b7b8288eb7cdd268b0304632da8cb0bb93fadcfec2fe5712f7b9cc8f4d487be2"}, + {file = "coverage-7.10.7-cp314-cp314-win_amd64.whl", hash = "sha256:1ca6db7c8807fb9e755d0379ccc39017ce0a84dcd26d14b5a03b78563776f681"}, 
+ {file = "coverage-7.10.7-cp314-cp314-win_arm64.whl", hash = "sha256:097c1591f5af4496226d5783d036bf6fd6cd0cbc132e071b33861de756efb880"}, + {file = "coverage-7.10.7-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:a62c6ef0d50e6de320c270ff91d9dd0a05e7250cac2a800b7784bae474506e63"}, + {file = "coverage-7.10.7-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:9fa6e4dd51fe15d8738708a973470f67a855ca50002294852e9571cdbd9433f2"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:8fb190658865565c549b6b4706856d6a7b09302c797eb2cf8e7fe9dabb043f0d"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:affef7c76a9ef259187ef31599a9260330e0335a3011732c4b9effa01e1cd6e0"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6e16e07d85ca0cf8bafe5f5d23a0b850064e8e945d5677492b06bbe6f09cc699"}, + {file = "coverage-7.10.7-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:03ffc58aacdf65d2a82bbeb1ffe4d01ead4017a21bfd0454983b88ca73af94b9"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:1b4fd784344d4e52647fd7857b2af5b3fbe6c239b0b5fa63e94eb67320770e0f"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:0ebbaddb2c19b71912c6f2518e791aa8b9f054985a0769bdb3a53ebbc765c6a1"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:a2d9a3b260cc1d1dbdb1c582e63ddcf5363426a1a68faa0f5da28d8ee3c722a0"}, + {file = "coverage-7.10.7-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a3cc8638b2480865eaa3926d192e64ce6c51e3d29c849e09d5b4ad95efae5399"}, + {file = "coverage-7.10.7-cp314-cp314t-win32.whl", hash = "sha256:67f8c5cbcd3deb7a60b3345dffc89a961a484ed0af1f6f73de91705cc6e31235"}, + {file = "coverage-7.10.7-cp314-cp314t-win_amd64.whl", hash = "sha256:e1ed71194ef6dea7ed2d5cb5f7243d4bcd334bfb63e59878519be558078f848d"}, + {file = "coverage-7.10.7-cp314-cp314t-win_arm64.whl", hash = "sha256:7fe650342addd8524ca63d77b2362b02345e5f1a093266787d210c70a50b471a"}, + {file = "coverage-7.10.7-py3-none-any.whl", hash = "sha256:f7941f6f2fe6dd6807a1208737b8a0cbcf1cc6d7b07d24998ad2d63590868260"}, + {file = "coverage-7.10.7.tar.gz", hash = "sha256:f4ab143ab113be368a3e9b795f9cd7906c5ef407d6173fe9675a902e1fffc239"}, +] + +[[package]] +name = "cycler" +version = "0.12.1" +requires_python = ">=3.8" +summary = "Composable style cycles" +groups = ["default"] +files = [ + {file = "cycler-0.12.1-py3-none-any.whl", hash = "sha256:85cef7cff222d8644161529808465972e51340599459b8ac3ccbac5a854e0d30"}, + {file = "cycler-0.12.1.tar.gz", hash = "sha256:88bb128f02ba341da8ef447245a9e138fae777f6a23943da4540077d3601eb1c"}, +] + +[[package]] +name = "distlib" +version = "0.4.0" +summary = "Distribution utilities" +groups = ["dev"] +files = [ + {file = "distlib-0.4.0-py2.py3-none-any.whl", hash = "sha256:9659f7d87e46584a30b5780e43ac7a2143098441670ff0a49d5f9034c54a6c16"}, + {file = "distlib-0.4.0.tar.gz", hash = "sha256:feec40075be03a04501a973d81f633735b4b69f98b05450592310c0f401a4e0d"}, +] + +[[package]] +name = "durationpy" +version = "0.10" +summary = "Module for converting between datetime.timedelta and Go's Duration strings." 
+groups = ["default"] +files = [ + {file = "durationpy-0.10-py3-none-any.whl", hash = "sha256:3b41e1b601234296b4fb368338fdcd3e13e0b4fb5b67345948f4f2bf9868b286"}, + {file = "durationpy-0.10.tar.gz", hash = "sha256:1fa6893409a6e739c9c72334fc65cca1f355dbdd93405d30f726deb5bde42fba"}, +] + +[[package]] +name = "filelock" +version = "3.19.1" +requires_python = ">=3.9" +summary = "A platform independent file lock." +groups = ["default", "dev"] +files = [ + {file = "filelock-3.19.1-py3-none-any.whl", hash = "sha256:d38e30481def20772f5baf097c122c3babc4fcdb7e14e57049eb9d88c6dc017d"}, + {file = "filelock-3.19.1.tar.gz", hash = "sha256:66eda1888b0171c998b35be2bcc0f6d75c388a7ce20c3f3f37aa8e96c2dddf58"}, +] + +[[package]] +name = "fonttools" +version = "4.60.1" +requires_python = ">=3.9" +summary = "Tools to manipulate font files" +groups = ["default"] +files = [ + {file = "fonttools-4.60.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:7b0c6d57ab00dae9529f3faf187f2254ea0aa1e04215cf2f1a8ec277c96661bc"}, + {file = "fonttools-4.60.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:839565cbf14645952d933853e8ade66a463684ed6ed6c9345d0faf1f0e868877"}, + {file = "fonttools-4.60.1-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8177ec9676ea6e1793c8a084a90b65a9f778771998eb919d05db6d4b1c0b114c"}, + {file = "fonttools-4.60.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:996a4d1834524adbb423385d5a629b868ef9d774670856c63c9a0408a3063401"}, + {file = "fonttools-4.60.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a46b2f450bc79e06ef3b6394f0c68660529ed51692606ad7f953fc2e448bc903"}, + {file = "fonttools-4.60.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6ec722ee589e89a89f5b7574f5c45604030aa6ae24cb2c751e2707193b466fed"}, + {file = "fonttools-4.60.1-cp312-cp312-win32.whl", hash = "sha256:b2cf105cee600d2de04ca3cfa1f74f1127f8455b71dbad02b9da6ec266e116d6"}, + {file = "fonttools-4.60.1-cp312-cp312-win_amd64.whl", hash = "sha256:992775c9fbe2cf794786fa0ffca7f09f564ba3499b8fe9f2f80bd7197db60383"}, + {file = "fonttools-4.60.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:6f68576bb4bbf6060c7ab047b1574a1ebe5c50a17de62830079967b211059ebb"}, + {file = "fonttools-4.60.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:eedacb5c5d22b7097482fa834bda0dafa3d914a4e829ec83cdea2a01f8c813c4"}, + {file = "fonttools-4.60.1-cp313-cp313-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:b33a7884fabd72bdf5f910d0cf46be50dce86a0362a65cfc746a4168c67eb96c"}, + {file = "fonttools-4.60.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2409d5fb7b55fd70f715e6d34e7a6e4f7511b8ad29a49d6df225ee76da76dd77"}, + {file = "fonttools-4.60.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:c8651e0d4b3bdeda6602b85fdc2abbefc1b41e573ecb37b6779c4ca50753a199"}, + {file = "fonttools-4.60.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:145daa14bf24824b677b9357c5e44fd8895c2a8f53596e1b9ea3496081dc692c"}, + {file = "fonttools-4.60.1-cp313-cp313-win32.whl", hash = "sha256:2299df884c11162617a66b7c316957d74a18e3758c0274762d2cc87df7bc0272"}, + {file = "fonttools-4.60.1-cp313-cp313-win_amd64.whl", hash = "sha256:a3db56f153bd4c5c2b619ab02c5db5192e222150ce5a1bc10f16164714bc39ac"}, + {file = "fonttools-4.60.1-cp314-cp314-macosx_10_13_universal2.whl", hash = 
"sha256:a884aef09d45ba1206712c7dbda5829562d3fea7726935d3289d343232ecb0d3"}, + {file = "fonttools-4.60.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8a44788d9d91df72d1a5eac49b31aeb887a5f4aab761b4cffc4196c74907ea85"}, + {file = "fonttools-4.60.1-cp314-cp314-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:e852d9dda9f93ad3651ae1e3bb770eac544ec93c3807888798eccddf84596537"}, + {file = "fonttools-4.60.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:154cb6ee417e417bf5f7c42fe25858c9140c26f647c7347c06f0cc2d47eff003"}, + {file = "fonttools-4.60.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:5664fd1a9ea7f244487ac8f10340c4e37664675e8667d6fee420766e0fb3cf08"}, + {file = "fonttools-4.60.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:583b7f8e3c49486e4d489ad1deacfb8d5be54a8ef34d6df824f6a171f8511d99"}, + {file = "fonttools-4.60.1-cp314-cp314-win32.whl", hash = "sha256:66929e2ea2810c6533a5184f938502cfdaea4bc3efb7130d8cc02e1c1b4108d6"}, + {file = "fonttools-4.60.1-cp314-cp314-win_amd64.whl", hash = "sha256:f3d5be054c461d6a2268831f04091dc82753176f6ea06dc6047a5e168265a987"}, + {file = "fonttools-4.60.1-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b6379e7546ba4ae4b18f8ae2b9bc5960936007a1c0e30b342f662577e8bc3299"}, + {file = "fonttools-4.60.1-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9d0ced62b59e0430b3690dbc5373df1c2aa7585e9a8ce38eff87f0fd993c5b01"}, + {file = "fonttools-4.60.1-cp314-cp314t-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:875cb7764708b3132637f6c5fb385b16eeba0f7ac9fa45a69d35e09b47045801"}, + {file = "fonttools-4.60.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a184b2ea57b13680ab6d5fbde99ccef152c95c06746cb7718c583abd8f945ccc"}, + {file = "fonttools-4.60.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:026290e4ec76583881763fac284aca67365e0be9f13a7fb137257096114cb3bc"}, + {file = "fonttools-4.60.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f0e8817c7d1a0c2eedebf57ef9a9896f3ea23324769a9a2061a80fe8852705ed"}, + {file = "fonttools-4.60.1-cp314-cp314t-win32.whl", hash = "sha256:1410155d0e764a4615774e5c2c6fc516259fe3eca5882f034eb9bfdbee056259"}, + {file = "fonttools-4.60.1-cp314-cp314t-win_amd64.whl", hash = "sha256:022beaea4b73a70295b688f817ddc24ed3e3418b5036ffcd5658141184ef0d0c"}, + {file = "fonttools-4.60.1-py3-none-any.whl", hash = "sha256:906306ac7afe2156fcf0042173d6ebbb05416af70f6b370967b47f8f00103bbb"}, + {file = "fonttools-4.60.1.tar.gz", hash = "sha256:ef00af0439ebfee806b25f24c8f92109157ff3fac5731dc7867957812e87b8d9"}, +] + +[[package]] +name = "frozenlist" +version = "1.7.0" +requires_python = ">=3.9" +summary = "A list-like structure which implements collections.abc.MutableSequence" +groups = ["default"] +files = [ + {file = "frozenlist-1.7.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:3dbf9952c4bb0e90e98aec1bd992b3318685005702656bc6f67c1a32b76787f2"}, + {file = "frozenlist-1.7.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:1f5906d3359300b8a9bb194239491122e6cf1444c2efb88865426f170c262cdb"}, + {file = "frozenlist-1.7.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3dabd5a8f84573c8d10d8859a50ea2dec01eea372031929871368c09fa103478"}, + {file = "frozenlist-1.7.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:aa57daa5917f1738064f302bf2626281a1cb01920c32f711fbc7bc36111058a8"}, + {file = "frozenlist-1.7.0-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c193dda2b6d49f4c4398962810fa7d7c78f032bf45572b3e04dd5249dff27e08"}, + {file = "frozenlist-1.7.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bfe2b675cf0aaa6d61bf8fbffd3c274b3c9b7b1623beb3809df8a81399a4a9c4"}, + {file = "frozenlist-1.7.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8fc5d5cda37f62b262405cf9652cf0856839c4be8ee41be0afe8858f17f4c94b"}, + {file = "frozenlist-1.7.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b0d5ce521d1dd7d620198829b87ea002956e4319002ef0bc8d3e6d045cb4646e"}, + {file = "frozenlist-1.7.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:488d0a7d6a0008ca0db273c542098a0fa9e7dfaa7e57f70acef43f32b3f69dca"}, + {file = "frozenlist-1.7.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:15a7eaba63983d22c54d255b854e8108e7e5f3e89f647fc854bd77a237e767df"}, + {file = "frozenlist-1.7.0-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1eaa7e9c6d15df825bf255649e05bd8a74b04a4d2baa1ae46d9c2d00b2ca2cb5"}, + {file = "frozenlist-1.7.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:e4389e06714cfa9d47ab87f784a7c5be91d3934cd6e9a7b85beef808297cc025"}, + {file = "frozenlist-1.7.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:73bd45e1488c40b63fe5a7df892baf9e2a4d4bb6409a2b3b78ac1c6236178e01"}, + {file = "frozenlist-1.7.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:99886d98e1643269760e5fe0df31e5ae7050788dd288947f7f007209b8c33f08"}, + {file = "frozenlist-1.7.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:290a172aae5a4c278c6da8a96222e6337744cd9c77313efe33d5670b9f65fc43"}, + {file = "frozenlist-1.7.0-cp312-cp312-win32.whl", hash = "sha256:426c7bc70e07cfebc178bc4c2bf2d861d720c4fff172181eeb4a4c41d4ca2ad3"}, + {file = "frozenlist-1.7.0-cp312-cp312-win_amd64.whl", hash = "sha256:563b72efe5da92e02eb68c59cb37205457c977aa7a449ed1b37e6939e5c47c6a"}, + {file = "frozenlist-1.7.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ee80eeda5e2a4e660651370ebffd1286542b67e268aa1ac8d6dbe973120ef7ee"}, + {file = "frozenlist-1.7.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d1a81c85417b914139e3a9b995d4a1c84559afc839a93cf2cb7f15e6e5f6ed2d"}, + {file = "frozenlist-1.7.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:cbb65198a9132ebc334f237d7b0df163e4de83fb4f2bdfe46c1e654bdb0c5d43"}, + {file = "frozenlist-1.7.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dab46c723eeb2c255a64f9dc05b8dd601fde66d6b19cdb82b2e09cc6ff8d8b5d"}, + {file = "frozenlist-1.7.0-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:6aeac207a759d0dedd2e40745575ae32ab30926ff4fa49b1635def65806fddee"}, + {file = "frozenlist-1.7.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bd8c4e58ad14b4fa7802b8be49d47993182fdd4023393899632c88fd8cd994eb"}, + {file = "frozenlist-1.7.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:04fb24d104f425da3540ed83cbfc31388a586a7696142004c577fa61c6298c3f"}, + {file = "frozenlist-1.7.0-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = 
"sha256:6a5c505156368e4ea6b53b5ac23c92d7edc864537ff911d2fb24c140bb175e60"}, + {file = "frozenlist-1.7.0-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8bd7eb96a675f18aa5c553eb7ddc24a43c8c18f22e1f9925528128c052cdbe00"}, + {file = "frozenlist-1.7.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:05579bf020096fe05a764f1f84cd104a12f78eaab68842d036772dc6d4870b4b"}, + {file = "frozenlist-1.7.0-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:376b6222d114e97eeec13d46c486facd41d4f43bab626b7c3f6a8b4e81a5192c"}, + {file = "frozenlist-1.7.0-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:0aa7e176ebe115379b5b1c95b4096fb1c17cce0847402e227e712c27bdb5a949"}, + {file = "frozenlist-1.7.0-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:3fbba20e662b9c2130dc771e332a99eff5da078b2b2648153a40669a6d0e36ca"}, + {file = "frozenlist-1.7.0-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:f3f4410a0a601d349dd406b5713fec59b4cee7e71678d5b17edda7f4655a940b"}, + {file = "frozenlist-1.7.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:e2cdfaaec6a2f9327bf43c933c0319a7c429058e8537c508964a133dffee412e"}, + {file = "frozenlist-1.7.0-cp313-cp313-win32.whl", hash = "sha256:5fc4df05a6591c7768459caba1b342d9ec23fa16195e744939ba5914596ae3e1"}, + {file = "frozenlist-1.7.0-cp313-cp313-win_amd64.whl", hash = "sha256:52109052b9791a3e6b5d1b65f4b909703984b770694d3eb64fad124c835d7cba"}, + {file = "frozenlist-1.7.0-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:a6f86e4193bb0e235ef6ce3dde5cbabed887e0b11f516ce8a0f4d3b33078ec2d"}, + {file = "frozenlist-1.7.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:82d664628865abeb32d90ae497fb93df398a69bb3434463d172b80fc25b0dd7d"}, + {file = "frozenlist-1.7.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:912a7e8375a1c9a68325a902f3953191b7b292aa3c3fb0d71a216221deca460b"}, + {file = "frozenlist-1.7.0-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9537c2777167488d539bc5de2ad262efc44388230e5118868e172dd4a552b146"}, + {file = "frozenlist-1.7.0-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:f34560fb1b4c3e30ba35fa9a13894ba39e5acfc5f60f57d8accde65f46cc5e74"}, + {file = "frozenlist-1.7.0-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:acd03d224b0175f5a850edc104ac19040d35419eddad04e7cf2d5986d98427f1"}, + {file = "frozenlist-1.7.0-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2038310bc582f3d6a09b3816ab01737d60bf7b1ec70f5356b09e84fb7408ab1"}, + {file = "frozenlist-1.7.0-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b8c05e4c8e5f36e5e088caa1bf78a687528f83c043706640a92cb76cd6999384"}, + {file = "frozenlist-1.7.0-cp313-cp313t-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:765bb588c86e47d0b68f23c1bee323d4b703218037765dcf3f25c838c6fecceb"}, + {file = "frozenlist-1.7.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:32dc2e08c67d86d0969714dd484fd60ff08ff81d1a1e40a77dd34a387e6ebc0c"}, + {file = "frozenlist-1.7.0-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:c0303e597eb5a5321b4de9c68e9845ac8f290d2ab3f3e2c864437d3c5a30cd65"}, + {file = "frozenlist-1.7.0-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:a47f2abb4e29b3a8d0b530f7c3598badc6b134562b1a5caee867f7c62fee51e3"}, + {file = 
"frozenlist-1.7.0-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:3d688126c242a6fabbd92e02633414d40f50bb6002fa4cf995a1d18051525657"}, + {file = "frozenlist-1.7.0-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:4e7e9652b3d367c7bd449a727dc79d5043f48b88d0cbfd4f9f1060cf2b414104"}, + {file = "frozenlist-1.7.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1a85e345b4c43db8b842cab1feb41be5cc0b10a1830e6295b69d7310f99becaf"}, + {file = "frozenlist-1.7.0-cp313-cp313t-win32.whl", hash = "sha256:3a14027124ddb70dfcee5148979998066897e79f89f64b13328595c4bdf77c81"}, + {file = "frozenlist-1.7.0-cp313-cp313t-win_amd64.whl", hash = "sha256:3bf8010d71d4507775f658e9823210b7427be36625b387221642725b515dcf3e"}, + {file = "frozenlist-1.7.0-py3-none-any.whl", hash = "sha256:9a5af342e34f7e97caf8c995864c7a396418ae2859cc6fdf1b1073020d516a7e"}, + {file = "frozenlist-1.7.0.tar.gz", hash = "sha256:2e310d81923c2437ea8670467121cc3e9b0f76d3043cc1d2331d56c7fb7a3a8f"}, +] + +[[package]] +name = "fsspec" +version = "2025.9.0" +requires_python = ">=3.9" +summary = "File-system specification" +groups = ["default"] +files = [ + {file = "fsspec-2025.9.0-py3-none-any.whl", hash = "sha256:530dc2a2af60a414a832059574df4a6e10cce927f6f4a78209390fe38955cfb7"}, + {file = "fsspec-2025.9.0.tar.gz", hash = "sha256:19fd429483d25d28b65ec68f9f4adc16c17ea2c7c7bf54ec61360d478fb19c19"}, +] + +[[package]] +name = "gitdb" +version = "4.0.12" +requires_python = ">=3.7" +summary = "Git Object Database" +groups = ["default"] +dependencies = [ + "smmap<6,>=3.0.1", +] +files = [ + {file = "gitdb-4.0.12-py3-none-any.whl", hash = "sha256:67073e15955400952c6565cc3e707c554a4eea2e428946f7a4c162fab9bd9bcf"}, + {file = "gitdb-4.0.12.tar.gz", hash = "sha256:5ef71f855d191a3326fcfbc0d5da835f26b13fbcba60c32c21091c349ffdb571"}, +] + +[[package]] +name = "gitpython" +version = "3.1.45" +requires_python = ">=3.7" +summary = "GitPython is a Python library used to interact with Git repositories" +groups = ["default"] +dependencies = [ + "gitdb<5,>=4.0.1", + "typing-extensions>=3.10.0.2; python_version < \"3.10\"", +] +files = [ + {file = "gitpython-3.1.45-py3-none-any.whl", hash = "sha256:8908cb2e02fb3b93b7eb0f2827125cb699869470432cc885f019b8fd0fccff77"}, + {file = "gitpython-3.1.45.tar.gz", hash = "sha256:85b0ee964ceddf211c41b9f27a49086010a190fd8132a24e21f362a4b36a791c"}, +] + +[[package]] +name = "google-auth" +version = "2.41.1" +requires_python = ">=3.7" +summary = "Google Authentication Library" +groups = ["default"] +dependencies = [ + "cachetools<7.0,>=2.0.0", + "pyasn1-modules>=0.2.1", + "rsa<5,>=3.1.4", +] +files = [ + {file = "google_auth-2.41.1-py2.py3-none-any.whl", hash = "sha256:754843be95575b9a19c604a848a41be03f7f2afd8c019f716dc1f51ee41c639d"}, + {file = "google_auth-2.41.1.tar.gz", hash = "sha256:b76b7b1f9e61f0cb7e88870d14f6a94aeef248959ef6992670efee37709cbfd2"}, +] + +[[package]] +name = "hf-xet" +version = "1.1.10" +requires_python = ">=3.8" +summary = "Fast transfer of large files with the Hugging Face Hub." 
+groups = ["default"] +marker = "platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"" +files = [ + {file = "hf_xet-1.1.10-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:686083aca1a6669bc85c21c0563551cbcdaa5cf7876a91f3d074a030b577231d"}, + {file = "hf_xet-1.1.10-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:71081925383b66b24eedff3013f8e6bbd41215c3338be4b94ba75fd75b21513b"}, + {file = "hf_xet-1.1.10-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6b6bceb6361c80c1cc42b5a7b4e3efd90e64630bcf11224dcac50ef30a47e435"}, + {file = "hf_xet-1.1.10-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:eae7c1fc8a664e54753ffc235e11427ca61f4b0477d757cc4eb9ae374b69f09c"}, + {file = "hf_xet-1.1.10-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0a0005fd08f002180f7a12d4e13b22be277725bc23ed0529f8add5c7a6309c06"}, + {file = "hf_xet-1.1.10-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:f900481cf6e362a6c549c61ff77468bd59d6dd082f3170a36acfef2eb6a6793f"}, + {file = "hf_xet-1.1.10-cp37-abi3-win_amd64.whl", hash = "sha256:5f54b19cc347c13235ae7ee98b330c26dd65ef1df47e5316ffb1e87713ca7045"}, + {file = "hf_xet-1.1.10.tar.gz", hash = "sha256:408aef343800a2102374a883f283ff29068055c111f003ff840733d3b715bb97"}, +] + +[[package]] +name = "huggingface-hub" +version = "0.34.4" +requires_python = ">=3.8.0" +summary = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" +groups = ["default"] +dependencies = [ + "filelock", + "fsspec>=2023.5.0", + "hf-xet<2.0.0,>=1.1.3; platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"arm64\" or platform_machine == \"aarch64\"", + "packaging>=20.9", + "pyyaml>=5.1", + "requests", + "tqdm>=4.42.1", + "typing-extensions>=3.7.4.3", +] +files = [ + {file = "huggingface_hub-0.34.4-py3-none-any.whl", hash = "sha256:9b365d781739c93ff90c359844221beef048403f1bc1f1c123c191257c3c890a"}, + {file = "huggingface_hub-0.34.4.tar.gz", hash = "sha256:a4228daa6fb001be3f4f4bdaf9a0db00e1739235702848df00885c9b5742c85c"}, +] + +[[package]] +name = "identify" +version = "2.6.15" +requires_python = ">=3.9" +summary = "File identification library for Python" +groups = ["dev"] +files = [ + {file = "identify-2.6.15-py2.py3-none-any.whl", hash = "sha256:1181ef7608e00704db228516541eb83a88a9f94433a8c80bb9b5bd54b1d81757"}, + {file = "identify-2.6.15.tar.gz", hash = "sha256:e4f4864b96c6557ef2a1e1c951771838f4edc9df3a72ec7118b338801b11c7bf"}, +] + +[[package]] +name = "idna" +version = "3.10" +requires_python = ">=3.6" +summary = "Internationalized Domain Names in Applications (IDNA)" +groups = ["default"] +files = [ + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +requires_python = ">=3.8" +summary = "brain-dead simple config-ini parsing" +groups = ["dev"] +files = [ + {file = "iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760"}, + {file = "iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7"}, +] + +[[package]] +name = "jinja2" +version = "3.1.6" +requires_python = ">=3.7" +summary = "A very fast and expressive template engine." 
+groups = ["default"] +dependencies = [ + "MarkupSafe>=2.0", +] +files = [ + {file = "jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67"}, + {file = "jinja2-3.1.6.tar.gz", hash = "sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d"}, +] + +[[package]] +name = "jsonschema" +version = "4.25.1" +requires_python = ">=3.9" +summary = "An implementation of JSON Schema validation for Python" +groups = ["default"] +dependencies = [ + "attrs>=22.2.0", + "jsonschema-specifications>=2023.03.6", + "referencing>=0.28.4", + "rpds-py>=0.7.1", +] +files = [ + {file = "jsonschema-4.25.1-py3-none-any.whl", hash = "sha256:3fba0169e345c7175110351d456342c364814cfcf3b964ba4587f22915230a63"}, + {file = "jsonschema-4.25.1.tar.gz", hash = "sha256:e4a9655ce0da0c0b67a085847e00a3a51449e1157f4f75e9fb5aa545e122eb85"}, +] + +[[package]] +name = "jsonschema-specifications" +version = "2025.9.1" +requires_python = ">=3.9" +summary = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" +groups = ["default"] +dependencies = [ + "referencing>=0.31.0", +] +files = [ + {file = "jsonschema_specifications-2025.9.1-py3-none-any.whl", hash = "sha256:98802fee3a11ee76ecaca44429fda8a41bff98b00a0f2838151b113f210cc6fe"}, + {file = "jsonschema_specifications-2025.9.1.tar.gz", hash = "sha256:b540987f239e745613c7a9176f3edb72b832a4ac465cf02712288397832b5e8d"}, +] + +[[package]] +name = "kiwisolver" +version = "1.4.9" +requires_python = ">=3.10" +summary = "A fast implementation of the Cassowary constraint solver" +groups = ["default"] +files = [ + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:ac5a486ac389dddcc5bef4f365b6ae3ffff2c433324fb38dd35e3fab7c957999"}, + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f2ba92255faa7309d06fe44c3a4a97efe1c8d640c2a79a5ef728b685762a6fd2"}, + {file = "kiwisolver-1.4.9-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:4a2899935e724dd1074cb568ce7ac0dce28b2cd6ab539c8e001a8578eb106d14"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f6008a4919fdbc0b0097089f67a1eb55d950ed7e90ce2cc3e640abadd2757a04"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:67bb8b474b4181770f926f7b7d2f8c0248cbcb78b660fdd41a47054b28d2a752"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2327a4a30d3ee07d2fbe2e7933e8a37c591663b96ce42a00bc67461a87d7df77"}, + {file = "kiwisolver-1.4.9-cp312-cp312-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7a08b491ec91b1d5053ac177afe5290adacf1f0f6307d771ccac5de30592d198"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d8fc5c867c22b828001b6a38d2eaeb88160bf5783c6cb4a5e440efc981ce286d"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:3b3115b2581ea35bb6d1f24a4c90af37e5d9b49dcff267eeed14c3893c5b86ab"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:858e4c22fb075920b96a291928cb7dea5644e94c0ee4fcd5af7e865655e4ccf2"}, + {file = "kiwisolver-1.4.9-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:ed0fecd28cc62c54b262e3736f8bb2512d8dcfdc2bcf08be5f47f96bf405b145"}, + {file = "kiwisolver-1.4.9-cp312-cp312-win_amd64.whl", hash = "sha256:f68208a520c3d86ea51acf688a3e3002615a7f0238002cccc17affecc86a8a54"}, + {file = 
"kiwisolver-1.4.9-cp312-cp312-win_arm64.whl", hash = "sha256:2c1a4f57df73965f3f14df20b80ee29e6a7930a57d2d9e8491a25f676e197c60"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:a5d0432ccf1c7ab14f9949eec60c5d1f924f17c037e9f8b33352fa05799359b8"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efb3a45b35622bb6c16dbfab491a8f5a391fe0e9d45ef32f4df85658232ca0e2"}, + {file = "kiwisolver-1.4.9-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a12cf6398e8a0a001a059747a1cbf24705e18fe413bc22de7b3d15c67cffe3f"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b67e6efbf68e077dd71d1a6b37e43e1a99d0bff1a3d51867d45ee8908b931098"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5656aa670507437af0207645273ccdfee4f14bacd7f7c67a4306d0dcaeaf6eed"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:bfc08add558155345129c7803b3671cf195e6a56e7a12f3dde7c57d9b417f525"}, + {file = "kiwisolver-1.4.9-cp313-cp313-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:40092754720b174e6ccf9e845d0d8c7d8e12c3d71e7fc35f55f3813e96376f78"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:497d05f29a1300d14e02e6441cf0f5ee81c1ff5a304b0d9fb77423974684e08b"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:bdd1a81a1860476eb41ac4bc1e07b3f07259e6d55bbf739b79c8aaedcf512799"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:e6b93f13371d341afee3be9f7c5964e3fe61d5fa30f6a30eb49856935dfe4fc3"}, + {file = "kiwisolver-1.4.9-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d75aa530ccfaa593da12834b86a0724f58bff12706659baa9227c2ccaa06264c"}, + {file = "kiwisolver-1.4.9-cp313-cp313-win_amd64.whl", hash = "sha256:dd0a578400839256df88c16abddf9ba14813ec5f21362e1fe65022e00c883d4d"}, + {file = "kiwisolver-1.4.9-cp313-cp313-win_arm64.whl", hash = "sha256:d4188e73af84ca82468f09cadc5ac4db578109e52acb4518d8154698d3a87ca2"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:5a0f2724dfd4e3b3ac5a82436a8e6fd16baa7d507117e4279b660fe8ca38a3a1"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b11d6a633e4ed84fc0ddafd4ebfd8ea49b3f25082c04ad12b8315c11d504dc1"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61874cdb0a36016354853593cffc38e56fc9ca5aa97d2c05d3dcf6922cd55a11"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:60c439763a969a6af93b4881db0eed8fadf93ee98e18cbc35bc8da868d0c4f0c"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:92a2f997387a1b79a75e7803aa7ded2cfbe2823852ccf1ba3bcf613b62ae3197"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a31d512c812daea6d8b3be3b2bfcbeb091dbb09177706569bcfc6240dcf8b41c"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:52a15b0f35dad39862d376df10c5230155243a2c1a436e39eb55623ccbd68185"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a30fd6fdef1430fd9e1ba7b3398b5ee4e2887783917a687d86ba69985fb08748"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = 
"sha256:cc9617b46837c6468197b5945e196ee9ca43057bb7d9d1ae688101e4e1dddf64"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:0ab74e19f6a2b027ea4f845a78827969af45ce790e6cb3e1ebab71bdf9f215ff"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dba5ee5d3981160c28d5490f0d1b7ed730c22470ff7f6cc26cfcfaacb9896a07"}, + {file = "kiwisolver-1.4.9-cp313-cp313t-win_arm64.whl", hash = "sha256:0749fd8f4218ad2e851e11cc4dc05c7cbc0cbc4267bdfdb31782e65aace4ee9c"}, + {file = "kiwisolver-1.4.9-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:9928fe1eb816d11ae170885a74d074f57af3a0d65777ca47e9aeb854a1fba386"}, + {file = "kiwisolver-1.4.9-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d0005b053977e7b43388ddec89fa567f43d4f6d5c2c0affe57de5ebf290dc552"}, + {file = "kiwisolver-1.4.9-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:2635d352d67458b66fd0667c14cb1d4145e9560d503219034a18a87e971ce4f3"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:767c23ad1c58c9e827b649a9ab7809fd5fd9db266a9cf02b0e926ddc2c680d58"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:72d0eb9fba308b8311685c2268cf7d0a0639a6cd027d8128659f72bdd8a024b4"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f68e4f3eeca8fb22cc3d731f9715a13b652795ef657a13df1ad0c7dc0e9731df"}, + {file = "kiwisolver-1.4.9-cp314-cp314-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:d84cd4061ae292d8ac367b2c3fa3aad11cb8625a95d135fe93f286f914f3f5a6"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:a60ea74330b91bd22a29638940d115df9dc00af5035a9a2a6ad9399ffb4ceca5"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:ce6a3a4e106cf35c2d9c4fa17c05ce0b180db622736845d4315519397a77beaf"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:77937e5e2a38a7b48eef0585114fe7930346993a88060d0bf886086d2aa49ef5"}, + {file = "kiwisolver-1.4.9-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:24c175051354f4a28c5d6a31c93906dc653e2bf234e8a4bbfb964892078898ce"}, + {file = "kiwisolver-1.4.9-cp314-cp314-win_amd64.whl", hash = "sha256:0763515d4df10edf6d06a3c19734e2566368980d21ebec439f33f9eb936c07b7"}, + {file = "kiwisolver-1.4.9-cp314-cp314-win_arm64.whl", hash = "sha256:0e4e2bf29574a6a7b7f6cb5fa69293b9f96c928949ac4a53ba3f525dffb87f9c"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:d976bbb382b202f71c67f77b0ac11244021cfa3f7dfd9e562eefcea2df711548"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2489e4e5d7ef9a1c300a5e0196e43d9c739f066ef23270607d45aba368b91f2d"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:e2ea9f7ab7fbf18fffb1b5434ce7c69a07582f7acc7717720f1d69f3e806f90c"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:b34e51affded8faee0dfdb705416153819d8ea9250bbbf7ea1b249bdeb5f1122"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d8aacd3d4b33b772542b2e01beb50187536967b514b00003bdda7589722d2a64"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7cf974dd4e35fa315563ac99d6287a1024e4dc2077b8a7d7cd3d2fb65d283134"}, + {file = 
"kiwisolver-1.4.9-cp314-cp314t-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85bd218b5ecfbee8c8a82e121802dcb519a86044c9c3b2e4aef02fa05c6da370"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:0856e241c2d3df4efef7c04a1e46b1936b6120c9bcf36dd216e3acd84bc4fb21"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:9af39d6551f97d31a4deebeac6f45b156f9755ddc59c07b402c148f5dbb6482a"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:bb4ae2b57fc1d8cbd1cf7b1d9913803681ffa903e7488012be5b76dedf49297f"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:aedff62918805fb62d43a4aa2ecd4482c380dc76cd31bd7c8878588a61bd0369"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-win_amd64.whl", hash = "sha256:1fa333e8b2ce4d9660f2cda9c0e1b6bafcfb2457a9d259faa82289e73ec24891"}, + {file = "kiwisolver-1.4.9-cp314-cp314t-win_arm64.whl", hash = "sha256:4a48a2ce79d65d363597ef7b567ce3d14d68783d2b2263d98db3d9477805ba32"}, + {file = "kiwisolver-1.4.9.tar.gz", hash = "sha256:c3b22c26c6fd6811b0ae8363b95ca8ce4ea3c202d3d0975b2914310ceb1bcc4d"}, +] + +[[package]] +name = "kubernetes" +version = "34.1.0" +requires_python = ">=3.6" +summary = "Kubernetes python client" +groups = ["default"] +dependencies = [ + "certifi>=14.05.14", + "durationpy>=0.7", + "google-auth>=1.0.1", + "python-dateutil>=2.5.3", + "pyyaml>=5.4.1", + "requests", + "requests-oauthlib", + "six>=1.9.0", + "urllib3<2.4.0,>=1.24.2", + "websocket-client!=0.40.0,!=0.41.*,!=0.42.*,>=0.32.0", +] +files = [ + {file = "kubernetes-34.1.0-py2.py3-none-any.whl", hash = "sha256:bffba2272534e224e6a7a74d582deb0b545b7c9879d2cd9e4aae9481d1f2cc2a"}, + {file = "kubernetes-34.1.0.tar.gz", hash = "sha256:8fe8edb0b5d290a2f3ac06596b23f87c658977d46b5f8df9d0f4ea83d0003912"}, +] + +[[package]] +name = "kubernetes-asyncio" +version = "33.3.0" +summary = "Kubernetes asynchronous python client" +groups = ["default"] +dependencies = [ + "aiohttp<4.0.0,>=3.9.0", + "certifi>=14.05.14", + "python-dateutil>=2.5.3", + "pyyaml>=3.12", + "six>=1.9.0", + "urllib3>=1.24.2", +] +files = [ + {file = "kubernetes_asyncio-33.3.0-py3-none-any.whl", hash = "sha256:25e6e265932ebb1aeecbdb30a107dbef3ee0bcd388ed12d092be70915733982b"}, + {file = "kubernetes_asyncio-33.3.0.tar.gz", hash = "sha256:4c59cd4c99b197995ef38ef0c8ff45aab24b84830ebf0ddcb67355caea9674c9"}, +] + +[[package]] +name = "markupsafe" +version = "3.0.3" +requires_python = ">=3.9" +summary = "Safely add untrusted strings to HTML/XML markup." 
+groups = ["default"] +files = [ + {file = "markupsafe-3.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d53197da72cc091b024dd97249dfc7794d6a56530370992a5e1a08983ad9230e"}, + {file = "markupsafe-3.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1872df69a4de6aead3491198eaf13810b565bdbeec3ae2dc8780f14458ec73ce"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a7e8ae81ae39e62a41ec302f972ba6ae23a5c5396c8e60113e9066ef893da0d"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d6dd0be5b5b189d31db7cda48b91d7e0a9795f31430b7f271219ab30f1d3ac9d"}, + {file = "markupsafe-3.0.3-cp312-cp312-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:94c6f0bb423f739146aec64595853541634bde58b2135f27f61c1ffd1cd4d16a"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:be8813b57049a7dc738189df53d69395eba14fb99345e0a5994914a3864c8a4b"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_riscv64.whl", hash = "sha256:83891d0e9fb81a825d9a6d61e3f07550ca70a076484292a70fde82c4b807286f"}, + {file = "markupsafe-3.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:77f0643abe7495da77fb436f50f8dab76dbc6e5fd25d39589a0f1fe6548bfa2b"}, + {file = "markupsafe-3.0.3-cp312-cp312-win32.whl", hash = "sha256:d88b440e37a16e651bda4c7c2b930eb586fd15ca7406cb39e211fcff3bf3017d"}, + {file = "markupsafe-3.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:26a5784ded40c9e318cfc2bdb30fe164bdb8665ded9cd64d500a34fb42067b1c"}, + {file = "markupsafe-3.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:35add3b638a5d900e807944a078b51922212fb3dedb01633a8defc4b01a3c85f"}, + {file = "markupsafe-3.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e1cf1972137e83c5d4c136c43ced9ac51d0e124706ee1c8aa8532c1287fa8795"}, + {file = "markupsafe-3.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:116bb52f642a37c115f517494ea5feb03889e04df47eeff5b130b1808ce7c219"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:133a43e73a802c5562be9bbcd03d090aa5a1fe899db609c29e8c8d815c5f6de6"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ccfcd093f13f0f0b7fdd0f198b90053bf7b2f02a3927a30e63f3ccc9df56b676"}, + {file = "markupsafe-3.0.3-cp313-cp313-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:509fa21c6deb7a7a273d629cf5ec029bc209d1a51178615ddf718f5918992ab9"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a4afe79fb3de0b7097d81da19090f4df4f8d3a2b3adaa8764138aac2e44f3af1"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_riscv64.whl", hash = "sha256:795e7751525cae078558e679d646ae45574b47ed6e7771863fcc079a6171a0fc"}, + {file = "markupsafe-3.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8485f406a96febb5140bfeca44a73e3ce5116b2501ac54fe953e488fb1d03b12"}, + {file = "markupsafe-3.0.3-cp313-cp313-win32.whl", hash = "sha256:bdd37121970bfd8be76c5fb069c7751683bdf373db1ed6c010162b2a130248ed"}, + {file = "markupsafe-3.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:9a1abfdc021a164803f4d485104931fb8f8c1efd55bc6b748d2f5774e78b62c5"}, + {file = "markupsafe-3.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:7e68f88e5b8799aa49c85cd116c932a1ac15caaa3f5db09087854d218359e485"}, + {file = "markupsafe-3.0.3-cp313-cp313t-macosx_10_13_x86_64.whl", 
hash = "sha256:218551f6df4868a8d527e3062d0fb968682fe92054e89978594c28e642c43a73"}, + {file = "markupsafe-3.0.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:3524b778fe5cfb3452a09d31e7b5adefeea8c5be1d43c4f810ba09f2ceb29d37"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4e885a3d1efa2eadc93c894a21770e4bc67899e3543680313b09f139e149ab19"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:8709b08f4a89aa7586de0aadc8da56180242ee0ada3999749b183aa23df95025"}, + {file = "markupsafe-3.0.3-cp313-cp313t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:b8512a91625c9b3da6f127803b166b629725e68af71f8184ae7e7d54686a56d6"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:9b79b7a16f7fedff2495d684f2b59b0457c3b493778c9eed31111be64d58279f"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_riscv64.whl", hash = "sha256:12c63dfb4a98206f045aa9563db46507995f7ef6d83b2f68eda65c307c6829eb"}, + {file = "markupsafe-3.0.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8f71bc33915be5186016f675cd83a1e08523649b0e33efdb898db577ef5bb009"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win32.whl", hash = "sha256:69c0b73548bc525c8cb9a251cddf1931d1db4d2258e9599c28c07ef3580ef354"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win_amd64.whl", hash = "sha256:1b4b79e8ebf6b55351f0d91fe80f893b4743f104bff22e90697db1590e47a218"}, + {file = "markupsafe-3.0.3-cp313-cp313t-win_arm64.whl", hash = "sha256:ad2cf8aa28b8c020ab2fc8287b0f823d0a7d8630784c31e9ee5edea20f406287"}, + {file = "markupsafe-3.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:eaa9599de571d72e2daf60164784109f19978b327a3910d3e9de8c97b5b70cfe"}, + {file = "markupsafe-3.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:c47a551199eb8eb2121d4f0f15ae0f923d31350ab9280078d1e5f12b249e0026"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f34c41761022dd093b4b6896d4810782ffbabe30f2d443ff5f083e0cbbb8c737"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:457a69a9577064c05a97c41f4e65148652db078a3a509039e64d3467b9e7ef97"}, + {file = "markupsafe-3.0.3-cp314-cp314-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e8afc3f2ccfa24215f8cb28dcf43f0113ac3c37c2f0f0806d8c70e4228c5cf4d"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:ec15a59cf5af7be74194f7ab02d0f59a62bdcf1a537677ce67a2537c9b87fcda"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_riscv64.whl", hash = "sha256:0eb9ff8191e8498cca014656ae6b8d61f39da5f95b488805da4bb029cccbfbaf"}, + {file = "markupsafe-3.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:2713baf880df847f2bece4230d4d094280f4e67b1e813eec43b4c0e144a34ffe"}, + {file = "markupsafe-3.0.3-cp314-cp314-win32.whl", hash = "sha256:729586769a26dbceff69f7a7dbbf59ab6572b99d94576a5592625d5b411576b9"}, + {file = "markupsafe-3.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:bdc919ead48f234740ad807933cdf545180bfbe9342c2bb451556db2ed958581"}, + {file = "markupsafe-3.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:5a7d5dc5140555cf21a6fefbdbf8723f06fcd2f63ef108f2854de715e4422cb4"}, + {file = "markupsafe-3.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:1353ef0c1b138e1907ae78e2f6c63ff67501122006b0f9abad68fda5f4ffc6ab"}, + 
{file = "markupsafe-3.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:1085e7fbddd3be5f89cc898938f42c0b3c711fdcb37d75221de2666af647c175"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1b52b4fb9df4eb9ae465f8d0c228a00624de2334f216f178a995ccdcf82c4634"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:fed51ac40f757d41b7c48425901843666a6677e3e8eb0abcff09e4ba6e664f50"}, + {file = "markupsafe-3.0.3-cp314-cp314t-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f190daf01f13c72eac4efd5c430a8de82489d9cff23c364c3ea822545032993e"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:e56b7d45a839a697b5eb268c82a71bd8c7f6c94d6fd50c3d577fa39a9f1409f5"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_riscv64.whl", hash = "sha256:f3e98bb3798ead92273dc0e5fd0f31ade220f59a266ffd8a4f6065e0a3ce0523"}, + {file = "markupsafe-3.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5678211cb9333a6468fb8d8be0305520aa073f50d17f089b5b4b477ea6e67fdc"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win32.whl", hash = "sha256:915c04ba3851909ce68ccc2b8e2cd691618c4dc4c4232fb7982bca3f41fd8c3d"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4faffd047e07c38848ce017e8725090413cd80cbc23d86e55c587bf979e579c9"}, + {file = "markupsafe-3.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:32001d6a8fc98c8cb5c947787c5d08b0a50663d139f1305bac5885d98d9b40fa"}, + {file = "markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698"}, +] + +[[package]] +name = "matplotlib" +version = "3.10.6" +requires_python = ">=3.10" +summary = "Python plotting package" +groups = ["default"] +dependencies = [ + "contourpy>=1.0.1", + "cycler>=0.10", + "fonttools>=4.22.0", + "kiwisolver>=1.3.1", + "numpy>=1.23", + "packaging>=20.0", + "pillow>=8", + "pyparsing>=2.3.1", + "python-dateutil>=2.7", +] +files = [ + {file = "matplotlib-3.10.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:31ca662df6a80bd426f871105fdd69db7543e28e73a9f2afe80de7e531eb2347"}, + {file = "matplotlib-3.10.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1678bb61d897bb4ac4757b5ecfb02bfb3fddf7f808000fb81e09c510712fda75"}, + {file = "matplotlib-3.10.6-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:56cd2d20842f58c03d2d6e6c1f1cf5548ad6f66b91e1e48f814e4fb5abd1cb95"}, + {file = "matplotlib-3.10.6-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:662df55604a2f9a45435566d6e2660e41efe83cd94f4288dfbf1e6d1eae4b0bb"}, + {file = "matplotlib-3.10.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:08f141d55148cd1fc870c3387d70ca4df16dee10e909b3b038782bd4bda6ea07"}, + {file = "matplotlib-3.10.6-cp312-cp312-win_amd64.whl", hash = "sha256:590f5925c2d650b5c9d813c5b3b5fc53f2929c3f8ef463e4ecfa7e052044fb2b"}, + {file = "matplotlib-3.10.6-cp312-cp312-win_arm64.whl", hash = "sha256:f44c8d264a71609c79a78d50349e724f5d5fc3684ead7c2a473665ee63d868aa"}, + {file = "matplotlib-3.10.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:819e409653c1106c8deaf62e6de6b8611449c2cd9939acb0d7d4e57a3d95cc7a"}, + {file = "matplotlib-3.10.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:59c8ac8382fefb9cb71308dde16a7c487432f5255d8f1fd32473523abecfecdf"}, + {file = "matplotlib-3.10.6-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = 
"sha256:84e82d9e0fd70c70bc55739defbd8055c54300750cbacf4740c9673a24d6933a"}, + {file = "matplotlib-3.10.6-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:25f7a3eb42d6c1c56e89eacd495661fc815ffc08d9da750bca766771c0fd9110"}, + {file = "matplotlib-3.10.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f9c862d91ec0b7842920a4cfdaaec29662195301914ea54c33e01f1a28d014b2"}, + {file = "matplotlib-3.10.6-cp313-cp313-win_amd64.whl", hash = "sha256:1b53bd6337eba483e2e7d29c5ab10eee644bc3a2491ec67cc55f7b44583ffb18"}, + {file = "matplotlib-3.10.6-cp313-cp313-win_arm64.whl", hash = "sha256:cbd5eb50b7058b2892ce45c2f4e92557f395c9991f5c886d1bb74a1582e70fd6"}, + {file = "matplotlib-3.10.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:acc86dd6e0e695c095001a7fccff158c49e45e0758fdf5dcdbb0103318b59c9f"}, + {file = "matplotlib-3.10.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:e228cd2ffb8f88b7d0b29e37f68ca9aaf83e33821f24a5ccc4f082dd8396bc27"}, + {file = "matplotlib-3.10.6-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:658bc91894adeab669cf4bb4a186d049948262987e80f0857216387d7435d833"}, + {file = "matplotlib-3.10.6-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8913b7474f6dd83ac444c9459c91f7f0f2859e839f41d642691b104e0af056aa"}, + {file = "matplotlib-3.10.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:091cea22e059b89f6d7d1a18e2c33a7376c26eee60e401d92a4d6726c4e12706"}, + {file = "matplotlib-3.10.6-cp313-cp313t-win_amd64.whl", hash = "sha256:491e25e02a23d7207629d942c666924a6b61e007a48177fdd231a0097b7f507e"}, + {file = "matplotlib-3.10.6-cp313-cp313t-win_arm64.whl", hash = "sha256:3d80d60d4e54cda462e2cd9a086d85cd9f20943ead92f575ce86885a43a565d5"}, + {file = "matplotlib-3.10.6-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:70aaf890ce1d0efd482df969b28a5b30ea0b891224bb315810a3940f67182899"}, + {file = "matplotlib-3.10.6-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1565aae810ab79cb72e402b22facfa6501365e73ebab70a0fdfb98488d2c3c0c"}, + {file = "matplotlib-3.10.6-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:f3b23315a01981689aa4e1a179dbf6ef9fbd17143c3eea77548c2ecfb0499438"}, + {file = "matplotlib-3.10.6-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:30fdd37edf41a4e6785f9b37969de57aea770696cb637d9946eb37470c94a453"}, + {file = "matplotlib-3.10.6-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:bc31e693da1c08012c764b053e702c1855378e04102238e6a5ee6a7117c53a47"}, + {file = "matplotlib-3.10.6-cp314-cp314-win_amd64.whl", hash = "sha256:05be9bdaa8b242bc6ff96330d18c52f1fc59c6fb3a4dd411d953d67e7e1baf98"}, + {file = "matplotlib-3.10.6-cp314-cp314-win_arm64.whl", hash = "sha256:f56a0d1ab05d34c628592435781d185cd99630bdfd76822cd686fb5a0aecd43a"}, + {file = "matplotlib-3.10.6-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:94f0b4cacb23763b64b5dace50d5b7bfe98710fed5f0cef5c08135a03399d98b"}, + {file = "matplotlib-3.10.6-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cc332891306b9fb39462673d8225d1b824c89783fee82840a709f96714f17a5c"}, + {file = "matplotlib-3.10.6-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee1d607b3fb1590deb04b69f02ea1d53ed0b0bf75b2b1a5745f269afcbd3cdd3"}, + {file = "matplotlib-3.10.6-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:376a624a218116461696b27b2bbf7a8945053e6d799f6502fc03226d077807bf"}, + {file = 
"matplotlib-3.10.6-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:83847b47f6524c34b4f2d3ce726bb0541c48c8e7692729865c3df75bfa0f495a"}, + {file = "matplotlib-3.10.6-cp314-cp314t-win_amd64.whl", hash = "sha256:c7e0518e0d223683532a07f4b512e2e0729b62674f1b3a1a69869f98e6b1c7e3"}, + {file = "matplotlib-3.10.6-cp314-cp314t-win_arm64.whl", hash = "sha256:4dd83e029f5b4801eeb87c64efd80e732452781c16a9cf7415b7b63ec8f374d7"}, + {file = "matplotlib-3.10.6.tar.gz", hash = "sha256:ec01b645840dd1996df21ee37f208cd8ba57644779fa20464010638013d3203c"}, +] + +[[package]] +name = "multidict" +version = "6.6.4" +requires_python = ">=3.9" +summary = "multidict implementation" +groups = ["default"] +dependencies = [ + "typing-extensions>=4.1.0; python_version < \"3.11\"", +] +files = [ + {file = "multidict-6.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:0ffb87be160942d56d7b87b0fdf098e81ed565add09eaa1294268c7f3caac4c8"}, + {file = "multidict-6.6.4-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d191de6cbab2aff5de6c5723101705fd044b3e4c7cfd587a1929b5028b9714b3"}, + {file = "multidict-6.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:38a0956dd92d918ad5feff3db8fcb4a5eb7dba114da917e1a88475619781b57b"}, + {file = "multidict-6.6.4-cp312-cp312-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:6865f6d3b7900ae020b495d599fcf3765653bc927951c1abb959017f81ae8287"}, + {file = "multidict-6.6.4-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0a2088c126b6f72db6c9212ad827d0ba088c01d951cee25e758c450da732c138"}, + {file = "multidict-6.6.4-cp312-cp312-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:0f37bed7319b848097085d7d48116f545985db988e2256b2e6f00563a3416ee6"}, + {file = "multidict-6.6.4-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:01368e3c94032ba6ca0b78e7ccb099643466cf24f8dc8eefcfdc0571d56e58f9"}, + {file = "multidict-6.6.4-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:8fe323540c255db0bffee79ad7f048c909f2ab0edb87a597e1c17da6a54e493c"}, + {file = "multidict-6.6.4-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b8eb3025f17b0a4c3cd08cda49acf312a19ad6e8a4edd9dbd591e6506d999402"}, + {file = "multidict-6.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:bbc14f0365534d35a06970d6a83478b249752e922d662dc24d489af1aa0d1be7"}, + {file = "multidict-6.6.4-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:75aa52fba2d96bf972e85451b99d8e19cc37ce26fd016f6d4aa60da9ab2b005f"}, + {file = "multidict-6.6.4-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4fefd4a815e362d4f011919d97d7b4a1e566f1dde83dc4ad8cfb5b41de1df68d"}, + {file = "multidict-6.6.4-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:db9801fe021f59a5b375ab778973127ca0ac52429a26e2fd86aa9508f4d26eb7"}, + {file = "multidict-6.6.4-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:a650629970fa21ac1fb06ba25dabfc5b8a2054fcbf6ae97c758aa956b8dba802"}, + {file = "multidict-6.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:452ff5da78d4720d7516a3a2abd804957532dd69296cb77319c193e3ffb87e24"}, + {file = "multidict-6.6.4-cp312-cp312-win32.whl", hash = "sha256:8c2fcb12136530ed19572bbba61b407f655e3953ba669b96a35036a11a485793"}, + {file = "multidict-6.6.4-cp312-cp312-win_amd64.whl", hash = 
"sha256:047d9425860a8c9544fed1b9584f0c8bcd31bcde9568b047c5e567a1025ecd6e"}, + {file = "multidict-6.6.4-cp312-cp312-win_arm64.whl", hash = "sha256:14754eb72feaa1e8ae528468f24250dd997b8e2188c3d2f593f9eba259e4b364"}, + {file = "multidict-6.6.4-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f46a6e8597f9bd71b31cc708195d42b634c8527fecbcf93febf1052cacc1f16e"}, + {file = "multidict-6.6.4-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:22e38b2bc176c5eb9c0a0e379f9d188ae4cd8b28c0f53b52bce7ab0a9e534657"}, + {file = "multidict-6.6.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5df8afd26f162da59e218ac0eefaa01b01b2e6cd606cffa46608f699539246da"}, + {file = "multidict-6.6.4-cp313-cp313-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:49517449b58d043023720aa58e62b2f74ce9b28f740a0b5d33971149553d72aa"}, + {file = "multidict-6.6.4-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ae9408439537c5afdca05edd128a63f56a62680f4b3c234301055d7a2000220f"}, + {file = "multidict-6.6.4-cp313-cp313-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:87a32d20759dc52a9e850fe1061b6e41ab28e2998d44168a8a341b99ded1dba0"}, + {file = "multidict-6.6.4-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:52e3c8d43cdfff587ceedce9deb25e6ae77daba560b626e97a56ddcad3756879"}, + {file = "multidict-6.6.4-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:ad8850921d3a8d8ff6fbef790e773cecfc260bbfa0566998980d3fa8f520bc4a"}, + {file = "multidict-6.6.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:497a2954adc25c08daff36f795077f63ad33e13f19bfff7736e72c785391534f"}, + {file = "multidict-6.6.4-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:024ce601f92d780ca1617ad4be5ac15b501cc2414970ffa2bb2bbc2bd5a68fa5"}, + {file = "multidict-6.6.4-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:a693fc5ed9bdd1c9e898013e0da4dcc640de7963a371c0bd458e50e046bf6438"}, + {file = "multidict-6.6.4-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:190766dac95aab54cae5b152a56520fd99298f32a1266d66d27fdd1b5ac00f4e"}, + {file = "multidict-6.6.4-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:34d8f2a5ffdceab9dcd97c7a016deb2308531d5f0fced2bb0c9e1df45b3363d7"}, + {file = "multidict-6.6.4-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:59e8d40ab1f5a8597abcef00d04845155a5693b5da00d2c93dbe88f2050f2812"}, + {file = "multidict-6.6.4-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:467fe64138cfac771f0e949b938c2e1ada2b5af22f39692aa9258715e9ea613a"}, + {file = "multidict-6.6.4-cp313-cp313-win32.whl", hash = "sha256:14616a30fe6d0a48d0a48d1a633ab3b8bec4cf293aac65f32ed116f620adfd69"}, + {file = "multidict-6.6.4-cp313-cp313-win_amd64.whl", hash = "sha256:40cd05eaeb39e2bc8939451f033e57feaa2ac99e07dbca8afe2be450a4a3b6cf"}, + {file = "multidict-6.6.4-cp313-cp313-win_arm64.whl", hash = "sha256:f6eb37d511bfae9e13e82cb4d1af36b91150466f24d9b2b8a9785816deb16605"}, + {file = "multidict-6.6.4-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:6c84378acd4f37d1b507dfa0d459b449e2321b3ba5f2338f9b085cf7a7ba95eb"}, + {file = "multidict-6.6.4-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0e0558693063c75f3d952abf645c78f3c5dfdd825a41d8c4d8156fc0b0da6e7e"}, + {file = "multidict-6.6.4-cp313-cp313t-macosx_11_0_arm64.whl", hash = 
"sha256:3f8e2384cb83ebd23fd07e9eada8ba64afc4c759cd94817433ab8c81ee4b403f"}, + {file = "multidict-6.6.4-cp313-cp313t-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f996b87b420995a9174b2a7c1a8daf7db4750be6848b03eb5e639674f7963773"}, + {file = "multidict-6.6.4-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cc356250cffd6e78416cf5b40dc6a74f1edf3be8e834cf8862d9ed5265cf9b0e"}, + {file = "multidict-6.6.4-cp313-cp313t-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:dadf95aa862714ea468a49ad1e09fe00fcc9ec67d122f6596a8d40caf6cec7d0"}, + {file = "multidict-6.6.4-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7dd57515bebffd8ebd714d101d4c434063322e4fe24042e90ced41f18b6d3395"}, + {file = "multidict-6.6.4-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:967af5f238ebc2eb1da4e77af5492219fbd9b4b812347da39a7b5f5c72c0fa45"}, + {file = "multidict-6.6.4-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2a4c6875c37aae9794308ec43e3530e4aa0d36579ce38d89979bbf89582002bb"}, + {file = "multidict-6.6.4-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:7f683a551e92bdb7fac545b9c6f9fa2aebdeefa61d607510b3533286fcab67f5"}, + {file = "multidict-6.6.4-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:3ba5aaf600edaf2a868a391779f7a85d93bed147854925f34edd24cc70a3e141"}, + {file = "multidict-6.6.4-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:580b643b7fd2c295d83cad90d78419081f53fd532d1f1eb67ceb7060f61cff0d"}, + {file = "multidict-6.6.4-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:37b7187197da6af3ee0b044dbc9625afd0c885f2800815b228a0e70f9a7f473d"}, + {file = "multidict-6.6.4-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e1b93790ed0bc26feb72e2f08299691ceb6da5e9e14a0d13cc74f1869af327a0"}, + {file = "multidict-6.6.4-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:a506a77ddee1efcca81ecbeae27ade3e09cdf21a8ae854d766c2bb4f14053f92"}, + {file = "multidict-6.6.4-cp313-cp313t-win32.whl", hash = "sha256:f93b2b2279883d1d0a9e1bd01f312d6fc315c5e4c1f09e112e4736e2f650bc4e"}, + {file = "multidict-6.6.4-cp313-cp313t-win_amd64.whl", hash = "sha256:6d46a180acdf6e87cc41dc15d8f5c2986e1e8739dc25dbb7dac826731ef381a4"}, + {file = "multidict-6.6.4-cp313-cp313t-win_arm64.whl", hash = "sha256:756989334015e3335d087a27331659820d53ba432befdef6a718398b0a8493ad"}, + {file = "multidict-6.6.4-py3-none-any.whl", hash = "sha256:27d8f8e125c07cb954e54d75d04905a9bba8a439c1d84aca94949d4d03d8601c"}, + {file = "multidict-6.6.4.tar.gz", hash = "sha256:d2d4e4787672911b48350df02ed3fa3fffdc2f2e8ca06dd6afdf34189b76a9dd"}, +] + +[[package]] +name = "narwhals" +version = "2.6.0" +requires_python = ">=3.9" +summary = "Extremely lightweight compatibility layer between dataframe libraries" +groups = ["default"] +files = [ + {file = "narwhals-2.6.0-py3-none-any.whl", hash = "sha256:3215ea42afb452c6c8527e79cefbe542b674aa08d7e2e99d46b2c9708870e0d4"}, + {file = "narwhals-2.6.0.tar.gz", hash = "sha256:5c9e2ba923e6a0051017e146184e49fb793548936f978ce130c9f55a9a81240e"}, +] + +[[package]] +name = "nodeenv" +version = "1.9.1" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +summary = "Node.js virtual environment builder" +groups = ["dev"] +files = [ + {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = 
"sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, + {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, +] + +[[package]] +name = "numpy" +version = "2.3.3" +requires_python = ">=3.11" +summary = "Fundamental package for array computing in Python" +groups = ["default"] +files = [ + {file = "numpy-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:cfdd09f9c84a1a934cde1eec2267f0a43a7cd44b2cca4ff95b7c0d14d144b0bf"}, + {file = "numpy-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:cb32e3cf0f762aee47ad1ddc6672988f7f27045b0783c887190545baba73aa25"}, + {file = "numpy-2.3.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:396b254daeb0a57b1fe0ecb5e3cff6fa79a380fa97c8f7781a6d08cd429418fe"}, + {file = "numpy-2.3.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:067e3d7159a5d8f8a0b46ee11148fc35ca9b21f61e3c49fbd0a027450e65a33b"}, + {file = "numpy-2.3.3-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1c02d0629d25d426585fb2e45a66154081b9fa677bc92a881ff1d216bc9919a8"}, + {file = "numpy-2.3.3-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d9192da52b9745f7f0766531dcfa978b7763916f158bb63bdb8a1eca0068ab20"}, + {file = "numpy-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:cd7de500a5b66319db419dc3c345244404a164beae0d0937283b907d8152e6ea"}, + {file = "numpy-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:93d4962d8f82af58f0b2eb85daaf1b3ca23fe0a85d0be8f1f2b7bb46034e56d7"}, + {file = "numpy-2.3.3-cp312-cp312-win32.whl", hash = "sha256:5534ed6b92f9b7dca6c0a19d6df12d41c68b991cef051d108f6dbff3babc4ebf"}, + {file = "numpy-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:497d7cad08e7092dba36e3d296fe4c97708c93daf26643a1ae4b03f6294d30eb"}, + {file = "numpy-2.3.3-cp312-cp312-win_arm64.whl", hash = "sha256:ca0309a18d4dfea6fc6262a66d06c26cfe4640c3926ceec90e57791a82b6eee5"}, + {file = "numpy-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f5415fb78995644253370985342cd03572ef8620b934da27d77377a2285955bf"}, + {file = "numpy-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d00de139a3324e26ed5b95870ce63be7ec7352171bc69a4cf1f157a48e3eb6b7"}, + {file = "numpy-2.3.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:9dc13c6a5829610cc07422bc74d3ac083bd8323f14e2827d992f9e52e22cd6a6"}, + {file = "numpy-2.3.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:d79715d95f1894771eb4e60fb23f065663b2298f7d22945d66877aadf33d00c7"}, + {file = "numpy-2.3.3-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:952cfd0748514ea7c3afc729a0fc639e61655ce4c55ab9acfab14bda4f402b4c"}, + {file = "numpy-2.3.3-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5b83648633d46f77039c29078751f80da65aa64d5622a3cd62aaef9d835b6c93"}, + {file = "numpy-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:b001bae8cea1c7dfdb2ae2b017ed0a6f2102d7a70059df1e338e307a4c78a8ae"}, + {file = "numpy-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8e9aced64054739037d42fb84c54dd38b81ee238816c948c8f3ed134665dcd86"}, + {file = "numpy-2.3.3-cp313-cp313-win32.whl", hash = "sha256:9591e1221db3f37751e6442850429b3aabf7026d3b05542d102944ca7f00c8a8"}, + {file = "numpy-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f0dadeb302887f07431910f67a14d57209ed91130be0adea2f9793f1a4f817cf"}, + {file = "numpy-2.3.3-cp313-cp313-win_arm64.whl", hash = 
"sha256:3c7cf302ac6e0b76a64c4aecf1a09e51abd9b01fc7feee80f6c43e3ab1b1dbc5"}, + {file = "numpy-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:eda59e44957d272846bb407aad19f89dc6f58fecf3504bd144f4c5cf81a7eacc"}, + {file = "numpy-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:823d04112bc85ef5c4fda73ba24e6096c8f869931405a80aa8b0e604510a26bc"}, + {file = "numpy-2.3.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:40051003e03db4041aa325da2a0971ba41cf65714e65d296397cc0e32de6018b"}, + {file = "numpy-2.3.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6ee9086235dd6ab7ae75aba5662f582a81ced49f0f1c6de4260a78d8f2d91a19"}, + {file = "numpy-2.3.3-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:94fcaa68757c3e2e668ddadeaa86ab05499a70725811e582b6a9858dd472fb30"}, + {file = "numpy-2.3.3-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da1a74b90e7483d6ce5244053399a614b1d6b7bc30a60d2f570e5071f8959d3e"}, + {file = "numpy-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:2990adf06d1ecee3b3dcbb4977dfab6e9f09807598d647f04d385d29e7a3c3d3"}, + {file = "numpy-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ed635ff692483b8e3f0fcaa8e7eb8a75ee71aa6d975388224f70821421800cea"}, + {file = "numpy-2.3.3-cp313-cp313t-win32.whl", hash = "sha256:a333b4ed33d8dc2b373cc955ca57babc00cd6f9009991d9edc5ddbc1bac36bcd"}, + {file = "numpy-2.3.3-cp313-cp313t-win_amd64.whl", hash = "sha256:4384a169c4d8f97195980815d6fcad04933a7e1ab3b530921c3fef7a1c63426d"}, + {file = "numpy-2.3.3-cp313-cp313t-win_arm64.whl", hash = "sha256:75370986cc0bc66f4ce5110ad35aae6d182cc4ce6433c40ad151f53690130bf1"}, + {file = "numpy-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:cd052f1fa6a78dee696b58a914b7229ecfa41f0a6d96dc663c1220a55e137593"}, + {file = "numpy-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:414a97499480067d305fcac9716c29cf4d0d76db6ebf0bf3cbce666677f12652"}, + {file = "numpy-2.3.3-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:50a5fe69f135f88a2be9b6ca0481a68a136f6febe1916e4920e12f1a34e708a7"}, + {file = "numpy-2.3.3-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:b912f2ed2b67a129e6a601e9d93d4fa37bef67e54cac442a2f588a54afe5c67a"}, + {file = "numpy-2.3.3-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9e318ee0596d76d4cb3d78535dc005fa60e5ea348cd131a51e99d0bdbe0b54fe"}, + {file = "numpy-2.3.3-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ce020080e4a52426202bdb6f7691c65bb55e49f261f31a8f506c9f6bc7450421"}, + {file = "numpy-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:e6687dc183aa55dae4a705b35f9c0f8cb178bcaa2f029b241ac5356221d5c021"}, + {file = "numpy-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d8f3b1080782469fdc1718c4ed1d22549b5fb12af0d57d35e992158a772a37cf"}, + {file = "numpy-2.3.3-cp314-cp314-win32.whl", hash = "sha256:cb248499b0bc3be66ebd6578b83e5acacf1d6cb2a77f2248ce0e40fbec5a76d0"}, + {file = "numpy-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:691808c2b26b0f002a032c73255d0bd89751425f379f7bcd22d140db593a96e8"}, + {file = "numpy-2.3.3-cp314-cp314-win_arm64.whl", hash = "sha256:9ad12e976ca7b10f1774b03615a2a4bab8addce37ecc77394d8e986927dc0dfe"}, + {file = "numpy-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:9cc48e09feb11e1db00b320e9d30a4151f7369afb96bd0e48d942d09da3a0d00"}, + {file = "numpy-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = 
"sha256:901bf6123879b7f251d3631967fd574690734236075082078e0571977c6a8e6a"}, + {file = "numpy-2.3.3-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:7f025652034199c301049296b59fa7d52c7e625017cae4c75d8662e377bf487d"}, + {file = "numpy-2.3.3-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:533ca5f6d325c80b6007d4d7fb1984c303553534191024ec6a524a4c92a5935a"}, + {file = "numpy-2.3.3-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0edd58682a399824633b66885d699d7de982800053acf20be1eaa46d92009c54"}, + {file = "numpy-2.3.3-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:367ad5d8fbec5d9296d18478804a530f1191e24ab4d75ab408346ae88045d25e"}, + {file = "numpy-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8f6ac61a217437946a1fa48d24c47c91a0c4f725237871117dea264982128097"}, + {file = "numpy-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:179a42101b845a816d464b6fe9a845dfaf308fdfc7925387195570789bb2c970"}, + {file = "numpy-2.3.3-cp314-cp314t-win32.whl", hash = "sha256:1250c5d3d2562ec4174bce2e3a1523041595f9b651065e4a4473f5f48a6bc8a5"}, + {file = "numpy-2.3.3-cp314-cp314t-win_amd64.whl", hash = "sha256:b37a0b2e5935409daebe82c1e42274d30d9dd355852529eab91dab8dcca7419f"}, + {file = "numpy-2.3.3-cp314-cp314t-win_arm64.whl", hash = "sha256:78c9f6560dc7e6b3990e32df7ea1a50bbd0e2a111e05209963f5ddcab7073b0b"}, + {file = "numpy-2.3.3.tar.gz", hash = "sha256:ddc7c39727ba62b80dfdbedf400d1c10ddfa8eefbd7ec8dcb118be8b56d31029"}, +] + +[[package]] +name = "oauthlib" +version = "3.3.1" +requires_python = ">=3.8" +summary = "A generic, spec-compliant, thorough implementation of the OAuth request-signing logic" +groups = ["default"] +files = [ + {file = "oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1"}, + {file = "oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9"}, +] + +[[package]] +name = "packaging" +version = "25.0" +requires_python = ">=3.8" +summary = "Core utilities for Python packages" +groups = ["default", "dev"] +files = [ + {file = "packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484"}, + {file = "packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f"}, +] + +[[package]] +name = "pandas" +version = "2.3.3" +requires_python = ">=3.9" +summary = "Powerful data structures for data analysis, time series, and statistics" +groups = ["default"] +dependencies = [ + "numpy>=1.22.4; python_version < \"3.11\"", + "numpy>=1.23.2; python_version == \"3.11\"", + "numpy>=1.26.0; python_version >= \"3.12\"", + "python-dateutil>=2.8.2", + "pytz>=2020.1", + "tzdata>=2022.7", +] +files = [ + {file = "pandas-2.3.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6d21f6d74eb1725c2efaa71a2bfc661a0689579b58e9c0ca58a739ff0b002b53"}, + {file = "pandas-2.3.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3fd2f887589c7aa868e02632612ba39acb0b8948faf5cc58f0850e165bd46f35"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ecaf1e12bdc03c86ad4a7ea848d66c685cb6851d807a26aa245ca3d2017a1908"}, + {file = "pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b3d11d2fda7eb164ef27ffc14b4fcab16a80e1ce67e9f57e19ec0afaf715ba89"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = 
"sha256:a68e15f780eddf2b07d242e17a04aa187a7ee12b40b930bfdd78070556550e98"}, + {file = "pandas-2.3.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:371a4ab48e950033bcf52b6527eccb564f52dc826c02afd9a1bc0ab731bba084"}, + {file = "pandas-2.3.3-cp312-cp312-win_amd64.whl", hash = "sha256:a16dcec078a01eeef8ee61bf64074b4e524a2a3f4b3be9326420cabe59c4778b"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:56851a737e3470de7fa88e6131f41281ed440d29a9268dcbf0002da5ac366713"}, + {file = "pandas-2.3.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:bdcd9d1167f4885211e401b3036c0c8d9e274eee67ea8d0758a256d60704cfe8"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e32e7cc9af0f1cc15548288a51a3b681cc2a219faa838e995f7dc53dbab1062d"}, + {file = "pandas-2.3.3-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:318d77e0e42a628c04dc56bcef4b40de67918f7041c2b061af1da41dcff670ac"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:4e0a175408804d566144e170d0476b15d78458795bb18f1304fb94160cabf40c"}, + {file = "pandas-2.3.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:93c2d9ab0fc11822b5eece72ec9587e172f63cff87c00b062f6e37448ced4493"}, + {file = "pandas-2.3.3-cp313-cp313-win_amd64.whl", hash = "sha256:f8bfc0e12dc78f777f323f55c58649591b2cd0c43534e8355c51d3fede5f4dee"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:75ea25f9529fdec2d2e93a42c523962261e567d250b0013b16210e1d40d7c2e5"}, + {file = "pandas-2.3.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74ecdf1d301e812db96a465a525952f4dde225fdb6d8e5a521d47e1f42041e21"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6435cb949cb34ec11cc9860246ccb2fdc9ecd742c12d3304989017d53f039a78"}, + {file = "pandas-2.3.3-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:900f47d8f20860de523a1ac881c4c36d65efcb2eb850e6948140fa781736e110"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a45c765238e2ed7d7c608fc5bc4a6f88b642f2f01e70c0c23d2224dd21829d86"}, + {file = "pandas-2.3.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:c4fc4c21971a1a9f4bdb4c73978c7f7256caa3e62b323f70d6cb80db583350bc"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:ee15f284898e7b246df8087fc82b87b01686f98ee67d85a17b7ab44143a3a9a0"}, + {file = "pandas-2.3.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1611aedd912e1ff81ff41c745822980c49ce4a7907537be8692c8dbc31924593"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6d2cefc361461662ac48810cb14365a365ce864afe85ef1f447ff5a1e99ea81c"}, + {file = "pandas-2.3.3-cp314-cp314-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ee67acbbf05014ea6c763beb097e03cd629961c8a632075eeb34247120abcb4b"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:c46467899aaa4da076d5abc11084634e2d197e9460643dd455ac3db5856b24d6"}, + {file = "pandas-2.3.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:6253c72c6a1d990a410bc7de641d34053364ef8bcd3126f7e7450125887dffe3"}, + {file = "pandas-2.3.3-cp314-cp314-win_amd64.whl", hash = "sha256:1b07204a219b3b7350abaae088f451860223a52cfb8a6c53358e7948735158e5"}, + {file = "pandas-2.3.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:2462b1a365b6109d275250baaae7b760fd25c726aaca0054649286bcfbb3e8ec"}, + {file = 
"pandas-2.3.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:0242fe9a49aa8b4d78a4fa03acb397a58833ef6199e9aa40a95f027bb3a1b6e7"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:a21d830e78df0a515db2b3d2f5570610f5e6bd2e27749770e8bb7b524b89b450"}, + {file = "pandas-2.3.3-cp314-cp314t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2e3ebdb170b5ef78f19bfb71b0dc5dc58775032361fa188e814959b74d726dd5"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:d051c0e065b94b7a3cea50eb1ec32e912cd96dba41647eb24104b6c6c14c5788"}, + {file = "pandas-2.3.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:3869faf4bd07b3b66a9f462417d0ca3a9df29a9f6abd5d0d0dbab15dac7abe87"}, + {file = "pandas-2.3.3.tar.gz", hash = "sha256:e05e1af93b977f7eafa636d043f9f94c7ee3ac81af99c13508215942e64c993b"}, +] + +[[package]] +name = "pillow" +version = "11.3.0" +requires_python = ">=3.9" +summary = "Python Imaging Library (Fork)" +groups = ["default"] +files = [ + {file = "pillow-11.3.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:fdae223722da47b024b867c1ea0be64e0df702c5e0a60e27daad39bf960dd1e4"}, + {file = "pillow-11.3.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:921bd305b10e82b4d1f5e802b6850677f965d8394203d182f078873851dada69"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:eb76541cba2f958032d79d143b98a3a6b3ea87f0959bbe256c0b5e416599fd5d"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:67172f2944ebba3d4a7b54f2e95c786a3a50c21b88456329314caaa28cda70f6"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:97f07ed9f56a3b9b5f49d3661dc9607484e85c67e27f3e8be2c7d28ca032fec7"}, + {file = "pillow-11.3.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:676b2815362456b5b3216b4fd5bd89d362100dc6f4945154ff172e206a22c024"}, + {file = "pillow-11.3.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:3e184b2f26ff146363dd07bde8b711833d7b0202e27d13540bfe2e35a323a809"}, + {file = "pillow-11.3.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:6be31e3fc9a621e071bc17bb7de63b85cbe0bfae91bb0363c893cbe67247780d"}, + {file = "pillow-11.3.0-cp312-cp312-win32.whl", hash = "sha256:7b161756381f0918e05e7cb8a371fff367e807770f8fe92ecb20d905d0e1c149"}, + {file = "pillow-11.3.0-cp312-cp312-win_amd64.whl", hash = "sha256:a6444696fce635783440b7f7a9fc24b3ad10a9ea3f0ab66c5905be1c19ccf17d"}, + {file = "pillow-11.3.0-cp312-cp312-win_arm64.whl", hash = "sha256:2aceea54f957dd4448264f9bf40875da0415c83eb85f55069d89c0ed436e3542"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:1c627742b539bba4309df89171356fcb3cc5a9178355b2727d1b74a6cf155fbd"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:30b7c02f3899d10f13d7a48163c8969e4e653f8b43416d23d13d1bbfdc93b9f8"}, + {file = "pillow-11.3.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:7859a4cc7c9295f5838015d8cc0a9c215b77e43d07a25e460f35cf516df8626f"}, + {file = "pillow-11.3.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ec1ee50470b0d050984394423d96325b744d55c701a439d2bd66089bff963d3c"}, + {file = "pillow-11.3.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7db51d222548ccfd274e4572fdbf3e810a5e66b00608862f947b163e613b67dd"}, + {file = 
"pillow-11.3.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2d6fcc902a24ac74495df63faad1884282239265c6839a0a6416d33faedfae7e"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f0f5d8f4a08090c6d6d578351a2b91acf519a54986c055af27e7a93feae6d3f1"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c37d8ba9411d6003bba9e518db0db0c58a680ab9fe5179f040b0463644bc9805"}, + {file = "pillow-11.3.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13f87d581e71d9189ab21fe0efb5a23e9f28552d5be6979e84001d3b8505abe8"}, + {file = "pillow-11.3.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:023f6d2d11784a465f09fd09a34b150ea4672e85fb3d05931d89f373ab14abb2"}, + {file = "pillow-11.3.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:45dfc51ac5975b938e9809451c51734124e73b04d0f0ac621649821a63852e7b"}, + {file = "pillow-11.3.0-cp313-cp313-win32.whl", hash = "sha256:a4d336baed65d50d37b88ca5b60c0fa9d81e3a87d4a7930d3880d1624d5b31f3"}, + {file = "pillow-11.3.0-cp313-cp313-win_amd64.whl", hash = "sha256:0bce5c4fd0921f99d2e858dc4d4d64193407e1b99478bc5cacecba2311abde51"}, + {file = "pillow-11.3.0-cp313-cp313-win_arm64.whl", hash = "sha256:1904e1264881f682f02b7f8167935cce37bc97db457f8e7849dc3a6a52b99580"}, + {file = "pillow-11.3.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4c834a3921375c48ee6b9624061076bc0a32a60b5532b322cc0ea64e639dd50e"}, + {file = "pillow-11.3.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:5e05688ccef30ea69b9317a9ead994b93975104a677a36a8ed8106be9260aa6d"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:1019b04af07fc0163e2810167918cb5add8d74674b6267616021ab558dc98ced"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f944255db153ebb2b19c51fe85dd99ef0ce494123f21b9db4877ffdfc5590c7c"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1f85acb69adf2aaee8b7da124efebbdb959a104db34d3a2cb0f3793dbae422a8"}, + {file = "pillow-11.3.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:05f6ecbeff5005399bb48d198f098a9b4b6bdf27b8487c7f38ca16eeb070cd59"}, + {file = "pillow-11.3.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:a7bc6e6fd0395bc052f16b1a8670859964dbd7003bd0af2ff08342eb6e442cfe"}, + {file = "pillow-11.3.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:83e1b0161c9d148125083a35c1c5a89db5b7054834fd4387499e06552035236c"}, + {file = "pillow-11.3.0-cp313-cp313t-win32.whl", hash = "sha256:2a3117c06b8fb646639dce83694f2f9eac405472713fcb1ae887469c0d4f6788"}, + {file = "pillow-11.3.0-cp313-cp313t-win_amd64.whl", hash = "sha256:857844335c95bea93fb39e0fa2726b4d9d758850b34075a7e3ff4f4fa3aa3b31"}, + {file = "pillow-11.3.0-cp313-cp313t-win_arm64.whl", hash = "sha256:8797edc41f3e8536ae4b10897ee2f637235c94f27404cac7297f7b607dd0716e"}, + {file = "pillow-11.3.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:d9da3df5f9ea2a89b81bb6087177fb1f4d1c7146d583a3fe5c672c0d94e55e12"}, + {file = "pillow-11.3.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:0b275ff9b04df7b640c59ec5a3cb113eefd3795a8df80bac69646ef699c6981a"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:0743841cabd3dba6a83f38a92672cccbd69af56e3e91777b0ee7f4dba4385632"}, + {file = 
"pillow-11.3.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2465a69cf967b8b49ee1b96d76718cd98c4e925414ead59fdf75cf0fd07df673"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:41742638139424703b4d01665b807c6468e23e699e8e90cffefe291c5832b027"}, + {file = "pillow-11.3.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:93efb0b4de7e340d99057415c749175e24c8864302369e05914682ba642e5d77"}, + {file = "pillow-11.3.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7966e38dcd0fa11ca390aed7c6f20454443581d758242023cf36fcb319b1a874"}, + {file = "pillow-11.3.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:98a9afa7b9007c67ed84c57c9e0ad86a6000da96eaa638e4f8abe5b65ff83f0a"}, + {file = "pillow-11.3.0-cp314-cp314-win32.whl", hash = "sha256:02a723e6bf909e7cea0dac1b0e0310be9d7650cd66222a5f1c571455c0a45214"}, + {file = "pillow-11.3.0-cp314-cp314-win_amd64.whl", hash = "sha256:a418486160228f64dd9e9efcd132679b7a02a5f22c982c78b6fc7dab3fefb635"}, + {file = "pillow-11.3.0-cp314-cp314-win_arm64.whl", hash = "sha256:155658efb5e044669c08896c0c44231c5e9abcaadbc5cd3648df2f7c0b96b9a6"}, + {file = "pillow-11.3.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:59a03cdf019efbfeeed910bf79c7c93255c3d54bc45898ac2a4140071b02b4ae"}, + {file = "pillow-11.3.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:f8a5827f84d973d8636e9dc5764af4f0cf2318d26744b3d902931701b0d46653"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ee92f2fd10f4adc4b43d07ec5e779932b4eb3dbfbc34790ada5a6669bc095aa6"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c96d333dcf42d01f47b37e0979b6bd73ec91eae18614864622d9b87bbd5bbf36"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4c96f993ab8c98460cd0c001447bff6194403e8b1d7e149ade5f00594918128b"}, + {file = "pillow-11.3.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:41342b64afeba938edb034d122b2dda5db2139b9a4af999729ba8818e0056477"}, + {file = "pillow-11.3.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:068d9c39a2d1b358eb9f245ce7ab1b5c3246c7c8c7d9ba58cfa5b43146c06e50"}, + {file = "pillow-11.3.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:a1bc6ba083b145187f648b667e05a2534ecc4b9f2784c2cbe3089e44868f2b9b"}, + {file = "pillow-11.3.0-cp314-cp314t-win32.whl", hash = "sha256:118ca10c0d60b06d006be10a501fd6bbdfef559251ed31b794668ed569c87e12"}, + {file = "pillow-11.3.0-cp314-cp314t-win_amd64.whl", hash = "sha256:8924748b688aa210d79883357d102cd64690e56b923a186f35a82cbc10f997db"}, + {file = "pillow-11.3.0-cp314-cp314t-win_arm64.whl", hash = "sha256:79ea0d14d3ebad43ec77ad5272e6ff9bba5b679ef73375ea760261207fa8e0aa"}, + {file = "pillow-11.3.0.tar.gz", hash = "sha256:3828ee7586cd0b2091b6209e5ad53e20d0649bbe87164a459d0676e035e8f523"}, +] + +[[package]] +name = "platformdirs" +version = "4.4.0" +requires_python = ">=3.9" +summary = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
+groups = ["dev"] +files = [ + {file = "platformdirs-4.4.0-py3-none-any.whl", hash = "sha256:abd01743f24e5287cd7a5db3752faf1a2d65353f38ec26d98e25a6db65958c85"}, + {file = "platformdirs-4.4.0.tar.gz", hash = "sha256:ca753cf4d81dc309bc67b0ea38fd15dc97bc30ce419a7f58d13eb3bf14c4febf"}, +] + +[[package]] +name = "plotly" +version = "6.3.0" +requires_python = ">=3.8" +summary = "An open-source interactive data visualization library for Python" +groups = ["default"] +dependencies = [ + "narwhals>=1.15.1", + "packaging", +] +files = [ + {file = "plotly-6.3.0-py3-none-any.whl", hash = "sha256:7ad806edce9d3cdd882eaebaf97c0c9e252043ed1ed3d382c3e3520ec07806d4"}, + {file = "plotly-6.3.0.tar.gz", hash = "sha256:8840a184d18ccae0f9189c2b9a2943923fd5cae7717b723f36eef78f444e5a73"}, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +requires_python = ">=3.9" +summary = "plugin and hook calling mechanisms for python" +groups = ["dev"] +files = [ + {file = "pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746"}, + {file = "pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3"}, +] + +[[package]] +name = "pre-commit" +version = "4.3.0" +requires_python = ">=3.9" +summary = "A framework for managing and maintaining multi-language pre-commit hooks." +groups = ["dev"] +dependencies = [ + "cfgv>=2.0.0", + "identify>=1.0.0", + "nodeenv>=0.11.1", + "pyyaml>=5.1", + "virtualenv>=20.10.0", +] +files = [ + {file = "pre_commit-4.3.0-py2.py3-none-any.whl", hash = "sha256:2b0747ad7e6e967169136edffee14c16e148a778a54e4f967921aa1ebf2308d8"}, + {file = "pre_commit-4.3.0.tar.gz", hash = "sha256:499fe450cc9d42e9d58e606262795ecb64dd05438943c62b66f6a8673da30b16"}, +] + +[[package]] +name = "propcache" +version = "0.3.2" +requires_python = ">=3.9" +summary = "Accelerated property cache" +groups = ["default"] +files = [ + {file = "propcache-0.3.2-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:8de106b6c84506b31c27168582cd3cb3000a6412c16df14a8628e5871ff83c10"}, + {file = "propcache-0.3.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:28710b0d3975117239c76600ea351934ac7b5ff56e60953474342608dbbb6154"}, + {file = "propcache-0.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce26862344bdf836650ed2487c3d724b00fbfec4233a1013f597b78c1cb73615"}, + {file = "propcache-0.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bca54bd347a253af2cf4544bbec232ab982f4868de0dd684246b67a51bc6b1db"}, + {file = "propcache-0.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:55780d5e9a2ddc59711d727226bb1ba83a22dd32f64ee15594b9392b1f544eb1"}, + {file = "propcache-0.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:035e631be25d6975ed87ab23153db6a73426a48db688070d925aa27e996fe93c"}, + {file = "propcache-0.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ee6f22b6eaa39297c751d0e80c0d3a454f112f5c6481214fcf4c092074cecd67"}, + {file = "propcache-0.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7ca3aee1aa955438c4dba34fc20a9f390e4c79967257d830f137bd5a8a32ed3b"}, + {file = "propcache-0.3.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:7a4f30862869fa2b68380d677cc1c5fcf1e0f2b9ea0cf665812895c75d0ca3b8"}, + {file = "propcache-0.3.2-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:b77ec3c257d7816d9f3700013639db7491a434644c906a2578a11daf13176251"}, + 
{file = "propcache-0.3.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:cab90ac9d3f14b2d5050928483d3d3b8fb6b4018893fc75710e6aa361ecb2474"}, + {file = "propcache-0.3.2-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0b504d29f3c47cf6b9e936c1852246c83d450e8e063d50562115a6be6d3a2535"}, + {file = "propcache-0.3.2-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:ce2ac2675a6aa41ddb2a0c9cbff53780a617ac3d43e620f8fd77ba1c84dcfc06"}, + {file = "propcache-0.3.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:62b4239611205294cc433845b914131b2a1f03500ff3c1ed093ed216b82621e1"}, + {file = "propcache-0.3.2-cp312-cp312-win32.whl", hash = "sha256:df4a81b9b53449ebc90cc4deefb052c1dd934ba85012aa912c7ea7b7e38b60c1"}, + {file = "propcache-0.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:7046e79b989d7fe457bb755844019e10f693752d169076138abf17f31380800c"}, + {file = "propcache-0.3.2-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:ca592ed634a73ca002967458187109265e980422116c0a107cf93d81f95af945"}, + {file = "propcache-0.3.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:9ecb0aad4020e275652ba3975740f241bd12a61f1a784df044cf7477a02bc252"}, + {file = "propcache-0.3.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:7f08f1cc28bd2eade7a8a3d2954ccc673bb02062e3e7da09bc75d843386b342f"}, + {file = "propcache-0.3.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1a342c834734edb4be5ecb1e9fb48cb64b1e2320fccbd8c54bf8da8f2a84c33"}, + {file = "propcache-0.3.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8a544caaae1ac73f1fecfae70ded3e93728831affebd017d53449e3ac052ac1e"}, + {file = "propcache-0.3.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:310d11aa44635298397db47a3ebce7db99a4cc4b9bbdfcf6c98a60c8d5261cf1"}, + {file = "propcache-0.3.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4c1396592321ac83157ac03a2023aa6cc4a3cc3cfdecb71090054c09e5a7cce3"}, + {file = "propcache-0.3.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8cabf5b5902272565e78197edb682017d21cf3b550ba0460ee473753f28d23c1"}, + {file = "propcache-0.3.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0a2f2235ac46a7aa25bdeb03a9e7060f6ecbd213b1f9101c43b3090ffb971ef6"}, + {file = "propcache-0.3.2-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:92b69e12e34869a6970fd2f3da91669899994b47c98f5d430b781c26f1d9f387"}, + {file = "propcache-0.3.2-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:54e02207c79968ebbdffc169591009f4474dde3b4679e16634d34c9363ff56b4"}, + {file = "propcache-0.3.2-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:4adfb44cb588001f68c5466579d3f1157ca07f7504fc91ec87862e2b8e556b88"}, + {file = "propcache-0.3.2-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:fd3e6019dc1261cd0291ee8919dd91fbab7b169bb76aeef6c716833a3f65d206"}, + {file = "propcache-0.3.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4c181cad81158d71c41a2bce88edce078458e2dd5ffee7eddd6b05da85079f43"}, + {file = "propcache-0.3.2-cp313-cp313-win32.whl", hash = "sha256:8a08154613f2249519e549de2330cf8e2071c2887309a7b07fb56098f5170a02"}, + {file = "propcache-0.3.2-cp313-cp313-win_amd64.whl", hash = "sha256:e41671f1594fc4ab0a6dec1351864713cb3a279910ae8b58f884a88a0a632c05"}, + {file = "propcache-0.3.2-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:9a3cf035bbaf035f109987d9d55dc90e4b0e36e04bbbb95af3055ef17194057b"}, + {file = 
"propcache-0.3.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:156c03d07dc1323d8dacaa221fbe028c5c70d16709cdd63502778e6c3ccca1b0"}, + {file = "propcache-0.3.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:74413c0ba02ba86f55cf60d18daab219f7e531620c15f1e23d95563f505efe7e"}, + {file = "propcache-0.3.2-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f066b437bb3fa39c58ff97ab2ca351db465157d68ed0440abecb21715eb24b28"}, + {file = "propcache-0.3.2-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f1304b085c83067914721e7e9d9917d41ad87696bf70f0bc7dee450e9c71ad0a"}, + {file = "propcache-0.3.2-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ab50cef01b372763a13333b4e54021bdcb291fc9a8e2ccb9c2df98be51bcde6c"}, + {file = "propcache-0.3.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fad3b2a085ec259ad2c2842666b2a0a49dea8463579c606426128925af1ed725"}, + {file = "propcache-0.3.2-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:261fa020c1c14deafd54c76b014956e2f86991af198c51139faf41c4d5e83892"}, + {file = "propcache-0.3.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:46d7f8aa79c927e5f987ee3a80205c987717d3659f035c85cf0c3680526bdb44"}, + {file = "propcache-0.3.2-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:6d8f3f0eebf73e3c0ff0e7853f68be638b4043c65a70517bb575eff54edd8dbe"}, + {file = "propcache-0.3.2-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:03c89c1b14a5452cf15403e291c0ccd7751d5b9736ecb2c5bab977ad6c5bcd81"}, + {file = "propcache-0.3.2-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:0cc17efde71e12bbaad086d679ce575268d70bc123a5a71ea7ad76f70ba30bba"}, + {file = "propcache-0.3.2-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:acdf05d00696bc0447e278bb53cb04ca72354e562cf88ea6f9107df8e7fd9770"}, + {file = "propcache-0.3.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:4445542398bd0b5d32df908031cb1b30d43ac848e20470a878b770ec2dcc6330"}, + {file = "propcache-0.3.2-cp313-cp313t-win32.whl", hash = "sha256:f86e5d7cd03afb3a1db8e9f9f6eff15794e79e791350ac48a8c924e6f439f394"}, + {file = "propcache-0.3.2-cp313-cp313t-win_amd64.whl", hash = "sha256:9704bedf6e7cbe3c65eca4379a9b53ee6a83749f047808cbb5044d40d7d72198"}, + {file = "propcache-0.3.2-py3-none-any.whl", hash = "sha256:98f1ec44fb675f5052cccc8e609c46ed23a35a1cfd18545ad4e29002d858a43f"}, + {file = "propcache-0.3.2.tar.gz", hash = "sha256:20d7d62e4e7ef05f221e0db2856b979540686342e7dd9973b815599c7057e168"}, +] + +[[package]] +name = "protobuf" +version = "6.32.1" +requires_python = ">=3.9" +summary = "" +groups = ["default"] +files = [ + {file = "protobuf-6.32.1-cp310-abi3-win32.whl", hash = "sha256:a8a32a84bc9f2aad712041b8b366190f71dde248926da517bde9e832e4412085"}, + {file = "protobuf-6.32.1-cp310-abi3-win_amd64.whl", hash = "sha256:b00a7d8c25fa471f16bc8153d0e53d6c9e827f0953f3c09aaa4331c718cae5e1"}, + {file = "protobuf-6.32.1-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:d8c7e6eb619ffdf105ee4ab76af5a68b60a9d0f66da3ea12d1640e6d8dab7281"}, + {file = "protobuf-6.32.1-cp39-abi3-manylinux2014_aarch64.whl", hash = "sha256:2f5b80a49e1eb7b86d85fcd23fe92df154b9730a725c3b38c4e43b9d77018bf4"}, + {file = "protobuf-6.32.1-cp39-abi3-manylinux2014_x86_64.whl", hash = "sha256:b1864818300c297265c83a4982fd3169f97122c299f56a56e2445c3698d34710"}, + {file = "protobuf-6.32.1-py3-none-any.whl", hash = 
"sha256:2601b779fc7d32a866c6b4404f9d42a3f67c5b9f3f15b4db3cccabe06b95c346"}, + {file = "protobuf-6.32.1.tar.gz", hash = "sha256:ee2469e4a021474ab9baafea6cd070e5bf27c7d29433504ddea1a4ee5850f68d"}, +] + +[[package]] +name = "pyarrow" +version = "21.0.0" +requires_python = ">=3.9" +summary = "Python library for Apache Arrow" +groups = ["default"] +files = [ + {file = "pyarrow-21.0.0-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:3a302f0e0963db37e0a24a70c56cf91a4faa0bca51c23812279ca2e23481fccd"}, + {file = "pyarrow-21.0.0-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:b6b27cf01e243871390474a211a7922bfbe3bda21e39bc9160daf0da3fe48876"}, + {file = "pyarrow-21.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:e72a8ec6b868e258a2cd2672d91f2860ad532d590ce94cdf7d5e7ec674ccf03d"}, + {file = "pyarrow-21.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:b7ae0bbdc8c6674259b25bef5d2a1d6af5d39d7200c819cf99e07f7dfef1c51e"}, + {file = "pyarrow-21.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:58c30a1729f82d201627c173d91bd431db88ea74dcaa3885855bc6203e433b82"}, + {file = "pyarrow-21.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:072116f65604b822a7f22945a7a6e581cfa28e3454fdcc6939d4ff6090126623"}, + {file = "pyarrow-21.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:cf56ec8b0a5c8c9d7021d6fd754e688104f9ebebf1bf4449613c9531f5346a18"}, + {file = "pyarrow-21.0.0-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:e99310a4ebd4479bcd1964dff9e14af33746300cb014aa4a3781738ac63baf4a"}, + {file = "pyarrow-21.0.0-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:d2fe8e7f3ce329a71b7ddd7498b3cfac0eeb200c2789bd840234f0dc271a8efe"}, + {file = "pyarrow-21.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:f522e5709379d72fb3da7785aa489ff0bb87448a9dc5a75f45763a795a089ebd"}, + {file = "pyarrow-21.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:69cbbdf0631396e9925e048cfa5bce4e8c3d3b41562bbd70c685a8eb53a91e61"}, + {file = "pyarrow-21.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:731c7022587006b755d0bdb27626a1a3bb004bb56b11fb30d98b6c1b4718579d"}, + {file = "pyarrow-21.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:dc56bc708f2d8ac71bd1dcb927e458c93cec10b98eb4120206a4091db7b67b99"}, + {file = "pyarrow-21.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:186aa00bca62139f75b7de8420f745f2af12941595bbbfa7ed3870ff63e25636"}, + {file = "pyarrow-21.0.0-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:a7a102574faa3f421141a64c10216e078df467ab9576684d5cd696952546e2da"}, + {file = "pyarrow-21.0.0-cp313-cp313t-macosx_12_0_x86_64.whl", hash = "sha256:1e005378c4a2c6db3ada3ad4c217b381f6c886f0a80d6a316fe586b90f77efd7"}, + {file = "pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:65f8e85f79031449ec8706b74504a316805217b35b6099155dd7e227eef0d4b6"}, + {file = "pyarrow-21.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:3a81486adc665c7eb1a2bde0224cfca6ceaba344a82a971ef059678417880eb8"}, + {file = "pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:fc0d2f88b81dcf3ccf9a6ae17f89183762c8a94a5bdcfa09e05cfe413acf0503"}, + {file = "pyarrow-21.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:6299449adf89df38537837487a4f8d3bd91ec94354fdd2a7d30bc11c48ef6e79"}, + {file = "pyarrow-21.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:222c39e2c70113543982c6b34f3077962b44fca38c0bd9e68bb6781534425c10"}, + {file = "pyarrow-21.0.0.tar.gz", hash = "sha256:5051f2dccf0e283ff56335760cbc8622cf52264d67e359d5569541ac11b6d5bc"}, 
+]
+
+[[package]]
+name = "pyasn1"
+version = "0.6.1"
+requires_python = ">=3.8"
+summary = "Pure-Python implementation of ASN.1 types and DER/BER/CER codecs (X.208)"
+groups = ["default"]
+files = [
+    {file = "pyasn1-0.6.1-py3-none-any.whl", hash = "sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629"},
+    {file = "pyasn1-0.6.1.tar.gz", hash = "sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034"},
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+requires_python = ">=3.8"
+summary = "A collection of ASN.1-based protocols modules"
+groups = ["default"]
+dependencies = [
+    "pyasn1<0.7.0,>=0.6.1",
+]
+files = [
+    {file = "pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a"},
+    {file = "pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6"},
+]
+
+[[package]]
+name = "pydantic"
+version = "2.11.9"
+requires_python = ">=3.9"
+summary = "Data validation using Python type hints"
+groups = ["default"]
+dependencies = [
+    "annotated-types>=0.6.0",
+    "pydantic-core==2.33.2",
+    "typing-extensions>=4.12.2",
+    "typing-inspection>=0.4.0",
+]
+files = [
+    {file = "pydantic-2.11.9-py3-none-any.whl", hash = "sha256:c42dd626f5cfc1c6950ce6205ea58c93efa406da65f479dcb4029d5934857da2"},
+    {file = "pydantic-2.11.9.tar.gz", hash = "sha256:6b8ffda597a14812a7975c90b82a8a2e777d9257aba3453f973acd3c032a18e2"},
+]
+
+[[package]]
+name = "pydantic-core"
+version = "2.33.2"
+requires_python = ">=3.9"
+summary = "Core functionality for Pydantic validation and serialization"
+groups = ["default"]
+dependencies = [
+    "typing-extensions!=4.7.0,>=4.6.0",
+]
+files = [
+    {file = "pydantic_core-2.33.2-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:a7ec89dc587667f22b6a0b6579c249fca9026ce7c333fc142ba42411fa243cdc"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:3c6db6e52c6d70aa0d00d45cdb9b40f0433b96380071ea80b09277dba021ddf7"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e61206137cbc65e6d5256e1166f88331d3b6238e082d9f74613b9b765fb9025"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:eb8c529b2819c37140eb51b914153063d27ed88e3bdc31b71198a198e921e011"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c52b02ad8b4e2cf14ca7b3d918f3eb0ee91e63b3167c32591e57c4317e134f8f"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:96081f1605125ba0855dfda83f6f3df5ec90c61195421ba72223de35ccfb2f88"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8f57a69461af2a5fa6e6bbd7a5f60d3b7e6cebb687f55106933188e79ad155c1"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:572c7e6c8bb4774d2ac88929e3d1f12bc45714ae5ee6d9a788a9fb35e60bb04b"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:db4b41f9bd95fbe5acd76d89920336ba96f03e149097365afe1cb092fceb89a1"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:fa854f5cf7e33842a892e5c73f45327760bc7bc516339fda888c75ae60edaeb6"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:5f483cfb75ff703095c59e365360cb73e00185e01aaea067cd19acffd2ab20ea"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-win32.whl", hash = "sha256:9cb1da0f5a471435a7bc7e439b8a728e8b61e59784b2af70d7c169f8dd8ae290"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-win_amd64.whl", hash = "sha256:f941635f2a3d96b2973e867144fde513665c87f13fe0e193c158ac51bfaaa7b2"},
+    {file = "pydantic_core-2.33.2-cp312-cp312-win_arm64.whl", hash = "sha256:cca3868ddfaccfbc4bfb1d608e2ccaaebe0ae628e1416aeb9c4d88c001bb45ab"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:1082dd3e2d7109ad8b7da48e1d4710c8d06c253cbc4a27c1cff4fbcaa97a9e3f"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f517ca031dfc037a9c07e748cefd8d96235088b83b4f4ba8939105d20fa1dcd6"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a9f2c9dd19656823cb8250b0724ee9c60a82f3cdf68a080979d13092a3b0fef"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2b0a451c263b01acebe51895bfb0e1cc842a5c666efe06cdf13846c7418caa9a"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ea40a64d23faa25e62a70ad163571c0b342b8bf66d5fa612ac0dec4f069d916"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0fb2d542b4d66f9470e8065c5469ec676978d625a8b7a363f07d9a501a9cb36a"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fdac5d6ffa1b5a83bca06ffe7583f5576555e6c8b3a91fbd25ea7780f825f7d"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:04a1a413977ab517154eebb2d326da71638271477d6ad87a769102f7c2488c56"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:c8e7af2f4e0194c22b5b37205bfb293d166a7344a5b0d0eaccebc376546d77d5"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:5c92edd15cd58b3c2d34873597a1e20f13094f59cf88068adb18947df5455b4e"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:65132b7b4a1c0beded5e057324b7e16e10910c106d43675d9bd87d4f38dde162"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-win32.whl", hash = "sha256:52fb90784e0a242bb96ec53f42196a17278855b0f31ac7c3cc6f5c1ec4811849"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-win_amd64.whl", hash = "sha256:c083a3bdd5a93dfe480f1125926afcdbf2917ae714bdb80b36d34318b2bec5d9"},
+    {file = "pydantic_core-2.33.2-cp313-cp313-win_arm64.whl", hash = "sha256:e80b087132752f6b3d714f041ccf74403799d3b23a72722ea2e6ba2e892555b9"},
+    {file = "pydantic_core-2.33.2-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:61c18fba8e5e9db3ab908620af374db0ac1baa69f0f32df4f61ae23f15e586ac"},
+    {file = "pydantic_core-2.33.2-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:95237e53bb015f67b63c91af7518a62a8660376a6a0db19b89acc77a4d6199f5"},
+    {file = "pydantic_core-2.33.2-cp313-cp313t-win_amd64.whl", hash = "sha256:c2fc0a768ef76c15ab9238afa6da7f69895bb5d1ee83aeea2e3509af4472d0b9"},
+    {file = "pydantic_core-2.33.2.tar.gz", hash = "sha256:7cb8bc3605c29176e1b105350d2e6474142d7c1bd1d9327c4a9bdb46bf827acc"},
+]
+
+[[package]]
+name = "pydeck"
+version = "0.9.1"
+requires_python = ">=3.8"
+summary = "Widget for deck.gl maps"
+groups = ["default"]
+dependencies = [
+    "jinja2>=2.10.1",
+    "numpy>=1.16.4",
+]
+files = [
+    {file = "pydeck-0.9.1-py2.py3-none-any.whl", hash = "sha256:b3f75ba0d273fc917094fa61224f3f6076ca8752b93d46faf3bcfd9f9d59b038"},
+    {file = "pydeck-0.9.1.tar.gz", hash = "sha256:f74475ae637951d63f2ee58326757f8d4f9cd9f2a457cf42950715003e2cb605"},
+]
+
+[[package]]
+name = "pygments"
+version = "2.19.2"
+requires_python = ">=3.8"
+summary = "Pygments is a syntax highlighting package written in Python."
+groups = ["dev"]
+files = [
+    {file = "pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b"},
+    {file = "pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887"},
+]
+
+[[package]]
+name = "pykube-ng"
+version = "23.6.0"
+requires_python = ">=3.8,<4"
+summary = "Python client library for Kubernetes"
+groups = ["default"]
+dependencies = [
+    "pyyaml",
+    "requests>=2.12",
+    "urllib3>=1.26.9",
+]
+files = [
+    {file = "pykube-ng-23.6.0.tar.gz", hash = "sha256:46de8e17ed87c1a1014667d60e7d94a1f3fa2b8037b41e67d32c28b5869af35d"},
+    {file = "pykube_ng-23.6.0-py3-none-any.whl", hash = "sha256:63f20f634bfcd83966edec32f892286f75dffb817a2c097434ecc039e558ec8f"},
+]
+
+[[package]]
+name = "pyparsing"
+version = "3.2.5"
+requires_python = ">=3.9"
+summary = "pyparsing - Classes and methods to define and execute parsing grammars"
+groups = ["default"]
+files = [
+    {file = "pyparsing-3.2.5-py3-none-any.whl", hash = "sha256:e38a4f02064cf41fe6593d328d0512495ad1f3d8a91c4f73fc401b3079a59a5e"},
+    {file = "pyparsing-3.2.5.tar.gz", hash = "sha256:2df8d5b7b2802ef88e8d016a2eb9c7aeaa923529cd251ed0fe4608275d4105b6"},
+]
+
+[[package]]
+name = "pytest"
+version = "8.4.2"
+requires_python = ">=3.9"
+summary = "pytest: simple powerful testing with Python"
+groups = ["dev"]
+dependencies = [
+    "colorama>=0.4; sys_platform == \"win32\"",
+    "exceptiongroup>=1; python_version < \"3.11\"",
+    "iniconfig>=1",
+    "packaging>=20",
+    "pluggy<2,>=1.5",
+    "pygments>=2.7.2",
+    "tomli>=1; python_version < \"3.11\"",
+]
+files = [
+    {file = "pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79"},
+    {file = "pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01"},
+]
+
+[[package]]
+name = "pytest-cov"
+version = "7.0.0"
+requires_python = ">=3.9"
+summary = "Pytest plugin for measuring coverage."
+groups = ["dev"] +dependencies = [ + "coverage[toml]>=7.10.6", + "pluggy>=1.2", + "pytest>=7", +] +files = [ + {file = "pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861"}, + {file = "pytest_cov-7.0.0.tar.gz", hash = "sha256:33c97eda2e049a0c5298e91f519302a1334c26ac65c1a483d6206fd458361af1"}, +] + +[[package]] +name = "python-dateutil" +version = "2.9.0.post0" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +summary = "Extensions to the standard Python datetime module" +groups = ["default"] +dependencies = [ + "six>=1.5", +] +files = [ + {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, + {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, +] + +[[package]] +name = "pytz" +version = "2025.2" +summary = "World timezone definitions, modern and historical" +groups = ["default"] +files = [ + {file = "pytz-2025.2-py2.py3-none-any.whl", hash = "sha256:5ddf76296dd8c44c26eb8f4b6f35488f3ccbf6fbbd7adee0b7262d43f0ec2f00"}, + {file = "pytz-2025.2.tar.gz", hash = "sha256:360b9e3dbb49a209c21ad61809c7fb453643e048b38924c765813546746e81c3"}, +] + +[[package]] +name = "pyyaml" +version = "6.0.3" +requires_python = ">=3.8" +summary = "YAML parser and emitter for Python" +groups = ["default", "dev"] +files = [ + {file = "pyyaml-6.0.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:7f047e29dcae44602496db43be01ad42fc6f1cc0d8cd6c83d342306c32270196"}, + {file = "pyyaml-6.0.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fc09d0aa354569bc501d4e787133afc08552722d3ab34836a80547331bb5d4a0"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:9149cad251584d5fb4981be1ecde53a1ca46c891a79788c0df828d2f166bda28"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5fdec68f91a0c6739b380c83b951e2c72ac0197ace422360e6d5a959d8d97b2c"}, + {file = "pyyaml-6.0.3-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ba1cc08a7ccde2d2ec775841541641e4548226580ab850948cbfda66a1befcdc"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8dc52c23056b9ddd46818a57b78404882310fb473d63f17b07d5c40421e47f8e"}, + {file = "pyyaml-6.0.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:41715c910c881bc081f1e8872880d3c650acf13dfa8214bad49ed4cede7c34ea"}, + {file = "pyyaml-6.0.3-cp312-cp312-win32.whl", hash = "sha256:96b533f0e99f6579b3d4d4995707cf36df9100d67e0c8303a0c55b27b5f99bc5"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_amd64.whl", hash = "sha256:5fcd34e47f6e0b794d17de1b4ff496c00986e1c83f7ab2fb8fcfe9616ff7477b"}, + {file = "pyyaml-6.0.3-cp312-cp312-win_arm64.whl", hash = "sha256:64386e5e707d03a7e172c0701abfb7e10f0fb753ee1d773128192742712a98fd"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:8da9669d359f02c0b91ccc01cac4a67f16afec0dac22c2ad09f46bee0697eba8"}, + {file = "pyyaml-6.0.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:2283a07e2c21a2aa78d9c4442724ec1eb15f5e42a723b99cb3d822d48f5f7ad1"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ee2922902c45ae8ccada2c5b501ab86c36525b883eff4255313a253a3160861c"}, + {file = 
"pyyaml-6.0.3-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a33284e20b78bd4a18c8c2282d549d10bc8408a2a7ff57653c0cf0b9be0afce5"}, + {file = "pyyaml-6.0.3-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0f29edc409a6392443abf94b9cf89ce99889a1dd5376d94316ae5145dfedd5d6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:f7057c9a337546edc7973c0d3ba84ddcdf0daa14533c2065749c9075001090e6"}, + {file = "pyyaml-6.0.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:eda16858a3cab07b80edaf74336ece1f986ba330fdb8ee0d6c0d68fe82bc96be"}, + {file = "pyyaml-6.0.3-cp313-cp313-win32.whl", hash = "sha256:d0eae10f8159e8fdad514efdc92d74fd8d682c933a6dd088030f3834bc8e6b26"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_amd64.whl", hash = "sha256:79005a0d97d5ddabfeeea4cf676af11e647e41d81c9a7722a193022accdb6b7c"}, + {file = "pyyaml-6.0.3-cp313-cp313-win_arm64.whl", hash = "sha256:5498cd1645aa724a7c71c8f378eb29ebe23da2fc0d7a08071d89469bf1d2defb"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8d1fab6bb153a416f9aeb4b8763bc0f22a5586065f86f7664fc23339fc1c1fac"}, + {file = "pyyaml-6.0.3-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:34d5fcd24b8445fadc33f9cf348c1047101756fd760b4dacb5c3e99755703310"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:501a031947e3a9025ed4405a168e6ef5ae3126c59f90ce0cd6f2bfc477be31b7"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:b3bc83488de33889877a0f2543ade9f70c67d66d9ebb4ac959502e12de895788"}, + {file = "pyyaml-6.0.3-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c458b6d084f9b935061bc36216e8a69a7e293a2f1e68bf956dcd9e6cbcd143f5"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7c6610def4f163542a622a73fb39f534f8c101d690126992300bf3207eab9764"}, + {file = "pyyaml-6.0.3-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5190d403f121660ce8d1d2c1bb2ef1bd05b5f68533fc5c2ea899bd15f4399b35"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_amd64.whl", hash = "sha256:4a2e8cebe2ff6ab7d1050ecd59c25d4c8bd7e6f400f5f82b96557ac0abafd0ac"}, + {file = "pyyaml-6.0.3-cp314-cp314-win_arm64.whl", hash = "sha256:93dda82c9c22deb0a405ea4dc5f2d0cda384168e466364dec6255b293923b2f3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:02893d100e99e03eda1c8fd5c441d8c60103fd175728e23e431db1b589cf5ab3"}, + {file = "pyyaml-6.0.3-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:c1ff362665ae507275af2853520967820d9124984e0f7466736aea23d8611fba"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6adc77889b628398debc7b65c073bcb99c4a0237b248cacaf3fe8a557563ef6c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a80cb027f6b349846a3bf6d73b5e95e782175e52f22108cfa17876aaeff93702"}, + {file = "pyyaml-6.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00c4bdeba853cc34e7dd471f16b4114f4162dc03e6b7afcc2128711f0eca823c"}, + {file = "pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:66e1674c3ef6f541c35191caae2d429b967b99e02040f5ba928632d9a7f0f065"}, + {file = 
"pyyaml-6.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:16249ee61e95f858e83976573de0f5b2893b3677ba71c9dd36b9cf8be9ac6d65"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_amd64.whl", hash = "sha256:4ad1906908f2f5ae4e5a8ddfce73c320c2a1429ec52eafd27138b7f1cbe341c9"}, + {file = "pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b"}, + {file = "pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f"}, +] + +[[package]] +name = "referencing" +version = "0.36.2" +requires_python = ">=3.9" +summary = "JSON Referencing + Python" +groups = ["default"] +dependencies = [ + "attrs>=22.2.0", + "rpds-py>=0.7.0", + "typing-extensions>=4.4.0; python_version < \"3.13\"", +] +files = [ + {file = "referencing-0.36.2-py3-none-any.whl", hash = "sha256:e8699adbbf8b5c7de96d8ffa0eb5c158b3beafce084968e2ea8bb08c6794dcd0"}, + {file = "referencing-0.36.2.tar.gz", hash = "sha256:df2e89862cd09deabbdba16944cc3f10feb6b3e6f18e902f7cc25609a34775aa"}, +] + +[[package]] +name = "regex" +version = "2025.9.18" +requires_python = ">=3.9" +summary = "Alternative regular expression module, to replace re." +groups = ["default"] +files = [ + {file = "regex-2025.9.18-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:436e1b31d7efd4dcd52091d076482031c611dde58bf9c46ca6d0a26e33053a7e"}, + {file = "regex-2025.9.18-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c190af81e5576b9c5fdc708f781a52ff20f8b96386c6e2e0557a78402b029f4a"}, + {file = "regex-2025.9.18-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e4121f1ce2b2b5eec4b397cc1b277686e577e658d8f5870b7eb2d726bd2300ab"}, + {file = "regex-2025.9.18-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:300e25dbbf8299d87205e821a201057f2ef9aa3deb29caa01cd2cac669e508d5"}, + {file = "regex-2025.9.18-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:7b47fcf9f5316c0bdaf449e879407e1b9937a23c3b369135ca94ebc8d74b1742"}, + {file = "regex-2025.9.18-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:57a161bd3acaa4b513220b49949b07e252165e6b6dc910ee7617a37ff4f5b425"}, + {file = "regex-2025.9.18-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4f130c3a7845ba42de42f380fff3c8aebe89a810747d91bcf56d40a069f15352"}, + {file = "regex-2025.9.18-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5f96fa342b6f54dcba928dd452e8d8cb9f0d63e711d1721cd765bb9f73bb048d"}, + {file = "regex-2025.9.18-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:0f0d676522d68c207828dcd01fb6f214f63f238c283d9f01d85fc664c7c85b56"}, + {file = "regex-2025.9.18-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:40532bff8a1a0621e7903ae57fce88feb2e8a9a9116d341701302c9302aef06e"}, + {file = "regex-2025.9.18-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:039f11b618ce8d71a1c364fdee37da1012f5a3e79b1b2819a9f389cd82fd6282"}, + {file = "regex-2025.9.18-cp312-cp312-win32.whl", hash = "sha256:e1dd06f981eb226edf87c55d523131ade7285137fbde837c34dc9d1bf309f459"}, + {file = "regex-2025.9.18-cp312-cp312-win_amd64.whl", hash = "sha256:3d86b5247bf25fa3715e385aa9ff272c307e0636ce0c9595f64568b41f0a9c77"}, + {file = "regex-2025.9.18-cp312-cp312-win_arm64.whl", hash = "sha256:032720248cbeeae6444c269b78cb15664458b7bb9ed02401d3da59fe4d68c3a5"}, + {file = "regex-2025.9.18-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:2a40f929cd907c7e8ac7566ac76225a77701a6221bca937bdb70d56cb61f57b2"}, + {file = "regex-2025.9.18-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c90471671c2cdf914e58b6af62420ea9ecd06d1554d7474d50133ff26ae88feb"}, + {file = "regex-2025.9.18-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1a351aff9e07a2dabb5022ead6380cff17a4f10e4feb15f9100ee56c4d6d06af"}, + {file = "regex-2025.9.18-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:bc4b8e9d16e20ddfe16430c23468a8707ccad3365b06d4536142e71823f3ca29"}, + {file = "regex-2025.9.18-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4b8cdbddf2db1c5e80338ba2daa3cfa3dec73a46fff2a7dda087c8efbf12d62f"}, + {file = "regex-2025.9.18-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:a276937d9d75085b2c91fb48244349c6954f05ee97bba0963ce24a9d915b8b68"}, + {file = "regex-2025.9.18-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:92a8e375ccdc1256401c90e9dc02b8642894443d549ff5e25e36d7cf8a80c783"}, + {file = "regex-2025.9.18-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:0dc6893b1f502d73037cf807a321cdc9be29ef3d6219f7970f842475873712ac"}, + {file = "regex-2025.9.18-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:a61e85bfc63d232ac14b015af1261f826260c8deb19401c0597dbb87a864361e"}, + {file = "regex-2025.9.18-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:1ef86a9ebc53f379d921fb9a7e42b92059ad3ee800fcd9e0fe6181090e9f6c23"}, + {file = "regex-2025.9.18-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d3bc882119764ba3a119fbf2bd4f1b47bc56c1da5d42df4ed54ae1e8e66fdf8f"}, + {file = "regex-2025.9.18-cp313-cp313-win32.whl", hash = "sha256:3810a65675845c3bdfa58c3c7d88624356dd6ee2fc186628295e0969005f928d"}, + {file = "regex-2025.9.18-cp313-cp313-win_amd64.whl", hash = "sha256:16eaf74b3c4180ede88f620f299e474913ab6924d5c4b89b3833bc2345d83b3d"}, + {file = "regex-2025.9.18-cp313-cp313-win_arm64.whl", hash = "sha256:4dc98ba7dd66bd1261927a9f49bd5ee2bcb3660f7962f1ec02617280fc00f5eb"}, + {file = "regex-2025.9.18-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:fe5d50572bc885a0a799410a717c42b1a6b50e2f45872e2b40f4f288f9bce8a2"}, + {file = "regex-2025.9.18-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:1b9d9a2d6cda6621551ca8cf7a06f103adf72831153f3c0d982386110870c4d3"}, + {file = "regex-2025.9.18-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:13202e4c4ac0ef9a317fff817674b293c8f7e8c68d3190377d8d8b749f566e12"}, + {file = "regex-2025.9.18-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:874ff523b0fecffb090f80ae53dc93538f8db954c8bb5505f05b7787ab3402a0"}, + {file = "regex-2025.9.18-cp313-cp313t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:d13ab0490128f2bb45d596f754148cd750411afc97e813e4b3a61cf278a23bb6"}, + {file = "regex-2025.9.18-cp313-cp313t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:05440bc172bc4b4b37fb9667e796597419404dbba62e171e1f826d7d2a9ebcef"}, + {file = "regex-2025.9.18-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5514b8e4031fdfaa3d27e92c75719cbe7f379e28cacd939807289bce76d0e35a"}, + {file = "regex-2025.9.18-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:65d3c38c39efce73e0d9dc019697b39903ba25b1ad45ebbd730d2cf32741f40d"}, + {file = 
"regex-2025.9.18-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:ae77e447ebc144d5a26d50055c6ddba1d6ad4a865a560ec7200b8b06bc529368"}, + {file = "regex-2025.9.18-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:e3ef8cf53dc8df49d7e28a356cf824e3623764e9833348b655cfed4524ab8a90"}, + {file = "regex-2025.9.18-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:9feb29817df349c976da9a0debf775c5c33fc1c8ad7b9f025825da99374770b7"}, + {file = "regex-2025.9.18-cp313-cp313t-win32.whl", hash = "sha256:168be0d2f9b9d13076940b1ed774f98595b4e3c7fc54584bba81b3cc4181742e"}, + {file = "regex-2025.9.18-cp313-cp313t-win_amd64.whl", hash = "sha256:d59ecf3bb549e491c8104fea7313f3563c7b048e01287db0a90485734a70a730"}, + {file = "regex-2025.9.18-cp313-cp313t-win_arm64.whl", hash = "sha256:dbef80defe9fb21310948a2595420b36c6d641d9bea4c991175829b2cc4bc06a"}, + {file = "regex-2025.9.18-cp314-cp314-macosx_10_13_universal2.whl", hash = "sha256:c6db75b51acf277997f3adcd0ad89045d856190d13359f15ab5dda21581d9129"}, + {file = "regex-2025.9.18-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:8f9698b6f6895d6db810e0bda5364f9ceb9e5b11328700a90cae573574f61eea"}, + {file = "regex-2025.9.18-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:29cd86aa7cb13a37d0f0d7c21d8d949fe402ffa0ea697e635afedd97ab4b69f1"}, + {file = "regex-2025.9.18-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7c9f285a071ee55cd9583ba24dde006e53e17780bb309baa8e4289cd472bcc47"}, + {file = "regex-2025.9.18-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5adf266f730431e3be9021d3e5b8d5ee65e563fec2883ea8093944d21863b379"}, + {file = "regex-2025.9.18-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:1137cabc0f38807de79e28d3f6e3e3f2cc8cfb26bead754d02e6d1de5f679203"}, + {file = "regex-2025.9.18-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7cc9e5525cada99699ca9223cce2d52e88c52a3d2a0e842bd53de5497c604164"}, + {file = "regex-2025.9.18-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:bbb9246568f72dce29bcd433517c2be22c7791784b223a810225af3b50d1aafb"}, + {file = "regex-2025.9.18-cp314-cp314-musllinux_1_2_ppc64le.whl", hash = "sha256:6a52219a93dd3d92c675383efff6ae18c982e2d7651c792b1e6d121055808743"}, + {file = "regex-2025.9.18-cp314-cp314-musllinux_1_2_s390x.whl", hash = "sha256:ae9b3840c5bd456780e3ddf2f737ab55a79b790f6409182012718a35c6d43282"}, + {file = "regex-2025.9.18-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d488c236ac497c46a5ac2005a952c1a0e22a07be9f10c3e735bc7d1209a34773"}, + {file = "regex-2025.9.18-cp314-cp314-win32.whl", hash = "sha256:0c3506682ea19beefe627a38872d8da65cc01ffa25ed3f2e422dffa1474f0788"}, + {file = "regex-2025.9.18-cp314-cp314-win_amd64.whl", hash = "sha256:57929d0f92bebb2d1a83af372cd0ffba2263f13f376e19b1e4fa32aec4efddc3"}, + {file = "regex-2025.9.18-cp314-cp314-win_arm64.whl", hash = "sha256:6a4b44df31d34fa51aa5c995d3aa3c999cec4d69b9bd414a8be51984d859f06d"}, + {file = "regex-2025.9.18-cp314-cp314t-macosx_10_13_universal2.whl", hash = "sha256:b176326bcd544b5e9b17d6943f807697c0cb7351f6cfb45bf5637c95ff7e6306"}, + {file = "regex-2025.9.18-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:0ffd9e230b826b15b369391bec167baed57c7ce39efc35835448618860995946"}, + {file = "regex-2025.9.18-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:ec46332c41add73f2b57e2f5b642f991f6b15e50e9f86285e08ffe3a512ac39f"}, + {file = 
"regex-2025.9.18-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b80fa342ed1ea095168a3f116637bd1030d39c9ff38dc04e54ef7c521e01fc95"}, + {file = "regex-2025.9.18-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f4d97071c0ba40f0cf2a93ed76e660654c399a0a04ab7d85472239460f3da84b"}, + {file = "regex-2025.9.18-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:0ac936537ad87cef9e0e66c5144484206c1354224ee811ab1519a32373e411f3"}, + {file = "regex-2025.9.18-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dec57f96d4def58c422d212d414efe28218d58537b5445cf0c33afb1b4768571"}, + {file = "regex-2025.9.18-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:48317233294648bf7cd068857f248e3a57222259a5304d32c7552e2284a1b2ad"}, + {file = "regex-2025.9.18-cp314-cp314t-musllinux_1_2_ppc64le.whl", hash = "sha256:274687e62ea3cf54846a9b25fc48a04459de50af30a7bd0b61a9e38015983494"}, + {file = "regex-2025.9.18-cp314-cp314t-musllinux_1_2_s390x.whl", hash = "sha256:a78722c86a3e7e6aadf9579e3b0ad78d955f2d1f1a8ca4f67d7ca258e8719d4b"}, + {file = "regex-2025.9.18-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:06104cd203cdef3ade989a1c45b6215bf42f8b9dd705ecc220c173233f7cba41"}, + {file = "regex-2025.9.18-cp314-cp314t-win32.whl", hash = "sha256:2e1eddc06eeaffd249c0adb6fafc19e2118e6308c60df9db27919e96b5656096"}, + {file = "regex-2025.9.18-cp314-cp314t-win_amd64.whl", hash = "sha256:8620d247fb8c0683ade51217b459cb4a1081c0405a3072235ba43a40d355c09a"}, + {file = "regex-2025.9.18-cp314-cp314t-win_arm64.whl", hash = "sha256:b7531a8ef61de2c647cdf68b3229b071e46ec326b3138b2180acb4275f470b01"}, + {file = "regex-2025.9.18.tar.gz", hash = "sha256:c5ba23274c61c6fef447ba6a39333297d0c247f53059dba0bca415cac511edc4"}, +] + +[[package]] +name = "requests" +version = "2.32.5" +requires_python = ">=3.9" +summary = "Python HTTP for Humans." +groups = ["default"] +dependencies = [ + "certifi>=2017.4.17", + "charset-normalizer<4,>=2", + "idna<4,>=2.5", + "urllib3<3,>=1.21.1", +] +files = [ + {file = "requests-2.32.5-py3-none-any.whl", hash = "sha256:2462f94637a34fd532264295e186976db0f5d453d1cdd31473c85a6a161affb6"}, + {file = "requests-2.32.5.tar.gz", hash = "sha256:dbba0bac56e100853db0ea71b82b4dfd5fe2bf6d3754a8893c3af500cec7d7cf"}, +] + +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +requires_python = ">=3.4" +summary = "OAuthlib authentication support for Requests." 
+groups = ["default"] +dependencies = [ + "oauthlib>=3.0.0", + "requests>=2.0.0", +] +files = [ + {file = "requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9"}, + {file = "requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36"}, +] + +[[package]] +name = "requests-toolbelt" +version = "1.0.0" +requires_python = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +summary = "A utility belt for advanced users of python-requests" +groups = ["default"] +dependencies = [ + "requests<3.0.0,>=2.0.1", +] +files = [ + {file = "requests-toolbelt-1.0.0.tar.gz", hash = "sha256:7681a0a3d047012b5bdc0ee37d7f8f07ebe76ab08caeccfc3921ce23c88d5bc6"}, + {file = "requests_toolbelt-1.0.0-py2.py3-none-any.whl", hash = "sha256:cccfdd665f0a24fcf4726e690f65639d272bb0637b9b92dfd91a5568ccf6bd06"}, +] + +[[package]] +name = "rpds-py" +version = "0.27.1" +requires_python = ">=3.9" +summary = "Python bindings to Rust's persistent data structures (rpds)" +groups = ["default"] +files = [ + {file = "rpds_py-0.27.1-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:ae2775c1973e3c30316892737b91f9283f9908e3cc7625b9331271eaaed7dc90"}, + {file = "rpds_py-0.27.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2643400120f55c8a96f7c9d858f7be0c88d383cd4653ae2cf0d0c88f668073e5"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16323f674c089b0360674a4abd28d5042947d54ba620f72514d69be4ff64845e"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:9a1f4814b65eacac94a00fc9a526e3fdafd78e439469644032032d0d63de4881"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7ba32c16b064267b22f1850a34051121d423b6f7338a12b9459550eb2096e7ec"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e5c20f33fd10485b80f65e800bbe5f6785af510b9f4056c5a3c612ebc83ba6cb"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:466bfe65bd932da36ff279ddd92de56b042f2266d752719beb97b08526268ec5"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_31_riscv64.whl", hash = "sha256:41e532bbdcb57c92ba3be62c42e9f096431b4cf478da9bc3bc6ce5c38ab7ba7a"}, + {file = "rpds_py-0.27.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f149826d742b406579466283769a8ea448eed82a789af0ed17b0cd5770433444"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:80c60cfb5310677bd67cb1e85a1e8eb52e12529545441b43e6f14d90b878775a"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:7ee6521b9baf06085f62ba9c7a3e5becffbc32480d2f1b351559c001c38ce4c1"}, + {file = "rpds_py-0.27.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a512c8263249a9d68cac08b05dd59d2b3f2061d99b322813cbcc14c3c7421998"}, + {file = "rpds_py-0.27.1-cp312-cp312-win32.whl", hash = "sha256:819064fa048ba01b6dadc5116f3ac48610435ac9a0058bbde98e569f9e785c39"}, + {file = "rpds_py-0.27.1-cp312-cp312-win_amd64.whl", hash = "sha256:d9199717881f13c32c4046a15f024971a3b78ad4ea029e8da6b86e5aa9cf4594"}, + {file = "rpds_py-0.27.1-cp312-cp312-win_arm64.whl", hash = "sha256:33aa65b97826a0e885ef6e278fbd934e98cdcfed80b63946025f01e2f5b29502"}, + {file = "rpds_py-0.27.1-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:e4b9fcfbc021633863a37e92571d6f91851fa656f0180246e84cbd8b3f6b329b"}, + 
{file = "rpds_py-0.27.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:1441811a96eadca93c517d08df75de45e5ffe68aa3089924f963c782c4b898cf"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:55266dafa22e672f5a4f65019015f90336ed31c6383bd53f5e7826d21a0e0b83"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d78827d7ac08627ea2c8e02c9e5b41180ea5ea1f747e9db0915e3adf36b62dcf"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae92443798a40a92dc5f0b01d8a7c93adde0c4dc965310a29ae7c64d72b9fad2"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c46c9dd2403b66a2a3b9720ec4b74d4ab49d4fabf9f03dfdce2d42af913fe8d0"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2efe4eb1d01b7f5f1939f4ef30ecea6c6b3521eec451fb93191bf84b2a522418"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_31_riscv64.whl", hash = "sha256:15d3b4d83582d10c601f481eca29c3f138d44c92187d197aff663a269197c02d"}, + {file = "rpds_py-0.27.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:4ed2e16abbc982a169d30d1a420274a709949e2cbdef119fe2ec9d870b42f274"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:a75f305c9b013289121ec0f1181931975df78738cdf650093e6b86d74aa7d8dd"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:67ce7620704745881a3d4b0ada80ab4d99df390838839921f99e63c474f82cf2"}, + {file = "rpds_py-0.27.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9d992ac10eb86d9b6f369647b6a3f412fc0075cfd5d799530e84d335e440a002"}, + {file = "rpds_py-0.27.1-cp313-cp313-win32.whl", hash = "sha256:4f75e4bd8ab8db624e02c8e2fc4063021b58becdbe6df793a8111d9343aec1e3"}, + {file = "rpds_py-0.27.1-cp313-cp313-win_amd64.whl", hash = "sha256:f9025faafc62ed0b75a53e541895ca272815bec18abe2249ff6501c8f2e12b83"}, + {file = "rpds_py-0.27.1-cp313-cp313-win_arm64.whl", hash = "sha256:ed10dc32829e7d222b7d3b93136d25a406ba9788f6a7ebf6809092da1f4d279d"}, + {file = "rpds_py-0.27.1-cp313-cp313t-macosx_10_12_x86_64.whl", hash = "sha256:92022bbbad0d4426e616815b16bc4127f83c9a74940e1ccf3cfe0b387aba0228"}, + {file = "rpds_py-0.27.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:47162fdab9407ec3f160805ac3e154df042e577dd53341745fc7fb3f625e6d92"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb89bec23fddc489e5d78b550a7b773557c9ab58b7946154a10a6f7a214a48b2"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e48af21883ded2b3e9eb48cb7880ad8598b31ab752ff3be6457001d78f416723"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6f5b7bd8e219ed50299e58551a410b64daafb5017d54bbe822e003856f06a802"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08f1e20bccf73b08d12d804d6e1c22ca5530e71659e6673bce31a6bb71c1e73f"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0dc5dceeaefcc96dc192e3a80bbe1d6c410c469e97bdd47494a7d930987f18b2"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_31_riscv64.whl", hash = "sha256:d76f9cc8665acdc0c9177043746775aa7babbf479b5520b78ae4002d889f5c21"}, + {file = "rpds_py-0.27.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:134fae0e36022edad8290a6661edf40c023562964efea0cc0ec7f5d392d2aaef"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:eb11a4f1b2b63337cfd3b4d110af778a59aae51c81d195768e353d8b52f88081"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:13e608ac9f50a0ed4faec0e90ece76ae33b34c0e8656e3dceb9a7db994c692cd"}, + {file = "rpds_py-0.27.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:dd2135527aa40f061350c3f8f89da2644de26cd73e4de458e79606384f4f68e7"}, + {file = "rpds_py-0.27.1-cp313-cp313t-win32.whl", hash = "sha256:3020724ade63fe320a972e2ffd93b5623227e684315adce194941167fee02688"}, + {file = "rpds_py-0.27.1-cp313-cp313t-win_amd64.whl", hash = "sha256:8ee50c3e41739886606388ba3ab3ee2aae9f35fb23f833091833255a31740797"}, + {file = "rpds_py-0.27.1-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:acb9aafccaae278f449d9c713b64a9e68662e7799dbd5859e2c6b3c67b56d334"}, + {file = "rpds_py-0.27.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:b7fb801aa7f845ddf601c49630deeeccde7ce10065561d92729bfe81bd21fb33"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe0dd05afb46597b9a2e11c351e5e4283c741237e7f617ffb3252780cca9336a"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b6dfb0e058adb12d8b1d1b25f686e94ffa65d9995a5157afe99743bf7369d62b"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ed090ccd235f6fa8bb5861684567f0a83e04f52dfc2e5c05f2e4b1309fcf85e7"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bf876e79763eecf3e7356f157540d6a093cef395b65514f17a356f62af6cc136"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:12ed005216a51b1d6e2b02a7bd31885fe317e45897de81d86dcce7d74618ffff"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_31_riscv64.whl", hash = "sha256:ee4308f409a40e50593c7e3bb8cbe0b4d4c66d1674a316324f0c2f5383b486f9"}, + {file = "rpds_py-0.27.1-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0b08d152555acf1f455154d498ca855618c1378ec810646fcd7c76416ac6dc60"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:dce51c828941973a5684d458214d3a36fcd28da3e1875d659388f4f9f12cc33e"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_i686.whl", hash = "sha256:c1476d6f29eb81aa4151c9a31219b03f1f798dc43d8af1250a870735516a1212"}, + {file = "rpds_py-0.27.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:3ce0cac322b0d69b63c9cdb895ee1b65805ec9ffad37639f291dd79467bee675"}, + {file = "rpds_py-0.27.1-cp314-cp314-win32.whl", hash = "sha256:dfbfac137d2a3d0725758cd141f878bf4329ba25e34979797c89474a89a8a3a3"}, + {file = "rpds_py-0.27.1-cp314-cp314-win_amd64.whl", hash = "sha256:a6e57b0abfe7cc513450fcf529eb486b6e4d3f8aee83e92eb5f1ef848218d456"}, + {file = "rpds_py-0.27.1-cp314-cp314-win_arm64.whl", hash = "sha256:faf8d146f3d476abfee026c4ae3bdd9ca14236ae4e4c310cbd1cf75ba33d24a3"}, + {file = "rpds_py-0.27.1-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:ba81d2b56b6d4911ce735aad0a1d4495e808b8ee4dc58715998741a26874e7c2"}, + {file = "rpds_py-0.27.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:84f7d509870098de0e864cad0102711c1e24e9b1a50ee713b65928adb22269e4"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:a9e960fc78fecd1100539f14132425e1d5fe44ecb9239f8f27f079962021523e"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:62f85b665cedab1a503747617393573995dac4600ff51869d69ad2f39eb5e817"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fed467af29776f6556250c9ed85ea5a4dd121ab56a5f8b206e3e7a4c551e48ec"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2729615f9d430af0ae6b36cf042cb55c0936408d543fb691e1a9e36648fd35a"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b207d881a9aef7ba753d69c123a35d96ca7cb808056998f6b9e8747321f03b8"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_31_riscv64.whl", hash = "sha256:639fd5efec029f99b79ae47e5d7e00ad8a773da899b6309f6786ecaf22948c48"}, + {file = "rpds_py-0.27.1-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:fecc80cb2a90e28af8a9b366edacf33d7a91cbfe4c2c4544ea1246e949cfebeb"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:42a89282d711711d0a62d6f57d81aa43a1368686c45bc1c46b7f079d55692734"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_i686.whl", hash = "sha256:cf9931f14223de59551ab9d38ed18d92f14f055a5f78c1d8ad6493f735021bbb"}, + {file = "rpds_py-0.27.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:f39f58a27cc6e59f432b568ed8429c7e1641324fbe38131de852cd77b2d534b0"}, + {file = "rpds_py-0.27.1-cp314-cp314t-win32.whl", hash = "sha256:d5fa0ee122dc09e23607a28e6d7b150da16c662e66409bbe85230e4c85bb528a"}, + {file = "rpds_py-0.27.1-cp314-cp314t-win_amd64.whl", hash = "sha256:6567d2bb951e21232c2f660c24cf3470bb96de56cdcb3f071a83feeaff8a2772"}, + {file = "rpds_py-0.27.1.tar.gz", hash = "sha256:26a1c73171d10b7acccbded82bf6a586ab8203601e565badc74bbbf8bc5a10f8"}, +] + +[[package]] +name = "rsa" +version = "4.9.1" +requires_python = "<4,>=3.6" +summary = "Pure-Python RSA implementation" +groups = ["default"] +dependencies = [ + "pyasn1>=0.1.3", +] +files = [ + {file = "rsa-4.9.1-py3-none-any.whl", hash = "sha256:68635866661c6836b8d39430f97a996acbd61bfa49406748ea243539fe239762"}, + {file = "rsa-4.9.1.tar.gz", hash = "sha256:e7bdbfdb5497da4c07dfd35530e1a902659db6ff241e39d9953cad06ebd0ae75"}, +] + +[[package]] +name = "ruff" +version = "0.13.3" +requires_python = ">=3.7" +summary = "An extremely fast Python linter and code formatter, written in Rust." 
+groups = ["dev"] +files = [ + {file = "ruff-0.13.3-py3-none-linux_armv6l.whl", hash = "sha256:311860a4c5e19189c89d035638f500c1e191d283d0cc2f1600c8c80d6dcd430c"}, + {file = "ruff-0.13.3-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:2bdad6512fb666b40fcadb65e33add2b040fc18a24997d2e47fee7d66f7fcae2"}, + {file = "ruff-0.13.3-py3-none-macosx_11_0_arm64.whl", hash = "sha256:fc6fa4637284708d6ed4e5e970d52fc3b76a557d7b4e85a53013d9d201d93286"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1c9e6469864f94a98f412f20ea143d547e4c652f45e44f369d7b74ee78185838"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5bf62b705f319476c78891e0e97e965b21db468b3c999086de8ffb0d40fd2822"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:78cc1abed87ce40cb07ee0667ce99dbc766c9f519eabfd948ed87295d8737c60"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4fb75e7c402d504f7a9a259e0442b96403fa4a7310ffe3588d11d7e170d2b1e3"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:17b951f9d9afb39330b2bdd2dd144ce1c1335881c277837ac1b50bfd99985ed3"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6052f8088728898e0a449f0dde8fafc7ed47e4d878168b211977e3e7e854f662"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dc742c50f4ba72ce2a3be362bd359aef7d0d302bf7637a6f942eaa763bd292af"}, + {file = "ruff-0.13.3-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:8e5640349493b378431637019366bbd73c927e515c9c1babfea3e932f5e68e1d"}, + {file = "ruff-0.13.3-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:6b139f638a80eae7073c691a5dd8d581e0ba319540be97c343d60fb12949c8d0"}, + {file = "ruff-0.13.3-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:6b547def0a40054825de7cfa341039ebdfa51f3d4bfa6a0772940ed351d2746c"}, + {file = "ruff-0.13.3-py3-none-musllinux_1_2_i686.whl", hash = "sha256:9cc48a3564423915c93573f1981d57d101e617839bef38504f85f3677b3a0a3e"}, + {file = "ruff-0.13.3-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:1a993b17ec03719c502881cb2d5f91771e8742f2ca6de740034433a97c561989"}, + {file = "ruff-0.13.3-py3-none-win32.whl", hash = "sha256:f14e0d1fe6460f07814d03c6e32e815bff411505178a1f539a38f6097d3e8ee3"}, + {file = "ruff-0.13.3-py3-none-win_amd64.whl", hash = "sha256:621e2e5812b691d4f244638d693e640f188bacbb9bc793ddd46837cea0503dd2"}, + {file = "ruff-0.13.3-py3-none-win_arm64.whl", hash = "sha256:9e9e9d699841eaf4c2c798fa783df2fabc680b72059a02ca0ed81c460bc58330"}, + {file = "ruff-0.13.3.tar.gz", hash = "sha256:5b0ba0db740eefdfbcce4299f49e9eaefc643d4d007749d77d047c2bab19908e"}, +] + +[[package]] +name = "safetensors" +version = "0.6.2" +requires_python = ">=3.9" +summary = "" +groups = ["default"] +files = [ + {file = "safetensors-0.6.2-cp38-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9c85ede8ec58f120bad982ec47746981e210492a6db876882aa021446af8ffba"}, + {file = "safetensors-0.6.2-cp38-abi3-macosx_11_0_arm64.whl", hash = "sha256:d6675cf4b39c98dbd7d940598028f3742e0375a6b4d4277e76beb0c35f4b843b"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1d2d2b3ce1e2509c68932ca03ab8f20570920cd9754b05063d4368ee52833ecd"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:93de35a18f46b0f5a6a1f9e26d91b442094f2df02e9fd7acf224cfec4238821a"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:89a89b505f335640f9120fac65ddeb83e40f1fd081cb8ed88b505bdccec8d0a1"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fc4d0d0b937e04bdf2ae6f70cd3ad51328635fe0e6214aa1fc811f3b576b3bda"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8045db2c872db8f4cbe3faa0495932d89c38c899c603f21e9b6486951a5ecb8f"}, + {file = "safetensors-0.6.2-cp38-abi3-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:81e67e8bab9878bb568cffbc5f5e655adb38d2418351dc0859ccac158f753e19"}, + {file = "safetensors-0.6.2-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b0e4d029ab0a0e0e4fdf142b194514695b1d7d3735503ba700cf36d0fc7136ce"}, + {file = "safetensors-0.6.2-cp38-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:fa48268185c52bfe8771e46325a1e21d317207bcabcb72e65c6e28e9ffeb29c7"}, + {file = "safetensors-0.6.2-cp38-abi3-musllinux_1_2_i686.whl", hash = "sha256:d83c20c12c2d2f465997c51b7ecb00e407e5f94d7dec3ea0cc11d86f60d3fde5"}, + {file = "safetensors-0.6.2-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d944cea65fad0ead848b6ec2c37cc0b197194bec228f8020054742190e9312ac"}, + {file = "safetensors-0.6.2-cp38-abi3-win32.whl", hash = "sha256:cab75ca7c064d3911411461151cb69380c9225798a20e712b102edda2542ddb1"}, + {file = "safetensors-0.6.2-cp38-abi3-win_amd64.whl", hash = "sha256:c7b214870df923cbc1593c3faee16bec59ea462758699bd3fee399d00aac072c"}, + {file = "safetensors-0.6.2.tar.gz", hash = "sha256:43ff2aa0e6fa2dc3ea5524ac7ad93a9839256b8703761e76e2d0b2a3fa4f15d9"}, +] + +[[package]] +name = "scipy" +version = "1.16.2" +requires_python = ">=3.11" +summary = "Fundamental algorithms for scientific computing in Python" +groups = ["default"] +dependencies = [ + "numpy<2.6,>=1.25.2", +] +files = [ + {file = "scipy-1.16.2-cp312-cp312-macosx_10_14_x86_64.whl", hash = "sha256:89d6c100fa5c48472047632e06f0876b3c4931aac1f4291afc81a3644316bb0d"}, + {file = "scipy-1.16.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:ca748936cd579d3f01928b30a17dc474550b01272d8046e3e1ee593f23620371"}, + {file = "scipy-1.16.2-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:fac4f8ce2ddb40e2e3d0f7ec36d2a1e7f92559a2471e59aec37bd8d9de01fec0"}, + {file = "scipy-1.16.2-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:033570f1dcefd79547a88e18bccacff025c8c647a330381064f561d43b821232"}, + {file = "scipy-1.16.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ea3421209bf00c8a5ef2227de496601087d8f638a2363ee09af059bd70976dc1"}, + {file = "scipy-1.16.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f66bd07ba6f84cd4a380b41d1bf3c59ea488b590a2ff96744845163309ee8e2f"}, + {file = "scipy-1.16.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e9feab931bd2aea4a23388c962df6468af3d808ddf2d40f94a81c5dc38f32ef"}, + {file = "scipy-1.16.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:03dfc75e52f72cf23ec2ced468645321407faad8f0fe7b1f5b49264adbc29cb1"}, + {file = "scipy-1.16.2-cp312-cp312-win_amd64.whl", hash = "sha256:0ce54e07bbb394b417457409a64fd015be623f36e330ac49306433ffe04bc97e"}, + {file = "scipy-1.16.2-cp312-cp312-win_arm64.whl", hash = "sha256:2a8ffaa4ac0df81a0b94577b18ee079f13fecdb924df3328fc44a7dc5ac46851"}, + {file = "scipy-1.16.2-cp313-cp313-macosx_10_14_x86_64.whl", hash = 
"sha256:84f7bf944b43e20b8a894f5fe593976926744f6c185bacfcbdfbb62736b5cc70"}, + {file = "scipy-1.16.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:5c39026d12edc826a1ef2ad35ad1e6d7f087f934bb868fc43fa3049c8b8508f9"}, + {file = "scipy-1.16.2-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e52729ffd45b68777c5319560014d6fd251294200625d9d70fd8626516fc49f5"}, + {file = "scipy-1.16.2-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:024dd4a118cccec09ca3209b7e8e614931a6ffb804b2a601839499cb88bdf925"}, + {file = "scipy-1.16.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7a5dc7ee9c33019973a470556081b0fd3c9f4c44019191039f9769183141a4d9"}, + {file = "scipy-1.16.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c2275ff105e508942f99d4e3bc56b6ef5e4b3c0af970386ca56b777608ce95b7"}, + {file = "scipy-1.16.2-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:af80196eaa84f033e48444d2e0786ec47d328ba00c71e4299b602235ffef9acb"}, + {file = "scipy-1.16.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9fb1eb735fe3d6ed1f89918224e3385fbf6f9e23757cacc35f9c78d3b712dd6e"}, + {file = "scipy-1.16.2-cp313-cp313-win_amd64.whl", hash = "sha256:fda714cf45ba43c9d3bae8f2585c777f64e3f89a2e073b668b32ede412d8f52c"}, + {file = "scipy-1.16.2-cp313-cp313-win_arm64.whl", hash = "sha256:2f5350da923ccfd0b00e07c3e5cfb316c1c0d6c1d864c07a72d092e9f20db104"}, + {file = "scipy-1.16.2-cp313-cp313t-macosx_10_14_x86_64.whl", hash = "sha256:53d8d2ee29b925344c13bda64ab51785f016b1b9617849dac10897f0701b20c1"}, + {file = "scipy-1.16.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:9e05e33657efb4c6a9d23bd8300101536abd99c85cca82da0bffff8d8764d08a"}, + {file = "scipy-1.16.2-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:7fe65b36036357003b3ef9d37547abeefaa353b237e989c21027b8ed62b12d4f"}, + {file = "scipy-1.16.2-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:6406d2ac6d40b861cccf57f49592f9779071655e9f75cd4f977fa0bdd09cb2e4"}, + {file = "scipy-1.16.2-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ff4dc42bd321991fbf611c23fc35912d690f731c9914bf3af8f417e64aca0f21"}, + {file = "scipy-1.16.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:654324826654d4d9133e10675325708fb954bc84dae6e9ad0a52e75c6b1a01d7"}, + {file = "scipy-1.16.2-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:63870a84cd15c44e65220eaed2dac0e8f8b26bbb991456a033c1d9abfe8a94f8"}, + {file = "scipy-1.16.2-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:fa01f0f6a3050fa6a9771a95d5faccc8e2f5a92b4a2e5440a0fa7264a2398472"}, + {file = "scipy-1.16.2-cp313-cp313t-win_amd64.whl", hash = "sha256:116296e89fba96f76353a8579820c2512f6e55835d3fad7780fece04367de351"}, + {file = "scipy-1.16.2-cp313-cp313t-win_arm64.whl", hash = "sha256:98e22834650be81d42982360382b43b17f7ba95e0e6993e2a4f5b9ad9283a94d"}, + {file = "scipy-1.16.2-cp314-cp314-macosx_10_14_x86_64.whl", hash = "sha256:567e77755019bb7461513c87f02bb73fb65b11f049aaaa8ca17cfaa5a5c45d77"}, + {file = "scipy-1.16.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:17d9bb346194e8967296621208fcdfd39b55498ef7d2f376884d5ac47cec1a70"}, + {file = "scipy-1.16.2-cp314-cp314-macosx_14_0_arm64.whl", hash = "sha256:0a17541827a9b78b777d33b623a6dcfe2ef4a25806204d08ead0768f4e529a88"}, + {file = "scipy-1.16.2-cp314-cp314-macosx_14_0_x86_64.whl", hash = "sha256:d7d4c6ba016ffc0f9568d012f5f1eb77ddd99412aea121e6fa8b4c3b7cbad91f"}, + {file = "scipy-1.16.2-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", 
hash = "sha256:9702c4c023227785c779cba2e1d6f7635dbb5b2e0936cdd3a4ecb98d78fd41eb"}, + {file = "scipy-1.16.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:d1cdf0ac28948d225decdefcc45ad7dd91716c29ab56ef32f8e0d50657dffcc7"}, + {file = "scipy-1.16.2-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:70327d6aa572a17c2941cdfb20673f82e536e91850a2e4cb0c5b858b690e1548"}, + {file = "scipy-1.16.2-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:5221c0b2a4b58aa7c4ed0387d360fd90ee9086d383bb34d9f2789fafddc8a936"}, + {file = "scipy-1.16.2-cp314-cp314-win_amd64.whl", hash = "sha256:f5a85d7b2b708025af08f060a496dd261055b617d776fc05a1a1cc69e09fe9ff"}, + {file = "scipy-1.16.2-cp314-cp314-win_arm64.whl", hash = "sha256:2cc73a33305b4b24556957d5857d6253ce1e2dcd67fa0ff46d87d1670b3e1e1d"}, + {file = "scipy-1.16.2-cp314-cp314t-macosx_10_14_x86_64.whl", hash = "sha256:9ea2a3fed83065d77367775d689401a703d0f697420719ee10c0780bcab594d8"}, + {file = "scipy-1.16.2-cp314-cp314t-macosx_12_0_arm64.whl", hash = "sha256:7280d926f11ca945c3ef92ba960fa924e1465f8d07ce3a9923080363390624c4"}, + {file = "scipy-1.16.2-cp314-cp314t-macosx_14_0_arm64.whl", hash = "sha256:8afae1756f6a1fe04636407ef7dbece33d826a5d462b74f3d0eb82deabefd831"}, + {file = "scipy-1.16.2-cp314-cp314t-macosx_14_0_x86_64.whl", hash = "sha256:5c66511f29aa8d233388e7416a3f20d5cae7a2744d5cee2ecd38c081f4e861b3"}, + {file = "scipy-1.16.2-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:efe6305aeaa0e96b0ccca5ff647a43737d9a092064a3894e46c414db84bc54ac"}, + {file = "scipy-1.16.2-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7f3a337d9ae06a1e8d655ee9d8ecb835ea5ddcdcbd8d23012afa055ab014f374"}, + {file = "scipy-1.16.2-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:bab3605795d269067d8ce78a910220262711b753de8913d3deeaedb5dded3bb6"}, + {file = "scipy-1.16.2-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:b0348d8ddb55be2a844c518cd8cc8deeeb8aeba707cf834db5758fc89b476a2c"}, + {file = "scipy-1.16.2-cp314-cp314t-win_amd64.whl", hash = "sha256:26284797e38b8a75e14ea6631d29bda11e76ceaa6ddb6fdebbfe4c4d90faf2f9"}, + {file = "scipy-1.16.2-cp314-cp314t-win_arm64.whl", hash = "sha256:d2a4472c231328d4de38d5f1f68fdd6d28a615138f842580a8a321b5845cf779"}, + {file = "scipy-1.16.2.tar.gz", hash = "sha256:af029b153d243a80afb6eabe40b0a07f8e35c9adc269c019f364ad747f826a6b"}, +] + +[[package]] +name = "seaborn" +version = "0.13.2" +requires_python = ">=3.8" +summary = "Statistical data visualization" +groups = ["default"] +dependencies = [ + "matplotlib!=3.6.1,>=3.4", + "numpy!=1.24.0,>=1.20", + "pandas>=1.2", +] +files = [ + {file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"}, + {file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"}, +] + +[[package]] +name = "six" +version = "1.17.0" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +summary = "Python 2 and 3 compatibility utilities" +groups = ["default"] +files = [ + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, +] + +[[package]] +name = "smmap" +version = "5.0.2" +requires_python = ">=3.7" +summary = "A pure Python implementation of a sliding window memory map manager" +groups = ["default"] +files = [ + {file = 
"smmap-5.0.2-py3-none-any.whl", hash = "sha256:b30115f0def7d7531d22a0fb6502488d879e75b260a9db4d0819cfb25403af5e"}, + {file = "smmap-5.0.2.tar.gz", hash = "sha256:26ea65a03958fa0c8a1c7e8c7a58fdc77221b8910f6be2131affade476898ad5"}, +] + +[[package]] +name = "streamlit" +version = "1.48.0" +requires_python = "!=3.9.7,>=3.9" +summary = "A faster way to build and share data apps" +groups = ["default"] +dependencies = [ + "altair!=5.4.0,!=5.4.1,<6,>=4.0", + "blinker<2,>=1.5.0", + "cachetools<7,>=4.0", + "click<9,>=7.0", + "gitpython!=3.1.19,<4,>=3.0.7", + "numpy<3,>=1.23", + "packaging<26,>=20", + "pandas<3,>=1.4.0", + "pillow<12,>=7.1.0", + "protobuf<7,>=3.20", + "pyarrow>=7.0", + "pydeck<1,>=0.8.0b4", + "requests<3,>=2.27", + "tenacity<10,>=8.1.0", + "toml<2,>=0.10.1", + "tornado!=6.5.0,<7,>=6.0.3", + "typing-extensions<5,>=4.4.0", + "watchdog<7,>=2.1.5; platform_system != \"Darwin\"", +] +files = [ + {file = "streamlit-1.48.0-py3-none-any.whl", hash = "sha256:0f2bc697a1a4d2199384d8bb3966aa0b25904c9345e6d6ad6fbb69bd284c03a2"}, + {file = "streamlit-1.48.0.tar.gz", hash = "sha256:64da4819f4b897c8d1e9eb6396f4618b2eb5029d25af61472422d4eb0688bb75"}, +] + +[[package]] +name = "tenacity" +version = "9.1.2" +requires_python = ">=3.9" +summary = "Retry code until it succeeds" +groups = ["default"] +files = [ + {file = "tenacity-9.1.2-py3-none-any.whl", hash = "sha256:f77bf36710d8b73a50b2dd155c97b870017ad21afe6ab300326b0371b3b05138"}, + {file = "tenacity-9.1.2.tar.gz", hash = "sha256:1169d376c297e7de388d18b4481760d478b0e99a777cad3a9c86e556f4b697cb"}, +] + +[[package]] +name = "tokenizers" +version = "0.21.4" +requires_python = ">=3.9" +summary = "" +groups = ["default"] +dependencies = [ + "huggingface-hub<1.0,>=0.16.4", +] +files = [ + {file = "tokenizers-0.21.4-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:2ccc10a7c3bcefe0f242867dc914fc1226ee44321eb618cfe3019b5df3400133"}, + {file = "tokenizers-0.21.4-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:5e2f601a8e0cd5be5cc7506b20a79112370b9b3e9cb5f13f68ab11acd6ca7d60"}, + {file = "tokenizers-0.21.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:39b376f5a1aee67b4d29032ee85511bbd1b99007ec735f7f35c8a2eb104eade5"}, + {file = "tokenizers-0.21.4-cp39-abi3-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2107ad649e2cda4488d41dfd031469e9da3fcbfd6183e74e4958fa729ffbf9c6"}, + {file = "tokenizers-0.21.4-cp39-abi3-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c73012da95afafdf235ba80047699df4384fdc481527448a078ffd00e45a7d9"}, + {file = "tokenizers-0.21.4-cp39-abi3-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f23186c40395fc390d27f519679a58023f368a0aad234af145e0f39ad1212732"}, + {file = "tokenizers-0.21.4-cp39-abi3-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cc88bb34e23a54cc42713d6d98af5f1bf79c07653d24fe984d2d695ba2c922a2"}, + {file = "tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51b7eabb104f46c1c50b486520555715457ae833d5aee9ff6ae853d1130506ff"}, + {file = "tokenizers-0.21.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:714b05b2e1af1288bd1bc56ce496c4cebb64a20d158ee802887757791191e6e2"}, + {file = "tokenizers-0.21.4-cp39-abi3-musllinux_1_2_armv7l.whl", hash = "sha256:1340ff877ceedfa937544b7d79f5b7becf33a4cfb58f89b3b49927004ef66f78"}, + {file = "tokenizers-0.21.4-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:3c1f4317576e465ac9ef0d165b247825a2a4078bcd01cba6b54b867bdf9fdd8b"}, + {file = 
"tokenizers-0.21.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:c212aa4e45ec0bb5274b16b6f31dd3f1c41944025c2358faaa5782c754e84c24"}, + {file = "tokenizers-0.21.4-cp39-abi3-win32.whl", hash = "sha256:6c42a930bc5f4c47f4ea775c91de47d27910881902b0f20e4990ebe045a415d0"}, + {file = "tokenizers-0.21.4-cp39-abi3-win_amd64.whl", hash = "sha256:475d807a5c3eb72c59ad9b5fcdb254f6e17f53dfcbb9903233b0dfa9c943b597"}, + {file = "tokenizers-0.21.4.tar.gz", hash = "sha256:fa23f85fbc9a02ec5c6978da172cdcbac23498c3ca9f3645c5c68740ac007880"}, +] + +[[package]] +name = "toml" +version = "0.10.2" +requires_python = ">=2.6, !=3.0.*, !=3.1.*, !=3.2.*" +summary = "Python Library for Tom's Obvious, Minimal Language" +groups = ["default"] +files = [ + {file = "toml-0.10.2-py2.py3-none-any.whl", hash = "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b"}, + {file = "toml-0.10.2.tar.gz", hash = "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f"}, +] + +[[package]] +name = "tornado" +version = "6.5.2" +requires_python = ">=3.9" +summary = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." +groups = ["default"] +files = [ + {file = "tornado-6.5.2-cp39-abi3-macosx_10_9_universal2.whl", hash = "sha256:2436822940d37cde62771cff8774f4f00b3c8024fe482e16ca8387b8a2724db6"}, + {file = "tornado-6.5.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:583a52c7aa94ee046854ba81d9ebb6c81ec0fd30386d96f7640c96dad45a03ef"}, + {file = "tornado-6.5.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b0fe179f28d597deab2842b86ed4060deec7388f1fd9c1b4a41adf8af058907e"}, + {file = "tornado-6.5.2-cp39-abi3-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b186e85d1e3536d69583d2298423744740986018e393d0321df7340e71898882"}, + {file = "tornado-6.5.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e792706668c87709709c18b353da1f7662317b563ff69f00bab83595940c7108"}, + {file = "tornado-6.5.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:06ceb1300fd70cb20e43b1ad8aaee0266e69e7ced38fa910ad2e03285009ce7c"}, + {file = "tornado-6.5.2-cp39-abi3-musllinux_1_2_i686.whl", hash = "sha256:74db443e0f5251be86cbf37929f84d8c20c27a355dd452a5cfa2aada0d001ec4"}, + {file = "tornado-6.5.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:b5e735ab2889d7ed33b32a459cac490eda71a1ba6857b0118de476ab6c366c04"}, + {file = "tornado-6.5.2-cp39-abi3-win32.whl", hash = "sha256:c6f29e94d9b37a95013bb669616352ddb82e3bfe8326fccee50583caebc8a5f0"}, + {file = "tornado-6.5.2-cp39-abi3-win_amd64.whl", hash = "sha256:e56a5af51cc30dd2cae649429af65ca2f6571da29504a07995175df14c18f35f"}, + {file = "tornado-6.5.2-cp39-abi3-win_arm64.whl", hash = "sha256:d6c33dc3672e3a1f3618eb63b7ef4683a7688e7b9e6e8f0d9aa5726360a004af"}, + {file = "tornado-6.5.2.tar.gz", hash = "sha256:ab53c8f9a0fa351e2c0741284e06c7a45da86afb544133201c5cc8578eb076a0"}, +] + +[[package]] +name = "tqdm" +version = "4.67.1" +requires_python = ">=3.7" +summary = "Fast, Extensible Progress Meter" +groups = ["default"] +dependencies = [ + "colorama; platform_system == \"Windows\"", +] +files = [ + {file = "tqdm-4.67.1-py3-none-any.whl", hash = "sha256:26445eca388f82e72884e0d580d5464cd801a3ea01e63e5601bdff9ba6a48de2"}, + {file = "tqdm-4.67.1.tar.gz", hash = "sha256:f8aef9c52c08c13a65f30ea34f4e5aac3fd1a34959879d7e59e63027286627f2"}, +] + +[[package]] +name = "transformers" +version = "4.55.4" 
+requires_python = ">=3.9.0" +summary = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" +groups = ["default"] +dependencies = [ + "filelock", + "huggingface-hub<1.0,>=0.34.0", + "numpy>=1.17", + "packaging>=20.0", + "pyyaml>=5.1", + "regex!=2019.12.17", + "requests", + "safetensors>=0.4.3", + "tokenizers<0.22,>=0.21", + "tqdm>=4.27", +] +files = [ + {file = "transformers-4.55.4-py3-none-any.whl", hash = "sha256:df28f3849665faba4af5106f0db4510323277c4bb595055340544f7e59d06458"}, + {file = "transformers-4.55.4.tar.gz", hash = "sha256:574a30559bc273c7a4585599ff28ab6b676e96dc56ffd2025ecfce2fd0ab915d"}, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +requires_python = ">=3.9" +summary = "Backported and Experimental Type Hints for Python 3.9+" +groups = ["default"] +files = [ + {file = "typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548"}, + {file = "typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466"}, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +requires_python = ">=3.9" +summary = "Runtime typing introspection tools" +groups = ["default"] +dependencies = [ + "typing-extensions>=4.12.0", +] +files = [ + {file = "typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7"}, + {file = "typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464"}, +] + +[[package]] +name = "tzdata" +version = "2025.2" +requires_python = ">=2" +summary = "Provider of IANA time zone data" +groups = ["default"] +files = [ + {file = "tzdata-2025.2-py2.py3-none-any.whl", hash = "sha256:1a403fada01ff9221ca8044d701868fa132215d84beb92242d9acd2147f667a8"}, + {file = "tzdata-2025.2.tar.gz", hash = "sha256:b60a638fcc0daffadf82fe0f57e53d06bdec2f36c4df66280ae79bce6bd6f2b9"}, +] + +[[package]] +name = "urllib3" +version = "2.3.0" +requires_python = ">=3.9" +summary = "HTTP library with thread-safe connection pooling, file post, and more." 
+groups = ["default"] +files = [ + {file = "urllib3-2.3.0-py3-none-any.whl", hash = "sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df"}, + {file = "urllib3-2.3.0.tar.gz", hash = "sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d"}, +] + +[[package]] +name = "virtualenv" +version = "20.34.0" +requires_python = ">=3.8" +summary = "Virtual Python Environment builder" +groups = ["dev"] +dependencies = [ + "distlib<1,>=0.3.7", + "filelock<4,>=3.12.2", + "importlib-metadata>=6.6; python_version < \"3.8\"", + "platformdirs<5,>=3.9.1", + "typing-extensions>=4.13.2; python_version < \"3.11\"", +] +files = [ + {file = "virtualenv-20.34.0-py3-none-any.whl", hash = "sha256:341f5afa7eee943e4984a9207c025feedd768baff6753cd660c857ceb3e36026"}, + {file = "virtualenv-20.34.0.tar.gz", hash = "sha256:44815b2c9dee7ed86e387b842a84f20b93f7f417f95886ca1996a72a4138eb1a"}, +] + +[[package]] +name = "watchdog" +version = "6.0.0" +requires_python = ">=3.9" +summary = "Filesystem events monitoring" +groups = ["default"] +marker = "platform_system != \"Darwin\"" +files = [ + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860"}, + {file = "watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134"}, + {file = "watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c"}, + {file = "watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2"}, + {file = "watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a"}, + {file = "watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680"}, + {file = "watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f"}, + {file = "watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282"}, +] + +[[package]] +name = 
"websocket-client" +version = "1.8.0" +requires_python = ">=3.8" +summary = "WebSocket client for Python with low level API options" +groups = ["default"] +files = [ + {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, + {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, +] + +[[package]] +name = "yarl" +version = "1.20.1" +requires_python = ">=3.9" +summary = "Yet another URL library" +groups = ["default"] +dependencies = [ + "idna>=2.0", + "multidict>=4.0", + "propcache>=0.2.1", +] +files = [ + {file = "yarl-1.20.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdcc4cd244e58593a4379fe60fdee5ac0331f8eb70320a24d591a3be197b94a9"}, + {file = "yarl-1.20.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b29a2c385a5f5b9c7d9347e5812b6f7ab267193c62d282a540b4fc528c8a9d2a"}, + {file = "yarl-1.20.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1112ae8154186dfe2de4732197f59c05a83dc814849a5ced892b708033f40dc2"}, + {file = "yarl-1.20.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:90bbd29c4fe234233f7fa2b9b121fb63c321830e5d05b45153a2ca68f7d310ee"}, + {file = "yarl-1.20.1-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:680e19c7ce3710ac4cd964e90dad99bf9b5029372ba0c7cbfcd55e54d90ea819"}, + {file = "yarl-1.20.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a979218c1fdb4246a05efc2cc23859d47c89af463a90b99b7c56094daf25a16"}, + {file = "yarl-1.20.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255b468adf57b4a7b65d8aad5b5138dce6a0752c139965711bdcb81bc370e1b6"}, + {file = "yarl-1.20.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a97d67108e79cfe22e2b430d80d7571ae57d19f17cda8bb967057ca8a7bf5bfd"}, + {file = "yarl-1.20.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8570d998db4ddbfb9a590b185a0a33dbf8aafb831d07a5257b4ec9948df9cb0a"}, + {file = "yarl-1.20.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:97c75596019baae7c71ccf1d8cc4738bc08134060d0adfcbe5642f778d1dca38"}, + {file = "yarl-1.20.1-cp312-cp312-musllinux_1_2_armv7l.whl", hash = "sha256:1c48912653e63aef91ff988c5432832692ac5a1d8f0fb8a33091520b5bbe19ef"}, + {file = "yarl-1.20.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:4c3ae28f3ae1563c50f3d37f064ddb1511ecc1d5584e88c6b7c63cf7702a6d5f"}, + {file = "yarl-1.20.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:c5e9642f27036283550f5f57dc6156c51084b458570b9d0d96100c8bebb186a8"}, + {file = "yarl-1.20.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:2c26b0c49220d5799f7b22c6838409ee9bc58ee5c95361a4d7831f03cc225b5a"}, + {file = "yarl-1.20.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:564ab3d517e3d01c408c67f2e5247aad4019dcf1969982aba3974b4093279004"}, + {file = "yarl-1.20.1-cp312-cp312-win32.whl", hash = "sha256:daea0d313868da1cf2fac6b2d3a25c6e3a9e879483244be38c8e6a41f1d876a5"}, + {file = "yarl-1.20.1-cp312-cp312-win_amd64.whl", hash = "sha256:48ea7d7f9be0487339828a4de0360d7ce0efc06524a48e1810f945c45b813698"}, + {file = "yarl-1.20.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:0b5ff0fbb7c9f1b1b5ab53330acbfc5247893069e7716840c8e7d5bb7355038a"}, + {file = "yarl-1.20.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = 
"sha256:14f326acd845c2b2e2eb38fb1346c94f7f3b01a4f5c788f8144f9b630bfff9a3"}, + {file = "yarl-1.20.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:f60e4ad5db23f0b96e49c018596707c3ae89f5d0bd97f0ad3684bcbad899f1e7"}, + {file = "yarl-1.20.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:49bdd1b8e00ce57e68ba51916e4bb04461746e794e7c4d4bbc42ba2f18297691"}, + {file = "yarl-1.20.1-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:66252d780b45189975abfed839616e8fd2dbacbdc262105ad7742c6ae58f3e31"}, + {file = "yarl-1.20.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:59174e7332f5d153d8f7452a102b103e2e74035ad085f404df2e40e663a22b28"}, + {file = "yarl-1.20.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e3968ec7d92a0c0f9ac34d5ecfd03869ec0cab0697c91a45db3fbbd95fe1b653"}, + {file = "yarl-1.20.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1a4fbb50e14396ba3d375f68bfe02215d8e7bc3ec49da8341fe3157f59d2ff5"}, + {file = "yarl-1.20.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:11a62c839c3a8eac2410e951301309426f368388ff2f33799052787035793b02"}, + {file = "yarl-1.20.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:041eaa14f73ff5a8986b4388ac6bb43a77f2ea09bf1913df7a35d4646db69e53"}, + {file = "yarl-1.20.1-cp313-cp313-musllinux_1_2_armv7l.whl", hash = "sha256:377fae2fef158e8fd9d60b4c8751387b8d1fb121d3d0b8e9b0be07d1b41e83dc"}, + {file = "yarl-1.20.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:1c92f4390e407513f619d49319023664643d3339bd5e5a56a3bebe01bc67ec04"}, + {file = "yarl-1.20.1-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:d25ddcf954df1754ab0f86bb696af765c5bfaba39b74095f27eececa049ef9a4"}, + {file = "yarl-1.20.1-cp313-cp313-musllinux_1_2_s390x.whl", hash = "sha256:909313577e9619dcff8c31a0ea2aa0a2a828341d92673015456b3ae492e7317b"}, + {file = "yarl-1.20.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:793fd0580cb9664548c6b83c63b43c477212c0260891ddf86809e1c06c8b08f1"}, + {file = "yarl-1.20.1-cp313-cp313-win32.whl", hash = "sha256:468f6e40285de5a5b3c44981ca3a319a4b208ccc07d526b20b12aeedcfa654b7"}, + {file = "yarl-1.20.1-cp313-cp313-win_amd64.whl", hash = "sha256:495b4ef2fea40596bfc0affe3837411d6aa3371abcf31aac0ccc4bdd64d4ef5c"}, + {file = "yarl-1.20.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:f60233b98423aab21d249a30eb27c389c14929f47be8430efa7dbd91493a729d"}, + {file = "yarl-1.20.1-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:6f3eff4cc3f03d650d8755c6eefc844edde99d641d0dcf4da3ab27141a5f8ddf"}, + {file = "yarl-1.20.1-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:69ff8439d8ba832d6bed88af2c2b3445977eba9a4588b787b32945871c2444e3"}, + {file = "yarl-1.20.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3cf34efa60eb81dd2645a2e13e00bb98b76c35ab5061a3989c7a70f78c85006d"}, + {file = "yarl-1.20.1-cp313-cp313t-manylinux_2_17_armv7l.manylinux2014_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:8e0fe9364ad0fddab2688ce72cb7a8e61ea42eff3c7caeeb83874a5d479c896c"}, + {file = "yarl-1.20.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8f64fbf81878ba914562c672024089e3401974a39767747691c65080a67b18c1"}, + {file = "yarl-1.20.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f6342d643bf9a1de97e512e45e4b9560a043347e779a173250824f8b254bd5ce"}, + {file = 
"yarl-1.20.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56dac5f452ed25eef0f6e3c6a066c6ab68971d96a9fb441791cad0efba6140d3"}, + {file = "yarl-1.20.1-cp313-cp313t-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c7d7f497126d65e2cad8dc5f97d34c27b19199b6414a40cb36b52f41b79014be"}, + {file = "yarl-1.20.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:67e708dfb8e78d8a19169818eeb5c7a80717562de9051bf2413aca8e3696bf16"}, + {file = "yarl-1.20.1-cp313-cp313t-musllinux_1_2_armv7l.whl", hash = "sha256:595c07bc79af2494365cc96ddeb772f76272364ef7c80fb892ef9d0649586513"}, + {file = "yarl-1.20.1-cp313-cp313t-musllinux_1_2_i686.whl", hash = "sha256:7bdd2f80f4a7df852ab9ab49484a4dee8030023aa536df41f2d922fd57bf023f"}, + {file = "yarl-1.20.1-cp313-cp313t-musllinux_1_2_ppc64le.whl", hash = "sha256:c03bfebc4ae8d862f853a9757199677ab74ec25424d0ebd68a0027e9c639a390"}, + {file = "yarl-1.20.1-cp313-cp313t-musllinux_1_2_s390x.whl", hash = "sha256:344d1103e9c1523f32a5ed704d576172d2cabed3122ea90b1d4e11fe17c66458"}, + {file = "yarl-1.20.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:88cab98aa4e13e1ade8c141daeedd300a4603b7132819c484841bb7af3edce9e"}, + {file = "yarl-1.20.1-cp313-cp313t-win32.whl", hash = "sha256:b121ff6a7cbd4abc28985b6028235491941b9fe8fe226e6fdc539c977ea1739d"}, + {file = "yarl-1.20.1-cp313-cp313t-win_amd64.whl", hash = "sha256:541d050a355bbbc27e55d906bc91cb6fe42f96c01413dd0f4ed5a5240513874f"}, + {file = "yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77"}, + {file = "yarl-1.20.1.tar.gz", hash = "sha256:d017a4997ee50c91fd5466cef416231bb82177b93b029906cefc542ce14c35ac"}, +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..bd67bd14 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,156 @@ +[project] +name = "llm-d-benchmark" +version = "0.3.0" +description = "Automated workflow for benchmarking LLM inference using the llm-d stack" +readme = "README.md" +requires-python = ">=3.12" +license = {text = "Apache-2.0"} +authors = [ + {name = "llm-d-benchmark team"}, +] +keywords = ["llm", "benchmark", "inference", "performance"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers and Researchers", + "Programming Language :: Python :: 3.12", +] + +dependencies = [ + "kubernetes", + "pykube-ng", + "kubernetes-asyncio", + "GitPython", + "requests", + "PyYAML", + "Jinja2", + "huggingface_hub==0.34.4", + "transformers==4.55.4", + "matplotlib>=3.0.0", + "numpy>=2.3.1", + "seaborn>=0.12.0", + "pandas>=2.2.3", + "scipy>=1.16.0", + "plotly==6.3.0", + "streamlit==1.48.0", + "pydantic>=2.11.7", +] + +[project.optional-dependencies] +dev = [ + "ruff>=0.8.0", + "pytest>=7.4.0", + "pytest-cov>=4.1.0", + "pre-commit>=3.5.0", + "boxsdk>=10.0.0", +] + +[build-system] +requires = ["pdm-backend"] +build-backend = "pdm.backend" + +[tool.pdm] +distribution = true + +[tool.pdm.build] +package-dir = "." 
+includes = ["llm_d_benchmark/"] +excludes = [ + "**/.git", + "**/__pycache__", + "**/*.pyc", + "**/logs/", + "**/.ipynb_checkpoints", + "**/htmlcov/", +] + +[tool.pdm.dev-dependencies] +dev = [ + "ruff>=0.8.0", + "pytest>=7.4.0", + "pytest-cov>=4.1.0", + "pre-commit>=3.5.0", + "boxsdk>=10.0.0", +] + +[tool.pdm.scripts] +format = {composite = ["ruff format ."]} +format-check = {composite = ["ruff format --check ."]} +lint = {composite = ["ruff check --fix ."]} +lint-check = {composite = ["ruff check ."]} +format-lint = {composite = ["ruff format .", "ruff check --fix ."]} +test = "pytest" +test-cov = "pytest --cov=llm_d_benchmark --cov-report=html --cov-report=term" + +[tool.coverage.run] +source = ["llm_d_benchmark"] +omit = [ + "*/convert_*", + "*/__main__.py", + "*/tests/*", + "*/test_*", +] + +[tool.coverage.report] +exclude_lines = [ + "pragma: no cover", + "raise", + "except", + "register_parameter", + "if __name__ == .__main__.:", +] + +[tool.ruff] +target-version = "py312" +line-length = 119 +src = ["llm_d_benchmark"] +extend-exclude = [ + "llm_d_benchmark/config_explorer/htmlcov", + "llm_d_benchmark/setup/logs", + "__pycache__", + ".ipynb_checkpoints", +] + +[tool.ruff.lint] +# Never enforce `E501` (line length violations). +# SIM300: Yoda condition detected +# SIM212: Checks for if expressions that check against a negated condition. +# SIM905: Consider using a list literal instead of `str.split` +ignore = ["C901", "E501", "E741", "F402", "F823", "SIM1", "SIM300", "SIM212", "SIM905"] +# RUF013: Checks for the use of implicit Optional +# in type annotations when the default parameter value is None. +select = ["C", "E", "F", "I", "W", "RUF013", "UP006", "PERF102", "PLC1802", "PLC0208", "SIM"] +extend-safe-fixes = ["UP006"] + +# Ignore import violations in all `__init__.py` files. +[tool.ruff.lint.per-file-ignores] +"__init__.py" = ["E402", "F401", "F403", "F811"] +"llm_d_benchmark/setup/functions.py" = ["F401"] +"llm_d_benchmark/config_explorer/util.py" = ["F401"] + +[tool.ruff.lint.isort] +lines-after-imports = 2 +known-first-party = ["llm_d_benchmark", "config_explorer"] + +[tool.ruff.format] +# Like Black, use double quotes for strings. +quote-style = "double" + +# Like Black, indent with spaces, rather than tabs. +indent-style = "space" + +# Like Black, respect magic trailing commas. +skip-magic-trailing-comma = false + +# Like Black, automatically detect the appropriate line ending. +line-ending = "auto" + +[tool.pytest.ini_options] +addopts = "--doctest-glob='**/*.md'" +doctest_optionflags = "NUMBER NORMALIZE_WHITESPACE ELLIPSIS" +markers = [ + "integration: marks tests as integration tests", + "unit: marks tests as unit tests", +] +log_cli = 1 +log_cli_level = "WARNING" +testpaths = ["llm_d_benchmark/util/unit_test", "llm_d_benchmark/config_explorer/tests"] diff --git a/workload/report/convert.py b/workload/report/convert.py deleted file mode 100755 index b8d4f243..00000000 --- a/workload/report/convert.py +++ /dev/null @@ -1,1164 +0,0 @@ -#!/usr/bin/env python3 - -# This script imports data from a benchmark run in llm-d-benchmark using any -# supported harness, and converts the results into a data file with a standard -# benchmark report format. This format can then be used for post processing -# that is not specialized to a particular harness. 
- -import argparse -import base64 -import datetime -import os -import re -import sys -from typing import Any -import yaml - -import numpy as np -from scipy import stats - -from schema import BenchmarkReport, Units, WorkloadGenerator - - -def check_file(file_path: str) -> None: - """Make sure regular file exists. - - Args: - file_path (str): File to check. - """ - if not os.path.exists(file_path): - sys.stderr.write('File does not exist: %s\n' % file_path) - exit(2) - if not os.path.isfile(file_path): - sys.stderr.write('Not a regular file: %s\n' % file_path) - exit(2) - - -def import_yaml(file_path: str) -> dict[Any, Any]: - """Import a JSON/YAML file as a dict. - - Args: - file_path (str): Path to JSON/YAML file. - - Returns: - dict: Imported data. - """ - check_file(file_path) - with open(file_path, 'r', encoding='UTF-8') as file: - data = yaml.safe_load(file) - return data - - -def import_csv_with_header(file_path: str) -> dict[str, list[Any]]: - """Import a CSV file where the first line is a header. - - Args: - file_path (str): Path to CSV file. - - Returns: - dict: Imported data where the header provides key names. - """ - check_file(file_path) - with open(file_path, 'r', encoding='UTF-8') as file: - for ii, line in enumerate(file): - if ii == 0: - headers: list[str] = list(map(str.strip, line.split(','))) - data: dict[str, list[Any]] = {} - for hdr in headers: - data[hdr] = [] - continue - row_vals = list(map(str.strip, line.split(','))) - if len(row_vals) != len(headers): - sys.stderr.write('Warning: line %d of "%s" does not match header length, skipping: %d != %d\n' % - (ii + 1, file_path, len(row_vals), len(headers))) - continue - for jj, val in enumerate(row_vals): - # Try converting the value to an int or float - try: - val = int(val) - except ValueError: - try: - val = float(val) - except ValueError: - pass - data[headers[jj]].append(val) - # Convert lists of ints or floats to numpy arrays - for hdr in headers: - if isinstance(data[hdr][0], int) or isinstance(data[hdr][0], float): - data[hdr] = np.array(data[hdr]) - return data - - -def update_dict(dest: dict[Any, Any], source: dict[Any, Any]) -> None: - """Deep update a dict using values from another dict. If a value is a dict, - then update that dict, otherwise overwrite with the new value. - - Args: - dest (dict): dict to update. - source (dict): dict with new values to add to dest. - """ - for key, val in source.items(): - if key in dest and isinstance(dest[key], dict): - if not val: - # Do not "update" with null values - continue - if not isinstance(val, dict): - raise Exception("Cannot update dict type with non-dict: %s" % val) - update_dict(dest[key], val) - else: - dest[key] = val - - -def _get_llmd_benchmark_envars() -> dict: - """Get information from environment variables for the benchmark report. - - Returns: - dict: Imported data about scenario following schema of BenchmarkReport. - """ - # We make the assumption that if the environment variable - # LLMDBENCH_MAGIC_ENVAR is defined, then we are inside a harness pod. 
- if 'LLMDBENCH_MAGIC_ENVAR' not in os.environ: - # We are not in a harness pod - return {} - - if 'LLMDBENCH_DEPLOY_METHODS' not in os.environ: - sys.stderr.write('Warning: LLMDBENCH_DEPLOY_METHODS undefined, cannot determine deployment method.') - return {} - - if os.environ['LLMDBENCH_DEPLOY_METHODS'] == 'standalone': - # Given a 'standalone' deployment, we expect the following environment - # variables to be available - return { - "scenario": { - "model": { - "name": os.environ['LLMDBENCH_DEPLOY_CURRENT_MODEL'] - }, - "host": { - "type": ['replica'] * int(os.environ['LLMDBENCH_VLLM_COMMON_REPLICAS']), - "accelerator": [{ - "model": os.environ['LLMDBENCH_VLLM_COMMON_AFFINITY'].split(':', 1)[-1], - "count": int(os.environ['LLMDBENCH_VLLM_COMMON_TENSOR_PARALLELISM']) \ - * int(os.environ['LLMDBENCH_VLLM_COMMON_DATA_PARALLELISM']), - "parallelism": { - "tp": int(os.environ['LLMDBENCH_VLLM_COMMON_TENSOR_PARALLELISM']), - "dp": int(os.environ['LLMDBENCH_VLLM_COMMON_DATA_PARALLELISM']), - }, - }] * int(os.environ['LLMDBENCH_VLLM_COMMON_REPLICAS']), - }, - "platform": { - "engine": [{ - "name": os.environ['LLMDBENCH_VLLM_STANDALONE_IMAGE_REGISTRY'] + '/' + \ - os.environ['LLMDBENCH_VLLM_STANDALONE_IMAGE_REPO'] + '/' + \ - os.environ['LLMDBENCH_VLLM_STANDALONE_IMAGE_NAME'] + ':' + \ - os.environ['LLMDBENCH_VLLM_STANDALONE_IMAGE_TAG'], - }] * int(os.environ['LLMDBENCH_VLLM_COMMON_REPLICAS']) - }, - "metadata": { - "load_format": os.environ['LLMDBENCH_VLLM_STANDALONE_VLLM_LOAD_FORMAT'], - "logging_level": os.environ['LLMDBENCH_VLLM_STANDALONE_VLLM_LOGGING_LEVEL'], - "vllm_server_dev_mode": os.environ['LLMDBENCH_VLLM_STANDALONE_VLLM_SERVER_DEV_MODE'], - "preprocess": os.environ['LLMDBENCH_VLLM_STANDALONE_PREPROCESS'], - } - }, - } - - if os.environ['LLMDBENCH_DEPLOY_METHODS'] == 'modelservice': - # Given a 'modelservice' deployment, we expect the following environment - # variables to be available - - # Get EPP configuration - epp_config = {} - epp_config_content = os.getenv('LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_CONFIG', '') - if epp_config_content == "": - sys.stderr.write('Warning: LLMDBENCH_VLLM_MODELSERVICE_GAIE_PRESETS_CONFIG empty.') - else: - epp_config_content = base64.b64decode(epp_config_content).decode("utf-8") - epp_config = yaml.safe_load(epp_config_content) - - # Insert default parameter values for scorers if left undefined - for ii, plugin in enumerate(epp_config['plugins']): - if plugin['type'] == 'prefix-cache-scorer': - if 'parameters' not in plugin: - plugin['parameters'] = {} - - parameters = plugin['parameters'] - if 'blockSize' not in parameters: - parameters['blockSize'] = 16 - if 'maxPrefixBlocksToMatch' not in parameters: - parameters['maxPrefixBlocksToMatch'] = 256 - if 'lruCapacityPerServer' not in parameters: - parameters['lruCapacityPerServer'] = 31250 - - epp_config['plugins'][ii]['parameters'] = parameters - - return { - "scenario": { - "model": { - "name": os.environ['LLMDBENCH_DEPLOY_CURRENT_MODEL'] - }, - "host": { - "type": ['prefill'] * int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS']) + \ - ['decode'] * int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS']), - "accelerator": [{ - "model": os.environ['LLMDBENCH_VLLM_COMMON_AFFINITY'].split(':', 1)[-1], - "count": int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM']) \ - * int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM']), - "parallelism": { - "tp": int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_PREFILL_TENSOR_PARALLELISM']), - "dp": 
int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_PREFILL_DATA_PARALLELISM']), - }, - }] * int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS']) + \ - [{ - "model": os.environ['LLMDBENCH_VLLM_COMMON_AFFINITY'].split(':', 1)[-1], - "count": int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM']) \ - * int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM']), - "parallelism": { - "tp": int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_DECODE_TENSOR_PARALLELISM']), - "dp": int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_DECODE_DATA_PARALLELISM']), - }, - }] * int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS']), - }, - "platform": { - "metadata": { - "inferenceScheduler": epp_config, - }, - "engine": [{ - "name": os.environ['LLMDBENCH_LLMD_IMAGE_REGISTRY'] + '/' + \ - os.environ['LLMDBENCH_LLMD_IMAGE_REPO'] + '/' + \ - os.environ['LLMDBENCH_LLMD_IMAGE_NAME'] + ':' + \ - os.environ['LLMDBENCH_LLMD_IMAGE_TAG'], - }] * (int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_PREFILL_REPLICAS']) + - int(os.environ['LLMDBENCH_VLLM_MODELSERVICE_DECODE_REPLICAS'])) - }, - }, - } - - # Pre-existing deployment, cannot extract details about unknown inference - # service environment - sys.stderr.write('Warning: LLMDBENCH_DEPLOY_METHODS is not "modelservice" or "standalone", cannot extract environmental details.') - return {} - - -def import_benchmark_report(br_file: str) -> BenchmarkReport: - """Import benchmark report, and supplement with additional data from llm-d-benchmark run. - - Args: - br_file (str): Benchmark report file to import. - - Returns: - BenchmarkReport: Imported benchmark report supplemented with run data. - """ - check_file(br_file) - - # Import benchmark report as a dict following the schema of BenchmarkReport - br_dict = import_yaml(br_file) - - return BenchmarkReport(**br_dict) - - -def _vllm_timestamp_to_epoch(date_str: str) -> int: - """Convert timestamp from vLLM benchmark into seconds from Unix epoch. - - String format is YYYYMMDD-HHMMSS in UTC. - - Args: - date_str (str): Timestamp from vLLM benchmark. - - Returns: - int: Seconds from Unix epoch. - """ - date_str = date_str.strip() - if not re.search('[0-9]{8}-[0-9]{6}', date_str): - raise Exception('Invalid date format: %s' % date_str) - year = int(date_str[0:4]) - month = int(date_str[4:6]) - day = int(date_str[6:8]) - hour = int(date_str[9:11]) - minute = int(date_str[11:13]) - second = int(date_str[13:15]) - return datetime.datetime(year, month, day, hour, minute, second).timestamp() - - -def import_vllm_benchmark(results_file: str) -> BenchmarkReport: - """Import data from a vLLM benchmark run as a BenchmarkReport. - - Args: - results_file (str): Results file to import. - - Returns: - BenchmarkReport: Imported data. - """ - check_file(results_file) - - # Import results file from vLLM benchmark - results = import_yaml(results_file) - - # Get environment variables from llm-d-benchmark run as a dict following the - # schema of BenchmarkReport - br_dict = _get_llmd_benchmark_envars() - # Append to that dict the data from vLLM benchmark. - # This section assumes metric-percentiles contains at least the values - # "0.1,1,5,10,25,75,90,95,99,99.9". If any of these values are missing, we - # will crash with a KeyError. 
- update_dict(br_dict, { - "scenario": { - "model": {"name": results['model_id']}, - "load": { - "name": WorkloadGenerator.VLLM_BENCHMARK, - "args": { - "num_prompts": results['num_prompts'], - "request_rate": results['request_rate'], - "burstiness":results['burstiness'], - "max_concurrency": results['max_concurrency'], - }, - }, - }, - "metrics": { - "time": { - "duration": results['duration'], - "start": _vllm_timestamp_to_epoch(results['date']), - }, - "requests": { - "total": results['completed'], - "input_length": { - "units": Units.COUNT, - "mean": results['total_input_tokens']/results['completed'], - }, - "output_length": { - "units": Units.COUNT, - "mean": results['total_output_tokens']/results['completed'], - }, - }, - "latency": { - "time_to_first_token": { - "units": Units.MS, - "mean": results['mean_ttft_ms'], - "stddev": results['std_ttft_ms'], - "p00p1": results['p0.1_ttft_ms'], - "p01": results['p1_ttft_ms'], - "p05": results['p5_ttft_ms'], - "p10": results['p10_ttft_ms'], - "P25": results['p25_ttft_ms'], - "p50": results['median_ttft_ms'], - "p75": results['p75_ttft_ms'], - "p90": results['p90_ttft_ms'], - "p95": results['p95_ttft_ms'], - "p99": results['p99_ttft_ms'], - "p99p9": results['p99.9_ttft_ms'], - }, - "time_per_output_token": { - "units": Units.MS_PER_TOKEN, - "mean": results['mean_tpot_ms'], - "stddev": results['std_tpot_ms'], - "p00p1": results['p0.1_tpot_ms'], - "p01": results['p1_tpot_ms'], - "p05": results['p5_tpot_ms'], - "p10": results['p10_tpot_ms'], - "P25": results['p25_tpot_ms'], - "p50": results['median_tpot_ms'], - "p75": results['p75_tpot_ms'], - "p90": results['p90_tpot_ms'], - "p95": results['p95_tpot_ms'], - "p99": results['p99_tpot_ms'], - "p99p9": results['p99.9_tpot_ms'], - }, - "inter_token_latency": { - "units": Units.MS_PER_TOKEN, - "mean": results['mean_itl_ms'], - "stddev": results['std_itl_ms'], - "p00p1": results['p0.1_itl_ms'], - "p01": results['p1_itl_ms'], - "p05": results['p5_itl_ms'], - "p10": results['p10_itl_ms'], - "P25": results['p25_itl_ms'], - "p90": results['p90_itl_ms'], - "p95": results['p95_itl_ms'], - "p99": results['p99_itl_ms'], - "p99p9": results['p99.9_itl_ms'], - }, - "request_latency": { - "units": Units.MS, - "mean": results['mean_e2el_ms'], - "stddev": results['std_e2el_ms'], - "p00p1": results['p0.1_e2el_ms'], - "p01": results['p1_e2el_ms'], - "p05": results['p5_e2el_ms'], - "p10": results['p10_e2el_ms'], - "P25": results['p25_e2el_ms'], - "p90": results['p90_e2el_ms'], - "p95": results['p95_e2el_ms'], - "p99": results['p99_e2el_ms'], - "p99p9": results['p99.9_e2el_ms'], - }, - }, - "throughput": { - "output_tokens_per_sec": results['output_throughput'], - "total_tokens_per_sec": results['total_token_throughput'], - "requests_per_sec": results['request_throughput'], - }, - }, - }) - - return BenchmarkReport(**br_dict) - - -def import_guidellm(results_file: str) -> BenchmarkReport: - """Import data from a GuideLLM run as a BenchmarkReport. - - Args: - results_file (str): Results file to import. - - Returns: - BenchmarkReport: Imported data. 
- """ - check_file(results_file) - - # Everything falls under ['benchmarks'][0], so just grab that part - results = import_yaml(results_file)['benchmarks'][0] - - # Get environment variables from llm-d-benchmark run as a dict following the - # schema of BenchmarkReport - br_dict = _get_llmd_benchmark_envars() - # Append to that dict the data from GuideLLM - update_dict(br_dict, { - "scenario": { - "model": {"name": results['worker']['backend_model']}, - "load": { - "name": WorkloadGenerator.GUIDELLM, - "args": results['args'], - }, - }, - "metrics": { - "time": { - "duration": results['duration'], - "start": results['start_time'], - "stop": results['end_time'], - }, - "requests": { - "total": results['request_totals']['total'], - "failures": results['request_totals']['errored'], - "incomplete": results['request_totals']['incomplete'], - "input_length": { - "units": Units.COUNT, - "mean": results['metrics']['prompt_token_count']['successful']['mean'], - "mode": results['metrics']['prompt_token_count']['successful']['mode'], - "stddev": results['metrics']['prompt_token_count']['successful']['std_dev'], - "min": results['metrics']['prompt_token_count']['successful']['min'], - "p0p1": results['metrics']['prompt_token_count']['successful']['percentiles']['p001'], - "p1": results['metrics']['prompt_token_count']['successful']['percentiles']['p01'], - "p5": results['metrics']['prompt_token_count']['successful']['percentiles']['p05'], - "p10": results['metrics']['prompt_token_count']['successful']['percentiles']['p10'], - "p25": results['metrics']['prompt_token_count']['successful']['percentiles']['p25'], - "p50": results['metrics']['prompt_token_count']['successful']['percentiles']['p50'], - "p75": results['metrics']['prompt_token_count']['successful']['percentiles']['p75'], - "p90": results['metrics']['prompt_token_count']['successful']['percentiles']['p90'], - "p95": results['metrics']['prompt_token_count']['successful']['percentiles']['p95'], - "p99": results['metrics']['prompt_token_count']['successful']['percentiles']['p99'], - "p99p9": results['metrics']['prompt_token_count']['successful']['percentiles']['p999'], - "max": results['metrics']['prompt_token_count']['successful']['max'], - }, - "output_length": { - "units": Units.COUNT, - "mean": results['metrics']['output_token_count']['successful']['mean'], - "mode": results['metrics']['output_token_count']['successful']['mode'], - "stddev": results['metrics']['output_token_count']['successful']['std_dev'], - "min": results['metrics']['output_token_count']['successful']['min'], - "p0p1": results['metrics']['output_token_count']['successful']['percentiles']['p001'], - "p1": results['metrics']['output_token_count']['successful']['percentiles']['p01'], - "p5": results['metrics']['output_token_count']['successful']['percentiles']['p05'], - "p10": results['metrics']['output_token_count']['successful']['percentiles']['p10'], - "p25": results['metrics']['output_token_count']['successful']['percentiles']['p25'], - "p50": results['metrics']['output_token_count']['successful']['percentiles']['p50'], - "p75": results['metrics']['output_token_count']['successful']['percentiles']['p75'], - "p90": results['metrics']['output_token_count']['successful']['percentiles']['p90'], - "p95": results['metrics']['output_token_count']['successful']['percentiles']['p95'], - "p99": results['metrics']['output_token_count']['successful']['percentiles']['p99'], - "p99p9": results['metrics']['output_token_count']['successful']['percentiles']['p999'], - "max": 
results['metrics']['output_token_count']['successful']['max'], - }, - }, - "latency": { - "time_to_first_token": { - "units": Units.MS, - "mean": results['metrics']['time_to_first_token_ms']['successful']['mean'], - "mode": results['metrics']['time_to_first_token_ms']['successful']['mode'], - "stddev": results['metrics']['time_to_first_token_ms']['successful']['std_dev'], - "min": results['metrics']['time_to_first_token_ms']['successful']['min'], - "p0p1": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p001'], - "p1": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p01'], - "p5": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p05'], - "p10": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p10'], - "p25": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p25'], - "p50": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p50'], - "p75": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p75'], - "p90": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p90'], - "p95": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p95'], - "p99": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p99'], - "p99p9": results['metrics']['time_to_first_token_ms']['successful']['percentiles']['p999'], - "max": results['metrics']['time_to_first_token_ms']['successful']['max'], - }, - "time_per_output_token": { - "units": Units.MS_PER_TOKEN, - "mean": results['metrics']['time_per_output_token_ms']['successful']['mean'], - "mode": results['metrics']['time_per_output_token_ms']['successful']['mode'], - "stddev": results['metrics']['time_per_output_token_ms']['successful']['std_dev'], - "min": results['metrics']['time_per_output_token_ms']['successful']['min'], - "p0p1": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p001'], - "p1": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p01'], - "p5": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p05'], - "p10": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p10'], - "p25": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p25'], - "p50": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p50'], - "p75": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p75'], - "p90": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p90'], - "p95": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p95'], - "p99": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p99'], - "p99p9": results['metrics']['time_per_output_token_ms']['successful']['percentiles']['p999'], - "max": results['metrics']['time_per_output_token_ms']['successful']['max'], - }, - "inter_token_latency": { - "units": Units.MS_PER_TOKEN, - "mean": results['metrics']['inter_token_latency_ms']['successful']['mean'], - "mode": results['metrics']['inter_token_latency_ms']['successful']['mode'], - "stddev": results['metrics']['inter_token_latency_ms']['successful']['std_dev'], - "min": results['metrics']['inter_token_latency_ms']['successful']['min'], - "p0p1": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p001'], - "p1": 
results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p01'], - "p5": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p05'], - "p10": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p10'], - "p25": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p25'], - "p50": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p50'], - "p75": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p75'], - "p90": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p90'], - "p95": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p95'], - "p99": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p99'], - "p99p9": results['metrics']['inter_token_latency_ms']['successful']['percentiles']['p999'], - "max": results['metrics']['inter_token_latency_ms']['successful']['max'], - }, - "request_latency": { - "units": Units.MS, - "mean": results['metrics']['request_latency']['successful']['mean'], - "mode": results['metrics']['request_latency']['successful']['mode'], - "stddev": results['metrics']['request_latency']['successful']['std_dev'], - "min": results['metrics']['request_latency']['successful']['min'], - "p0p1": results['metrics']['request_latency']['successful']['percentiles']['p001'], - "p1": results['metrics']['request_latency']['successful']['percentiles']['p01'], - "p5": results['metrics']['request_latency']['successful']['percentiles']['p05'], - "p10": results['metrics']['request_latency']['successful']['percentiles']['p10'], - "p25": results['metrics']['request_latency']['successful']['percentiles']['p25'], - "p50": results['metrics']['request_latency']['successful']['percentiles']['p50'], - "p75": results['metrics']['request_latency']['successful']['percentiles']['p75'], - "p90": results['metrics']['request_latency']['successful']['percentiles']['p90'], - "p95": results['metrics']['request_latency']['successful']['percentiles']['p95'], - "p99": results['metrics']['request_latency']['successful']['percentiles']['p99'], - "p99p9": results['metrics']['request_latency']['successful']['percentiles']['p999'], - "max": results['metrics']['request_latency']['successful']['max'], - }, - }, - "throughput": { - "output_tokens_per_sec": results['metrics']['output_tokens_per_second']['successful']['mean'], - "total_tokens_per_sec": results['metrics']['tokens_per_second']['successful']['mean'], - "requests_per_sec": results['metrics']['requests_per_second']['successful']['mean'], - }, - }, - }) - - return BenchmarkReport(**br_dict) - - -def import_fmperf(results_file: str) -> BenchmarkReport: - """Import data from a fmperf run as a BenchmarkReport. - - Args: - results_file (str): Results file to import. - - Returns: - BenchmarkReport: Imported data. 
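Each GuideLLM distribution block above repeats the same sixteen-field extraction from a `successful` summary. A helper along these lines would express the mapping once; the helper name is an assumption for illustration and is not part of the file:

from typing import Any

def _guidellm_dist(summary: dict[str, Any], units: "Units") -> dict[str, Any]:
    """Map one GuideLLM 'successful' summary (mean/mode/std_dev/min/max plus
    a 'percentiles' table keyed p001..p999) onto the schema's field names."""
    pct = summary['percentiles']
    return {
        "units": units,
        "mean": summary['mean'],
        "mode": summary['mode'],
        "stddev": summary['std_dev'],
        "min": summary['min'],
        "max": summary['max'],
        "p0p1": pct['p001'], "p1": pct['p01'], "p5": pct['p05'],
        "p10": pct['p10'], "p25": pct['p25'], "p50": pct['p50'],
        "p75": pct['p75'], "p90": pct['p90'], "p95": pct['p95'],
        "p99": pct['p99'], "p99p9": pct['p999'],
    }

# e.g. "time_to_first_token": _guidellm_dist(
#          results['metrics']['time_to_first_token_ms']['successful'], Units.MS)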
- """ - check_file(results_file) - - results = import_csv_with_header(results_file) - - # Get environment variables from llm-d-benchmark run as a dict following the - # schema of BenchmarkReport - br_dict = _get_llmd_benchmark_envars() - if br_dict: - model_name = br_dict['scenario']['model']['name'] - else: - model_name = "unknown" - # Append to that dict the data from fmperf - duration = results['finish_time'][-1] - results['launch_time'][0] - req_latency = results['finish_time'] - results['launch_time'] - tpot = (req_latency - results['ttft']) / (results['generation_tokens'] - 1) - itl = tpot - update_dict(br_dict, { - "scenario": { - "model": {"name": model_name}, - "load": { - "name": WorkloadGenerator.FMPERF, - }, - }, - "metrics": { - "time": { - "duration": duration, - "start": results['launch_time'][0], - "stop": results['finish_time'][-1], - }, - "requests": { - "total": len(results['prompt_tokens']), - "input_length": { - "units": Units.COUNT, - "mean": results['prompt_tokens'].mean(), - "mode": stats.mode(results['prompt_tokens'])[0], - "stddev": results['prompt_tokens'].std(), - "min": results['prompt_tokens'].min(), - "p0p1": np.percentile(results['prompt_tokens'], 0.1), - "p1": np.percentile(results['prompt_tokens'], 1), - "p5": np.percentile(results['prompt_tokens'], 5), - "p10": np.percentile(results['prompt_tokens'], 10), - "p25": np.percentile(results['prompt_tokens'], 25), - "p50": np.percentile(results['prompt_tokens'], 50), - "p75": np.percentile(results['prompt_tokens'], 75), - "p90": np.percentile(results['prompt_tokens'], 90), - "p95": np.percentile(results['prompt_tokens'], 95), - "p99": np.percentile(results['prompt_tokens'], 99), - "p99p9": np.percentile(results['prompt_tokens'], 99.9), - "max": results['prompt_tokens'].max(), - }, - "output_length": { - "units": Units.COUNT, - "mean": results['generation_tokens'].mean(), - "mode": stats.mode(results['generation_tokens'])[0], - "stddev": results['generation_tokens'].std(), - "min": results['generation_tokens'].min(), - "p0p1": np.percentile(results['generation_tokens'], 0.1), - "p1": np.percentile(results['generation_tokens'], 1), - "p5": np.percentile(results['generation_tokens'], 5), - "p10": np.percentile(results['generation_tokens'], 10), - "p25": np.percentile(results['generation_tokens'], 25), - "p50": np.percentile(results['generation_tokens'], 50), - "p75": np.percentile(results['generation_tokens'], 75), - "p90": np.percentile(results['generation_tokens'], 90), - "p95": np.percentile(results['generation_tokens'], 95), - "p99": np.percentile(results['generation_tokens'], 99), - "p99p9": np.percentile(results['generation_tokens'], 99.9), - "max": results['generation_tokens'].max(), - }, - }, - "latency": { - "time_to_first_token": { - "units": Units.MS, - "mean": results['ttft'].mean(), - "mode": stats.mode(results['ttft'])[0], - "stddev": results['ttft'].std(), - "min": results['ttft'].min(), - "p0p1": np.percentile(results['ttft'], 0.1), - "p1": np.percentile(results['ttft'], 1), - "p5": np.percentile(results['ttft'], 5), - "p10": np.percentile(results['ttft'], 10), - "p25": np.percentile(results['ttft'], 25), - "p50": np.percentile(results['ttft'], 50), - "p75": np.percentile(results['ttft'], 75), - "p90": np.percentile(results['ttft'], 90), - "p95": np.percentile(results['ttft'], 95), - "p99": np.percentile(results['ttft'], 99), - "p99p9": np.percentile(results['ttft'], 99.9), - "max": results['ttft'].max(), - }, - "time_per_output_token": { - "units": Units.MS_PER_TOKEN, - "mean": tpot.mean(), - 
"mode": stats.mode(tpot)[0], - "stddev": tpot.std(), - "min": tpot.min(), - "p0p1": np.percentile(tpot, 0.1), - "p1": np.percentile(tpot, 1), - "p5": np.percentile(tpot, 5), - "p10": np.percentile(tpot, 10), - "p25": np.percentile(tpot, 25), - "p50": np.percentile(tpot, 50), - "p75": np.percentile(tpot, 75), - "p90": np.percentile(tpot, 90), - "p95": np.percentile(tpot, 95), - "p99": np.percentile(tpot, 99), - "p99p9": np.percentile(tpot, 99.9), - "max": tpot.max(), - }, - "inter_token_latency": { - "units": Units.MS_PER_TOKEN, - "mean": itl.mean(), - "mode": stats.mode(itl)[0], - "stddev": itl.std(), - "min": itl.min(), - "p0p1": np.percentile(itl, 0.1), - "p1": np.percentile(itl, 1), - "p5": np.percentile(itl, 5), - "p10": np.percentile(itl, 10), - "p25": np.percentile(itl, 25), - "p50": np.percentile(itl, 50), - "p75": np.percentile(itl, 75), - "p90": np.percentile(itl, 90), - "p95": np.percentile(itl, 95), - "p99": np.percentile(itl, 99), - "p99p9": np.percentile(itl, 99.9), - "max": itl.max(), - }, - "request_latency": { - "units": Units.MS, - "mean": req_latency.mean(), - "mode": stats.mode(req_latency)[0], - "stddev": req_latency.std(), - "min": req_latency.min(), - "p0p1": np.percentile(req_latency, 0.1), - "p1": np.percentile(req_latency, 1), - "p5": np.percentile(req_latency, 5), - "p10": np.percentile(req_latency, 10), - "p25": np.percentile(req_latency, 25), - "p50": np.percentile(req_latency, 50), - "p75": np.percentile(req_latency, 75), - "p90": np.percentile(req_latency, 90), - "p95": np.percentile(req_latency, 95), - "p99": np.percentile(req_latency, 99), - "p99p9": np.percentile(req_latency, 99.9), - "max": req_latency.max(), - }, - }, - "throughput": { - "output_tokens_per_sec": results['generation_tokens'].sum()/duration, - "total_tokens_per_sec": (results['prompt_tokens'].sum() + results['generation_tokens'].sum())/duration, - "requests_per_sec": len(results['prompt_tokens'])/duration, - }, - }, - }) - - return BenchmarkReport(**br_dict) - - -def import_inference_perf(results_file: str) -> BenchmarkReport: - """Import data from a Inference Perf run as a BenchmarkReport. - - Args: - results_file (str): Results file to import. - - Returns: - BenchmarkReport: Imported data. 
- """ - check_file(results_file) - - # Import results from Inference Perf - results = import_yaml(results_file) - - # Get stage number from metrics filename - stage = int(results_file.rsplit('stage_')[-1].split('_', 1)[0]) - - # Import Inference Perf config file - config_file = os.path.join( - os.path.dirname(results_file), - 'config.yaml' - ) - if os.path.isfile(config_file): - config = import_yaml(config_file) - else: - config = {} - - # Get environment variables from llm-d-benchmark run as a dict following the - # schema of BenchmarkReport - br_dict = _get_llmd_benchmark_envars() - if br_dict: - model_name = br_dict['scenario']['model']['name'] - else: - model_name = "unknown" - # Append to that dict the data from Inference Perf - update_dict(br_dict, { - "scenario": { - "model": {"name": model_name}, - "load": { - "name": WorkloadGenerator.INFERENCE_PERF, - "args": config, - "metadata": { - "stage": stage, - }, - }, - }, - "metrics": { - "time": { - "duration": results['load_summary']['send_duration'], # TODO this isn't exactly what we need, we may need to pull apart per_request_lifecycle_metrics.json - }, - "requests": { - "total": results['load_summary']['count'], - "failures": results['failures']['count'], - "input_length": { - "units": Units.COUNT, - "mean": results['successes']['prompt_len']['mean'], - "min": results['successes']['prompt_len']['min'], - "p0p1": results['successes']['prompt_len']['p0.1'], - "p1": results['successes']['prompt_len']['p1'], - "p5": results['successes']['prompt_len']['p5'], - "p10": results['successes']['prompt_len']['p10'], - "p25": results['successes']['prompt_len']['p25'], - "p50": results['successes']['prompt_len']['median'], - "p75": results['successes']['prompt_len']['p75'], - "p90": results['successes']['prompt_len']['p90'], - "p95": results['successes']['prompt_len']['p95'], - "p99": results['successes']['prompt_len']['p99'], - "p99p9": results['successes']['prompt_len']['p99.9'], - "max": results['successes']['prompt_len']['max'], - }, - "output_length": { - "units": Units.COUNT, - "mean": results['successes']['output_len']['mean'], - "min": results['successes']['output_len']['min'], - "p0p1": results['successes']['output_len']['p0.1'], - "p1": results['successes']['output_len']['p1'], - "p5": results['successes']['output_len']['p5'], - "p10": results['successes']['output_len']['p10'], - "p25": results['successes']['output_len']['p25'], - "p50": results['successes']['output_len']['median'], - "p75": results['successes']['output_len']['p75'], - "p90": results['successes']['output_len']['p90'], - "p95": results['successes']['output_len']['p95'], - "p99": results['successes']['output_len']['p99'], - "p99p9": results['successes']['output_len']['p99.9'], - "max": results['successes']['output_len']['max'], - }, - }, - "latency": { - "time_to_first_token": { - "units": Units.S, - "mean": results['successes']['latency']['time_to_first_token']['mean'], - "min": results['successes']['latency']['time_to_first_token']['min'], - "p0p1": results['successes']['latency']['time_to_first_token']['p0.1'], - "p1": results['successes']['latency']['time_to_first_token']['p1'], - "p5": results['successes']['latency']['time_to_first_token']['p5'], - "p10": results['successes']['latency']['time_to_first_token']['p10'], - "p25": results['successes']['latency']['time_to_first_token']['p25'], - "p50": results['successes']['latency']['time_to_first_token']['median'], - "p75": results['successes']['latency']['time_to_first_token']['p75'], - "p90": 
results['successes']['latency']['time_to_first_token']['p90'], - "p95": results['successes']['latency']['time_to_first_token']['p95'], - "p99": results['successes']['latency']['time_to_first_token']['p99'], - "p99p9": results['successes']['latency']['time_to_first_token']['p99.9'], - "max": results['successes']['latency']['time_to_first_token']['max'], - }, - "normalized_time_per_output_token": { - "units": Units.S_PER_TOKEN, - "mean": results['successes']['latency']['normalized_time_per_output_token']['mean'], - "min": results['successes']['latency']['normalized_time_per_output_token']['min'], - "p0p1": results['successes']['latency']['normalized_time_per_output_token']['p0.1'], - "p1": results['successes']['latency']['normalized_time_per_output_token']['p1'], - "p5": results['successes']['latency']['normalized_time_per_output_token']['p5'], - "p10": results['successes']['latency']['normalized_time_per_output_token']['p10'], - "p25": results['successes']['latency']['normalized_time_per_output_token']['p25'], - "p50": results['successes']['latency']['normalized_time_per_output_token']['median'], - "p75": results['successes']['latency']['normalized_time_per_output_token']['p75'], - "p90": results['successes']['latency']['normalized_time_per_output_token']['p90'], - "p95": results['successes']['latency']['normalized_time_per_output_token']['p95'], - "p99": results['successes']['latency']['normalized_time_per_output_token']['p99'], - "p99p9": results['successes']['latency']['normalized_time_per_output_token']['p99.9'], - "max": results['successes']['latency']['normalized_time_per_output_token']['max'], - }, - "time_per_output_token": { - "units": Units.S_PER_TOKEN, - "mean": results['successes']['latency']['time_per_output_token']['mean'], - "min": results['successes']['latency']['time_per_output_token']['min'], - "p0p1": results['successes']['latency']['time_per_output_token']['p0.1'], - "p1": results['successes']['latency']['time_per_output_token']['p1'], - "p5": results['successes']['latency']['time_per_output_token']['p5'], - "p10": results['successes']['latency']['time_per_output_token']['p10'], - "p25": results['successes']['latency']['time_per_output_token']['p25'], - "p50": results['successes']['latency']['time_per_output_token']['median'], - "p75": results['successes']['latency']['time_per_output_token']['p75'], - "p90": results['successes']['latency']['time_per_output_token']['p90'], - "p95": results['successes']['latency']['time_per_output_token']['p95'], - "p99": results['successes']['latency']['time_per_output_token']['p99'], - "p99p9": results['successes']['latency']['time_per_output_token']['p99.9'], - "max": results['successes']['latency']['time_per_output_token']['max'], - }, - "inter_token_latency": { - "units": Units.S_PER_TOKEN, - "mean": results['successes']['latency']['inter_token_latency']['mean'], - "min": results['successes']['latency']['inter_token_latency']['min'], - "p0p1": results['successes']['latency']['inter_token_latency']['p0.1'], - "p1": results['successes']['latency']['inter_token_latency']['p1'], - "p5": results['successes']['latency']['inter_token_latency']['p5'], - "p10": results['successes']['latency']['inter_token_latency']['p10'], - "p25": results['successes']['latency']['inter_token_latency']['p25'], - "p50": results['successes']['latency']['inter_token_latency']['median'], - "p75": results['successes']['latency']['inter_token_latency']['p75'], - "p90": results['successes']['latency']['inter_token_latency']['p90'], - "p95": 
results['successes']['latency']['inter_token_latency']['p95'], - "p99": results['successes']['latency']['inter_token_latency']['p99'], - "p99p9": results['successes']['latency']['inter_token_latency']['p99.9'], - "max": results['successes']['latency']['inter_token_latency']['max'], - }, - "request_latency": { - "units": Units.S, - "mean": results['successes']['latency']['request_latency']['mean'], - "min": results['successes']['latency']['request_latency']['min'], - "p0p1": results['successes']['latency']['request_latency']['p0.1'], - "p1": results['successes']['latency']['request_latency']['p1'], - "p5": results['successes']['latency']['request_latency']['p5'], - "p10": results['successes']['latency']['request_latency']['p10'], - "p25": results['successes']['latency']['request_latency']['p25'], - "p50": results['successes']['latency']['request_latency']['median'], - "p75": results['successes']['latency']['request_latency']['p75'], - "p90": results['successes']['latency']['request_latency']['p90'], - "p95": results['successes']['latency']['request_latency']['p95'], - "p99": results['successes']['latency']['request_latency']['p99'], - "p99p9": results['successes']['latency']['request_latency']['p99.9'], - "max": results['successes']['latency']['request_latency']['max'], - }, - }, - "throughput": { - "output_tokens_per_sec": results['successes']['throughput']['output_tokens_per_sec'], - "total_tokens_per_sec": results['successes']['throughput']['total_tokens_per_sec'], - "requests_per_sec": results['successes']['throughput']['requests_per_sec'], - }, - }, - }) - - return BenchmarkReport(**br_dict) - -def import_nop(results_file: str) -> BenchmarkReport: - """Import data from a nop run as a BenchmarkReport. - - Args: - results_file (str): Results file to import. - - Returns: - BenchmarkReport: Imported data. 
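Inference Perf emits one metrics file per load stage, and the importer recovers the stage index from the filename. Assuming a name shaped like `stage_<n>_metrics.yaml` (the exact pattern is an assumption here; the path is illustrative), the parsing works like this:

# Sketch of the filename-based stage extraction used above.
results_file = "results/stage_3_metrics.yaml"   # illustrative path
stage = int(results_file.rsplit('stage_')[-1].split('_', 1)[0])
assert stage == 3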
- """ - check_file(results_file) - - results = import_yaml(results_file) - - def _import_categories(cat_list: list[dict[str,Any]]) -> list[dict[str,Any]]: - new_cat_list = [] - for cat in cat_list: - cat_dict = {} - cat_dict["title"] = cat["title"] - process = cat.get("process") - if process is not None: - cat_dict["process"] = process["name"] - cat_dict["elapsed"] = { - "units": Units.S, - "value": cat["elapsed"], - } - categories = cat.get("categories") - if categories is not None: - cat_dict["categories"] = _import_categories(categories) - - new_cat_list.append(cat_dict) - - return new_cat_list - - categories = _import_categories(results["metrics"]["categories"]) - - # Get environment variables from llm-d-benchmark run as a dict following the - # schema of BenchmarkReport - br_dict = _get_llmd_benchmark_envars() - - results_dict = { - "scenario": { - "model": { - "name" : results["scenario"]["model"]["name"] - }, - "load": { - "name": WorkloadGenerator.NOP, - }, - "platform": { - "engine": [results["scenario"]["platform"]["engine"]] - }, - "metadata": { - "load_format": results["scenario"]["load_format"], - "sleep_mode": results["scenario"]["sleep_mode"], - }, - }, - "metrics": { - "metadata": { - "load_time": { - "units": Units.S, - "value": results["metrics"]["load_time"], - }, - "size": { - "units": Units.GIB, - "value": results["metrics"]["size"], - }, - "transfer_rate": { - "units": Units.GIB_PER_S, - "value": results["metrics"]["transfer_rate"], - }, - "sleep": { - "units": Units.S, - "value": results["metrics"]["sleep"], - }, - "gpu_freed": { - "units": Units.GIB, - "value": results["metrics"]["gpu_freed"], - }, - "gpu_in_use": { - "units": Units.GIB, - "value": results["metrics"]["gpu_in_use"], - }, - "wake": { - "units": Units.S, - "value": results["metrics"]["wake"], - }, - "categories": categories - }, - "time": { - "duration": results["metrics"]["time"]["duration"], - "start": results["metrics"]["time"]["start"], - "stop": results["metrics"]["time"]["stop"], - }, - "requests": { - "total": 0, - "failures": 0, - "input_length": { - "units": Units.COUNT, - "mean": 0, - "min": 0, - "p10": 0, - "p50": 0, - "p90": 0, - "max": 0, - }, - "output_length": { - "units": Units.COUNT, - "mean": 0, - "min": 0, - "p10": 0, - "p50": 0, - "p90": 0, - "max": 0, - }, - }, - "latency": { - "time_to_first_token": { - "units": Units.MS, - "mean": 0, - "min": 0, - "p10": 0, - "p50": 0, - "p90": 0, - "max": 0, - }, - "normalized_time_per_output_token": { - "units": Units.MS_PER_TOKEN, - "mean": 0, - "min": 0, - "p10": 0, - "p50": 0, - "p90": 0, - "max": 0, - }, - "time_per_output_token": { - "units": Units.MS_PER_TOKEN, - "mean": 0, - "min": 0, - "p10": 0, - "p50": 0, - "p90": 0, - "max": 0, - }, - "inter_token_latency": { - "units": Units.MS_PER_TOKEN, - "mean": 0, - "min": 0, - "p10": 0, - "p50": 0, - "p90": 0, - "max": 0, - }, - "request_latency": { - "units": Units.MS, - "mean": 0, - "min": 0, - "p10": 0, - "p50": 0, - "p90": 0, - "max": 0, - }, - }, - "throughput": { - "output_tokens_per_sec": 0, - "total_tokens_per_sec": 0, - "requests_per_sec": 0, - }, - }, - } - - for name in ["load_cached_compiled_graph", "compile_graph"]: - value = results["metrics"].get(name) - if value is not None: - results_dict["metrics"]["metadata"][name] = { - "units": Units.S, - "value": value, - } - - update_dict(br_dict, results_dict) - - return BenchmarkReport(**br_dict) - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser( - description='Convert benchmark run data to standard benchmark report 
format.') - parser.add_argument( - 'results_file', - type=str, - help='Results file to convert.') - parser.add_argument( - 'output_file', - type=str, - default=None, - nargs='?', - help='Output file for benchark report.') - parser.add_argument( - '-f', '--force', - action=argparse.BooleanOptionalAction, - help='Write to output file even if it already exists.') - parser.add_argument( - '-w', '--workload-generator', - type=str, - default=WorkloadGenerator.VLLM_BENCHMARK, - help='Workload generator used.') - - args = parser.parse_args() - if args.output_file and os.path.exists(args.output_file) and not args.force: - sys.stderr.write('Output file already exists: %s\n' % args.output_file) - sys.exit(1) - - match args.workload_generator: - case WorkloadGenerator.FMPERF: - if args.output_file: - import_fmperf(args.results_file).export_yaml(args.output_file) - else: - import_fmperf(args.results_file).print_yaml() - case WorkloadGenerator.GUIDELLM: - if args.output_file: - import_guidellm(args.results_file).export_yaml(args.output_file) - else: - import_guidellm(args.results_file).print_yaml() - case WorkloadGenerator.INFERENCE_PERF: - if args.output_file: - import_inference_perf(args.results_file).export_yaml(args.output_file) - else: - import_inference_perf(args.results_file).print_yaml() - case WorkloadGenerator.VLLM_BENCHMARK: - if args.output_file: - import_vllm_benchmark(args.results_file).export_yaml(args.output_file) - else: - import_vllm_benchmark(args.results_file).print_yaml() - case WorkloadGenerator.NOP: - if args.output_file: - import_nop(args.results_file).export_yaml(args.output_file) - else: - import_nop(args.results_file).print_yaml() - case _: - sys.stderr.write('Unsupported workload generator: %s\n' % - args.workload_generator) - sys.stderr.write('Must be one of: %s\n' % - str([wg.value for wg in WorkloadGenerator])[1:-1]) - sys.exit(1)
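The `match` in the entry point compares the raw CLI string against enum members, which only works if `WorkloadGenerator` is a string-backed enum. A minimal illustration; the class body here is an assumed stand-in (member values guessed from the `-l` flags used in the CI workflows), not the real definition:

from enum import Enum

class WorkloadGenerator(str, Enum):  # illustrative stand-in for the real enum
    FMPERF = "fmperf"
    GUIDELLM = "guidellm"
    INFERENCE_PERF = "inference-perf"
    VLLM_BENCHMARK = "vllm-benchmark"
    NOP = "nop"

# A value pattern such as `case WorkloadGenerator.FMPERF:` compares with ==,
# and a str-backed enum member compares equal to its plain-string value:
assert "fmperf" == WorkloadGenerator.FMPERF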