From d7a272b1ad845c249ee0c31a980b3ab348f554c6 Mon Sep 17 00:00:00 2001 From: afleisc Date: Wed, 2 Oct 2024 15:25:34 -0500 Subject: [PATCH 1/4] compute ddl script --- scripts/optimization/README.md | 27 ++++++ .../compute_billiing_model_savings_ddl.sql | 84 +++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 scripts/optimization/compute_billiing_model_savings_ddl.sql diff --git a/scripts/optimization/README.md b/scripts/optimization/README.md index 226e10a72..519a7699d 100644 --- a/scripts/optimization/README.md +++ b/scripts/optimization/README.md @@ -534,3 +534,30 @@ of that hour's slots each grouping of labels consumed. ``` +
🔍 Compute Billing Model Savings DDL + +## Compute Billing Model Savings DDL + +The [compute_billing_model_savings_ddl](compute_billing_model_savings_ddl.sql) +script creates a table named, `compute_billing_model_savings_ddl`. + +It can be more cost efficient to use either the on-demand or the reservation compute billing model depending on query patterns. This script aims to identify projects that obviously save cost by switching billing models and to provide template DDL to adjust accordingly. + +In order to safely identify projects that would benefit in one model or the other, the script assumes that any slots used in reservations are autoscaling slots. + +Instructions: Search for marker 'REMEMBER' to tune the queries at your will. You must adjust the region that you're interested in +and the pricing for each region found [here](https://cloud.google.com/bigquery/pricing#storage). + +### Examples of querying script results + +* Top 10 projects that are forecasted to benefit from a compute billing model change, ordered by forecasted cost difference. + + ```sql + SELECT project_id, usage_type as current_model, cost_on_demand, cost_reservation, ABS(cost_reservation-cost_on_demand) as cost_diff, ddl + FROM `optimization_workshop.compute_billing_model_savings_ddl` + WHERE ddl is not null + ORDER BY cost_diff desc + LIMIT 10 + ``` + +
\ No newline at end of file diff --git a/scripts/optimization/compute_billiing_model_savings_ddl.sql b/scripts/optimization/compute_billiing_model_savings_ddl.sql new file mode 100644 index 000000000..f231ebed3 --- /dev/null +++ b/scripts/optimization/compute_billiing_model_savings_ddl.sql @@ -0,0 +1,84 @@ +/* + * Copyright 2024 Google LLC + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +DECLARE num_days_to_scan INT64 DEFAULT 30; + +-- REMEMBER: Put here the prices of the region of interest, current values are for the US +-- See https://cloud.google.com/bigquery/pricing#storage +DECLARE on_demand_rate FLOAT64 DEFAULT 6.25; +DECLARE standard_payg_rate FLOAT64 DEFAULT .04; +-- DECLARE standard_payg_rate FLOAT64 DEFAULT .06; +-- DECLARE ee_payg_rate FLOAT64 DEFAULT .1; + +-- REMEMBER: (optional) Change this to filter based on savings absolute value or percentage +DECLARE threshold_percent FLOAT64 DEFAULT 10.0; -- 10% difference +DECLARE absolute_threshold FLOAT64 DEFAULT 100.0; -- $100 absolute difference + +-- REMEMBER: Set the Admin Project ID, Location, and Reservation Name +DECLARE admin_project_id STRING DEFAULT 'your-admin-project-id'; -- Replace with your Admin Project ID +DECLARE location STRING DEFAULT 'US'; -- Replace with your location +DECLARE reservation_name STRING DEFAULT 'your-reservation-name'; -- Replace with your Reservation Name + + +CREATE SCHEMA IF NOT EXISTS optimization_workshop; +CREATE OR REPLACE TABLE optimization_workshop.compute_billing_model_savings_ddl AS ( +WITH job_data AS ( + SELECT + project_id, + TIMESTAMP_TRUNC(end_time, HOUR) AS time_window, + SUM(total_slot_ms) as total_slot_ms, + SUM((total_slot_ms/1000/60/60)) as total_slot_hours, + SUM(total_bytes_billed/ POW(1024, 4)) as tb_billed, + CASE + WHEN reservation_id IS NULL THEN 'on_demand' + ELSE 'reservation' + END AS usage_type + FROM + `region-us`.INFORMATION_SCHEMA.JOBS_BY_ORGANIZATION as jbo + WHERE + job_type = 'QUERY' + AND statement_type != 'SCRIPT' + AND DATE(jbo.creation_time, "US/Central") >= CURRENT_DATE - 30 + GROUP BY + project_id, time_window, usage_type +), +cost_analysis AS ( + SELECT + project_id, + usage_type, + sum(total_slot_hours) as total_slot_hours, + sum(tb_billed) as tb_billed, + sum(tb_billed)*on_demand_rate as cost_on_demand, + sum(total_slot_hours)*standard_payg_rate as cost_reservation + FROM job_data + GROUP BY 1, 2 +) +SELECT + *, + -- Generate DDL templates with threshold logic and parameters + CASE + WHEN usage_type = 'on_demand' AND cost_on_demand > 0 AND ( + (cost_on_demand - cost_reservation) / cost_on_demand * 100 >= threshold_percent OR + cost_on_demand - cost_reservation >= absolute_threshold + ) THEN CONCAT('CREATE ASSIGNMENT `', admin_project_id, '.', 'region-', location, '.', reservation_name, '.ASSIGNMENT_ID` OPTIONS ( assignee="projects/', project_id, '", job_type="QUERY");')   + WHEN usage_type = 'reservation' AND cost_on_demand > 0 AND ( + (cost_reservation - cost_on_demand) / cost_on_demand * 100 >= threshold_percent OR + cost_reservation - cost_on_demand >= absolute_threshold + ) THEN CONCAT('CREATE ASSIGNMENT `', admin_project_id, '.', 'region-', location, '.', 'none', '.ASSIGNMENT_ID` OPTIONS ( assignee="projects/', project_id, '", job_type="QUERY");') + ELSE NULL -- No DDL if threshold not met or cost_on_demand is zero + END AS ddl +FROM cost_analysis +); \ No newline at end of file From ef764aa959cbd861bcaed617a9da469cb8f9893f Mon Sep 17 00:00:00 2001 From: afleisc Date: Wed, 2 Oct 2024 15:27:51 -0500 Subject: [PATCH 2/4] file name typo --- ...odel_savings_ddl.sql => compute_billing_model_savings_ddl.sql} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scripts/optimization/{compute_billiing_model_savings_ddl.sql => compute_billing_model_savings_ddl.sql} (100%) diff --git a/scripts/optimization/compute_billiing_model_savings_ddl.sql b/scripts/optimization/compute_billing_model_savings_ddl.sql similarity index 100% rename from scripts/optimization/compute_billiing_model_savings_ddl.sql rename to scripts/optimization/compute_billing_model_savings_ddl.sql From 2e2089454e21ad2d4664c9526f9d8a1bc392a238 Mon Sep 17 00:00:00 2001 From: afleisc Date: Wed, 2 Oct 2024 16:47:45 -0500 Subject: [PATCH 3/4] add best and worst case logic --- .../compute_billing_model_savings_ddl.sql | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/scripts/optimization/compute_billing_model_savings_ddl.sql b/scripts/optimization/compute_billing_model_savings_ddl.sql index f231ebed3..5bb7d6c7b 100644 --- a/scripts/optimization/compute_billing_model_savings_ddl.sql +++ b/scripts/optimization/compute_billing_model_savings_ddl.sql @@ -14,14 +14,22 @@ * limitations under the License. */ -DECLARE num_days_to_scan INT64 DEFAULT 30; +-- US pricing for reference: https://cloud.google.com/bigquery/pricing#storage +-- standard_baseline_rate FLOAT64 DEFAULT .04; +-- enterprise_baseline_1yr_rate FLOAT64 DEFAULT .048; +-- enterprise_baseline_3yr_rate FLOAT64 DEFAULT .036; +-- enterprise_plus_baseline_1yr_rate FLOAT64 DEFAULT .08; +-- enterprise_plus_baseline_3yr_rate FLOAT64 DEFAULT .06; +-- standard_payg_rate FLOAT64 DEFAULT .04; +-- enterprise_payg_rate FLOAT64 DEFAULT .06; +-- enterprise_plus_payg_rate FLOAT64 DEFAULT .1; --- REMEMBER: Put here the prices of the region of interest, current values are for the US --- See https://cloud.google.com/bigquery/pricing#storage +DECLARE num_days_to_scan INT64 DEFAULT 30; DECLARE on_demand_rate FLOAT64 DEFAULT 6.25; -DECLARE standard_payg_rate FLOAT64 DEFAULT .04; --- DECLARE standard_payg_rate FLOAT64 DEFAULT .06; --- DECLARE ee_payg_rate FLOAT64 DEFAULT .1; +-- REMEMBER: Replace with the values of your current or target baseline slot price. +DECLARE baseline_rate FLOAT64 DEFAULT .04; +-- REMEMBER: Replace with the values of your current or target autoscaling slot price. +DECLARE autoscaling_rate DEFAULT .04; -- REMEMBER: (optional) Change this to filter based on savings absolute value or percentage DECLARE threshold_percent FLOAT64 DEFAULT 10.0; -- 10% difference @@ -62,7 +70,8 @@ cost_analysis AS ( sum(total_slot_hours) as total_slot_hours, sum(tb_billed) as tb_billed, sum(tb_billed)*on_demand_rate as cost_on_demand, - sum(total_slot_hours)*standard_payg_rate as cost_reservation + sum(total_slot_hours)*autoscaling_rate as worst_case_cost_reservation, -- Worst case is that only autoscaling slots are used. + sum(total_slot_hours)*baseline_rate as best_case_cost_reservation -- Best case is that only baseline slots are used. FROM job_data GROUP BY 1, 2 ) @@ -71,12 +80,12 @@ SELECT -- Generate DDL templates with threshold logic and parameters CASE WHEN usage_type = 'on_demand' AND cost_on_demand > 0 AND ( - (cost_on_demand - cost_reservation) / cost_on_demand * 100 >= threshold_percent OR - cost_on_demand - cost_reservation >= absolute_threshold + (cost_on_demand - worst_case_cost_reservation) / cost_on_demand * 100 >= threshold_percent OR + cost_on_demand - worst_case_cost_reservation >= absolute_threshold ) THEN CONCAT('CREATE ASSIGNMENT `', admin_project_id, '.', 'region-', location, '.', reservation_name, '.ASSIGNMENT_ID` OPTIONS ( assignee="projects/', project_id, '", job_type="QUERY");')   WHEN usage_type = 'reservation' AND cost_on_demand > 0 AND ( - (cost_reservation - cost_on_demand) / cost_on_demand * 100 >= threshold_percent OR - cost_reservation - cost_on_demand >= absolute_threshold + (best_case_cost_reservation - cost_on_demand) / cost_on_demand * 100 >= threshold_percent OR + best_case_cost_reservation - cost_on_demand >= absolute_threshold ) THEN CONCAT('CREATE ASSIGNMENT `', admin_project_id, '.', 'region-', location, '.', 'none', '.ASSIGNMENT_ID` OPTIONS ( assignee="projects/', project_id, '", job_type="QUERY");') ELSE NULL -- No DDL if threshold not met or cost_on_demand is zero END AS ddl From af4560d01c55ff148ec8bae2fa7fd3998c293d9e Mon Sep 17 00:00:00 2001 From: afleisc Date: Mon, 21 Oct 2024 08:03:59 -0500 Subject: [PATCH 4/4] Remove time window group by, use num_days variable, refactor README --- scripts/optimization/README.md | 2 -- scripts/optimization/compute_billing_model_savings_ddl.sql | 7 +++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/scripts/optimization/README.md b/scripts/optimization/README.md index 519a7699d..9bd8bf090 100644 --- a/scripts/optimization/README.md +++ b/scripts/optimization/README.md @@ -543,8 +543,6 @@ script creates a table named, `compute_billing_model_savings_ddl`. It can be more cost efficient to use either the on-demand or the reservation compute billing model depending on query patterns. This script aims to identify projects that obviously save cost by switching billing models and to provide template DDL to adjust accordingly. -In order to safely identify projects that would benefit in one model or the other, the script assumes that any slots used in reservations are autoscaling slots. - Instructions: Search for marker 'REMEMBER' to tune the queries at your will. You must adjust the region that you're interested in and the pricing for each region found [here](https://cloud.google.com/bigquery/pricing#storage). diff --git a/scripts/optimization/compute_billing_model_savings_ddl.sql b/scripts/optimization/compute_billing_model_savings_ddl.sql index 5bb7d6c7b..a1a4aa947 100644 --- a/scripts/optimization/compute_billing_model_savings_ddl.sql +++ b/scripts/optimization/compute_billing_model_savings_ddl.sql @@ -46,7 +46,6 @@ CREATE OR REPLACE TABLE optimization_workshop.compute_billing_model_savings_ddl WITH job_data AS ( SELECT project_id, - TIMESTAMP_TRUNC(end_time, HOUR) AS time_window, SUM(total_slot_ms) as total_slot_ms, SUM((total_slot_ms/1000/60/60)) as total_slot_hours, SUM(total_bytes_billed/ POW(1024, 4)) as tb_billed, @@ -59,9 +58,9 @@ WITH job_data AS ( WHERE job_type = 'QUERY' AND statement_type != 'SCRIPT' - AND DATE(jbo.creation_time, "US/Central") >= CURRENT_DATE - 30 + AND DATE(jbo.creation_time, "US/Central") >= CURRENT_DATE - num_days_to_scan GROUP BY - project_id, time_window, usage_type + project_id, usage_type ), cost_analysis AS ( SELECT @@ -90,4 +89,4 @@ SELECT ELSE NULL -- No DDL if threshold not met or cost_on_demand is zero END AS ddl FROM cost_analysis -); \ No newline at end of file +);