diff --git a/scripts/optimization/README.md b/scripts/optimization/README.md index e1a5cad1a..226e10a72 100644 --- a/scripts/optimization/README.md +++ b/scripts/optimization/README.md @@ -306,6 +306,30 @@ contains views with JOINs where the JOIN condition is potentially non-optimal. +
🔍 Storage Billing Model Savings DDL + +## Storage Billing Model Savings DDL + +The [storage_billing_model_savings_ddl.sql](storage_billing_model_savings_ddl.sql) script +creates a table named, `storage_billing_model_savings_ddl` which forecasts which datasets will benefit from +either the physical or logical storage billing model, and provides the DDL to adjust them across your organization. + +Instructions: Search for marker 'REMEMBER' to tune the queries at your will. You must adjust the region that you're interested in +and the pricing for each region found [here](https://cloud.google.com/bigquery/pricing#storage). + +### Examples of querying script results + +* Top 10 datasets with their respective recommended storage billing model, ordered by forecasted savings + + ```sql + SELECT project_name, dataset_name, currently_on, better_on, monthly_savings, ddl + FROM optimization_workshop.storage_billing_model_savings_ddl + ORDER BY monthly_savings DESC + LIMIT 10 + ``` + +
+ # Query Analysis
🔍 Queries grouped by hash diff --git a/scripts/optimization/storage_billing_model_savings_ddl.sql b/scripts/optimization/storage_billing_model_savings_ddl.sql new file mode 100644 index 000000000..b89dd3149 --- /dev/null +++ b/scripts/optimization/storage_billing_model_savings_ddl.sql @@ -0,0 +1,140 @@ +-- WARNING ######################################################################################################################################### +-- WARNING If you change the storage billing model of a dataset then you have to wait 14 days before changing it again. +-- WARNING See https://cloud.google.com/bigquery/docs/datasets-intro#dataset_limitations +-- WARNING ######################################################################################################################################### + + +-- Instructions +-- Search for marker 'REMEMBER' to tune the queries at your will +-- Run the query to obtain the results including ALTER SCHEMA DDL + + +-- Permissions needed +-- * To run this query https://cloud.google.com/bigquery/docs/information-schema-table-storage-by-organization#required_permissions +-- * To run the ALTER SCHEMA DDL https://cloud.google.com/bigquery/docs/updating-datasets#required_permissions + + +-- REMEMBER: Put here the prices of the region of interest, current values are for the US +-- See https://cloud.google.com/bigquery/pricing#storage +DECLARE active_logical_gib_price FLOAT64 DEFAULT 0.02; +DECLARE long_term_logical_gib_price FLOAT64 DEFAULT 0.01; +DECLARE active_physical_gib_price FLOAT64 DEFAULT 0.04; +DECLARE long_term_physical_gib_price FLOAT64 DEFAULT 0.02; + + +-- REMEMBER: (optional) Change this if you plan to change the time travel window (e.g. if you currently use the default 7 days and want to reduce it to 2 days put 2.0/7.0) +-- See https://cloud.google.com/bigquery/docs/time-travel#configuring_the_time_travel_window +DECLARE time_travel_rescale FLOAT64 DEFAULT 1.0; +-- REMEMBER: (optional) Change this if you want that the generated DDL statements include time travel window settings (e.g. 2.0*24.0) +DECLARE time_travel_hours FLOAT64 DEFAULT NULL; +-- REMEMBER: (optional) Change this to filter based on savings absolute value +DECLARE min_monthly_saving FLOAT64 DEFAULT 0.0; +-- REMEMBER: (optional) Change this to filter based on savings % value (e.g. 1% is 0.01) +DECLARE min_monthly_saving_pct FLOAT64 DEFAULT 0.00; + +CREATE SCHEMA IF NOT EXISTS optimization_workshop; +CREATE OR REPLACE TABLE optimization_workshop.storage_billing_model_savings_ddl AS +WITH +storage_sizes AS ( + SELECT + project_id AS project_name, + table_schema AS dataset_name, + -- Logical + SUM(active_logical_bytes) / power(1024, 3) AS active_logical_gib, + SUM(long_term_logical_bytes) / power(1024, 3) AS long_term_logical_gib, + -- Physical + SUM(active_physical_bytes - time_travel_physical_bytes) / power(1024, 3) AS active_no_tt_no_fs_physical_gib, + SUM(time_travel_physical_bytes) / power(1024, 3) * time_travel_rescale AS time_travel_physical_gib, + SUM(fail_safe_physical_bytes) / power(1024, 3) AS fail_safe_physical_gib, + SUM(long_term_physical_bytes) / power(1024, 3) AS long_term_physical_gib, + FROM + -- REMEMBER: Change here to the region of interest + `region-us`.INFORMATION_SCHEMA.TABLE_STORAGE_BY_ORGANIZATION + -- See https://cloud.google.com/bigquery/docs/information-schema-table-storage-by-organization#schema + WHERE TRUE + AND total_physical_bytes > 0 + -- REMEMBER: (optional) You may want to enable this filter to exclude ML models, materialized views, etc... + -- AND table_type = 'BASE TABLE' + GROUP BY 1,2 +), +storage_prices AS ( + SELECT + project_name, + dataset_name, + -- Logical + active_logical_gib AS active_logical_gib, + long_term_logical_gib AS long_term_logical_gib, + -- Physical + active_no_tt_no_fs_physical_gib AS active_no_tt_no_fs_physical_gib, + time_travel_physical_gib AS time_travel_physical_gib, + fail_safe_physical_gib AS fail_safe_physical_gib, + long_term_physical_gib AS long_term_physical_gib, + -- Compression ratio + SAFE_DIVIDE(active_logical_gib, active_no_tt_no_fs_physical_gib) AS active_compression_ratio, + SAFE_DIVIDE(long_term_logical_gib, long_term_physical_gib) AS long_term_compression_ratio, + -- Forecast costs logical + active_logical_gib * active_logical_gib_price AS forecast_active_logical_cost, + long_term_logical_gib * long_term_logical_gib_price AS forecast_long_term_logical_cost, + -- Forecast costs physical + active_no_tt_no_fs_physical_gib * active_physical_gib_price AS forecast_active_no_tt_no_fs_physical_cost, + time_travel_physical_gib * active_physical_gib_price AS forecast_travel_physical_cost, + fail_safe_physical_gib * active_physical_gib_price AS forecast_failsafe_physical_cost, + long_term_physical_gib * long_term_physical_gib_price AS forecast_long_term_physical_cost, + FROM + storage_sizes +), +storage_prices_total AS ( + SELECT + project_name, + dataset_name, + (forecast_active_logical_cost+forecast_long_term_logical_cost) AS forecast_logical, + (forecast_active_no_tt_no_fs_physical_cost+forecast_travel_physical_cost+forecast_failsafe_physical_cost+forecast_long_term_physical_cost) AS forecast_physical, + FROM storage_prices +), +current_model AS ( + SELECT + catalog_name AS project_name, + option_name AS dataset_name, + LOWER(option_value) AS current_model + FROM + -- REMEMBER: Change here to the region of interest + `region-us`.INFORMATION_SCHEMA.SCHEMATA_OPTIONS + WHERE option_name = "storage_billing_model" +), +storage_prices_compare AS ( + SELECT + SPT.project_name, + SPT.dataset_name, + SPT.forecast_logical, + SPT.forecast_physical, + (SPT.forecast_logical - SPT.forecast_physical) AS forecast_compare, + IF(SPT.forecast_logical > SPT.forecast_physical,"physical","logical") AS better_on, + IFNULL(CM.current_model, "logical") AS currently_on + FROM + storage_prices_total AS SPT + LEFT JOIN current_model AS CM + USING(project_name, dataset_name) +), +storage_prices_compare2 AS ( + SELECT + *, + IF(currently_on = "logical", forecast_logical, forecast_physical) AS monthly_spending, + ABS(forecast_compare) AS monthly_savings, + ABS(forecast_compare)/IF(currently_on = "logical", forecast_logical, forecast_physical) AS monthly_savings_pct, + FROM storage_prices_compare +), +storage_ddl AS ( + SELECT + *, + IF(time_travel_hours IS NULL, + CONCAT("ALTER SCHEMA `", project_name, ".", dataset_name, "` SET OPTIONS(storage_billing_model='", better_on, "' );" ), + CONCAT("ALTER SCHEMA `", project_name, ".", dataset_name, "` SET OPTIONS(storage_billing_model='", better_on, "', max_time_travel_hours=", time_travel_hours, ");" ) + ) AS ddl, + FROM storage_prices_compare2 + WHERE better_on != currently_on + AND monthly_savings > min_monthly_saving + AND monthly_savings_pct > min_monthly_saving_pct + ORDER BY monthly_savings DESC +) +-- REMEMBER: (optional) Change here the name of the pseudo-table (CTE) you want to check +SELECT * FROM storage_ddl; \ No newline at end of file