-
Notifications
You must be signed in to change notification settings - Fork 282
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
storage billing model ddl optimization script (#456)
- Loading branch information
Showing
2 changed files
with
164 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
140 changes: 140 additions & 0 deletions
140
scripts/optimization/storage_billing_model_savings_ddl.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
-- WARNING ######################################################################################################################################### | ||
-- WARNING If you change the storage billing model of a dataset then you have to wait 14 days before changing it again. | ||
-- WARNING See https://cloud.google.com/bigquery/docs/datasets-intro#dataset_limitations | ||
-- WARNING ######################################################################################################################################### | ||
|
||
|
||
-- Instructions | ||
-- Search for marker 'REMEMBER' to tune the queries at your will | ||
-- Run the query to obtain the results including ALTER SCHEMA DDL | ||
|
||
|
||
-- Permissions needed | ||
-- * To run this query https://cloud.google.com/bigquery/docs/information-schema-table-storage-by-organization#required_permissions | ||
-- * To run the ALTER SCHEMA DDL https://cloud.google.com/bigquery/docs/updating-datasets#required_permissions | ||
|
||
|
||
-- REMEMBER: Put here the prices of the region of interest, current values are for the US | ||
-- See https://cloud.google.com/bigquery/pricing#storage | ||
DECLARE active_logical_gib_price FLOAT64 DEFAULT 0.02; | ||
DECLARE long_term_logical_gib_price FLOAT64 DEFAULT 0.01; | ||
DECLARE active_physical_gib_price FLOAT64 DEFAULT 0.04; | ||
DECLARE long_term_physical_gib_price FLOAT64 DEFAULT 0.02; | ||
|
||
|
||
-- REMEMBER: (optional) Change this if you plan to change the time travel window (e.g. if you currently use the default 7 days and want to reduce it to 2 days put 2.0/7.0) | ||
-- See https://cloud.google.com/bigquery/docs/time-travel#configuring_the_time_travel_window | ||
DECLARE time_travel_rescale FLOAT64 DEFAULT 1.0; | ||
-- REMEMBER: (optional) Change this if you want that the generated DDL statements include time travel window settings (e.g. 2.0*24.0) | ||
DECLARE time_travel_hours FLOAT64 DEFAULT NULL; | ||
-- REMEMBER: (optional) Change this to filter based on savings absolute value | ||
DECLARE min_monthly_saving FLOAT64 DEFAULT 0.0; | ||
-- REMEMBER: (optional) Change this to filter based on savings % value (e.g. 1% is 0.01) | ||
DECLARE min_monthly_saving_pct FLOAT64 DEFAULT 0.00; | ||
|
||
CREATE SCHEMA IF NOT EXISTS optimization_workshop; | ||
CREATE OR REPLACE TABLE optimization_workshop.storage_billing_model_savings_ddl AS | ||
WITH | ||
storage_sizes AS ( | ||
SELECT | ||
project_id AS project_name, | ||
table_schema AS dataset_name, | ||
-- Logical | ||
SUM(active_logical_bytes) / power(1024, 3) AS active_logical_gib, | ||
SUM(long_term_logical_bytes) / power(1024, 3) AS long_term_logical_gib, | ||
-- Physical | ||
SUM(active_physical_bytes - time_travel_physical_bytes) / power(1024, 3) AS active_no_tt_no_fs_physical_gib, | ||
SUM(time_travel_physical_bytes) / power(1024, 3) * time_travel_rescale AS time_travel_physical_gib, | ||
SUM(fail_safe_physical_bytes) / power(1024, 3) AS fail_safe_physical_gib, | ||
SUM(long_term_physical_bytes) / power(1024, 3) AS long_term_physical_gib, | ||
FROM | ||
-- REMEMBER: Change here to the region of interest | ||
`region-us`.INFORMATION_SCHEMA.TABLE_STORAGE_BY_ORGANIZATION | ||
-- See https://cloud.google.com/bigquery/docs/information-schema-table-storage-by-organization#schema | ||
WHERE TRUE | ||
AND total_physical_bytes > 0 | ||
-- REMEMBER: (optional) You may want to enable this filter to exclude ML models, materialized views, etc... | ||
-- AND table_type = 'BASE TABLE' | ||
GROUP BY 1,2 | ||
), | ||
storage_prices AS ( | ||
SELECT | ||
project_name, | ||
dataset_name, | ||
-- Logical | ||
active_logical_gib AS active_logical_gib, | ||
long_term_logical_gib AS long_term_logical_gib, | ||
-- Physical | ||
active_no_tt_no_fs_physical_gib AS active_no_tt_no_fs_physical_gib, | ||
time_travel_physical_gib AS time_travel_physical_gib, | ||
fail_safe_physical_gib AS fail_safe_physical_gib, | ||
long_term_physical_gib AS long_term_physical_gib, | ||
-- Compression ratio | ||
SAFE_DIVIDE(active_logical_gib, active_no_tt_no_fs_physical_gib) AS active_compression_ratio, | ||
SAFE_DIVIDE(long_term_logical_gib, long_term_physical_gib) AS long_term_compression_ratio, | ||
-- Forecast costs logical | ||
active_logical_gib * active_logical_gib_price AS forecast_active_logical_cost, | ||
long_term_logical_gib * long_term_logical_gib_price AS forecast_long_term_logical_cost, | ||
-- Forecast costs physical | ||
active_no_tt_no_fs_physical_gib * active_physical_gib_price AS forecast_active_no_tt_no_fs_physical_cost, | ||
time_travel_physical_gib * active_physical_gib_price AS forecast_travel_physical_cost, | ||
fail_safe_physical_gib * active_physical_gib_price AS forecast_failsafe_physical_cost, | ||
long_term_physical_gib * long_term_physical_gib_price AS forecast_long_term_physical_cost, | ||
FROM | ||
storage_sizes | ||
), | ||
storage_prices_total AS ( | ||
SELECT | ||
project_name, | ||
dataset_name, | ||
(forecast_active_logical_cost+forecast_long_term_logical_cost) AS forecast_logical, | ||
(forecast_active_no_tt_no_fs_physical_cost+forecast_travel_physical_cost+forecast_failsafe_physical_cost+forecast_long_term_physical_cost) AS forecast_physical, | ||
FROM storage_prices | ||
), | ||
current_model AS ( | ||
SELECT | ||
catalog_name AS project_name, | ||
option_name AS dataset_name, | ||
LOWER(option_value) AS current_model | ||
FROM | ||
-- REMEMBER: Change here to the region of interest | ||
`region-us`.INFORMATION_SCHEMA.SCHEMATA_OPTIONS | ||
WHERE option_name = "storage_billing_model" | ||
), | ||
storage_prices_compare AS ( | ||
SELECT | ||
SPT.project_name, | ||
SPT.dataset_name, | ||
SPT.forecast_logical, | ||
SPT.forecast_physical, | ||
(SPT.forecast_logical - SPT.forecast_physical) AS forecast_compare, | ||
IF(SPT.forecast_logical > SPT.forecast_physical,"physical","logical") AS better_on, | ||
IFNULL(CM.current_model, "logical") AS currently_on | ||
FROM | ||
storage_prices_total AS SPT | ||
LEFT JOIN current_model AS CM | ||
USING(project_name, dataset_name) | ||
), | ||
storage_prices_compare2 AS ( | ||
SELECT | ||
*, | ||
IF(currently_on = "logical", forecast_logical, forecast_physical) AS monthly_spending, | ||
ABS(forecast_compare) AS monthly_savings, | ||
ABS(forecast_compare)/IF(currently_on = "logical", forecast_logical, forecast_physical) AS monthly_savings_pct, | ||
FROM storage_prices_compare | ||
), | ||
storage_ddl AS ( | ||
SELECT | ||
*, | ||
IF(time_travel_hours IS NULL, | ||
CONCAT("ALTER SCHEMA `", project_name, ".", dataset_name, "` SET OPTIONS(storage_billing_model='", better_on, "' );" ), | ||
CONCAT("ALTER SCHEMA `", project_name, ".", dataset_name, "` SET OPTIONS(storage_billing_model='", better_on, "', max_time_travel_hours=", time_travel_hours, ");" ) | ||
) AS ddl, | ||
FROM storage_prices_compare2 | ||
WHERE better_on != currently_on | ||
AND monthly_savings > min_monthly_saving | ||
AND monthly_savings_pct > min_monthly_saving_pct | ||
ORDER BY monthly_savings DESC | ||
) | ||
-- REMEMBER: (optional) Change here the name of the pseudo-table (CTE) you want to check | ||
SELECT * FROM storage_ddl; |