-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Set catalog on `SchemaDeployer` to overwrite the default `hive_metastore` (#296)

### Linked issues

Resolves #294. Needs #280 (tech debt to tackle later). Progresses #278. Requires #287 for the CI to pass.
- Loading branch information
1 parent
605498c
commit 1d50c70
Showing
7 changed files
with
117 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,46 +1,53 @@ | ||
import datetime as dt | ||
import logging | ||
import pkgutil | ||
from typing import Any | ||
|
||
from databricks.sdk.errors import InternalError | ||
from databricks.sdk.retries import retried | ||
|
||
from databricks.labs.lsql.backends import Dataclass, SqlBackend | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
class SchemaDeployer: | ||
def __init__(self, sql_backend: SqlBackend, inventory_schema: str, mod: Any): | ||
"""Deploy schema, tables, and views for a given inventory schema.""" | ||
|
||
def __init__( | ||
self, | ||
sql_backend: SqlBackend, | ||
schema: str, | ||
mod: Any, | ||
*, | ||
catalog: str = "hive_metastore", | ||
) -> None: | ||
self._sql_backend = sql_backend | ||
self._inventory_schema = inventory_schema | ||
self._schema = schema | ||
self._module = mod | ||
self._catalog = catalog | ||
|
||
# InternalError are retried for resilience on sporadic Databricks issues | ||
@retried(on=[InternalError], timeout=dt.timedelta(minutes=1)) | ||
def deploy_schema(self): | ||
logger.info(f"Ensuring {self._inventory_schema} database exists") | ||
self._sql_backend.execute(f"CREATE SCHEMA IF NOT EXISTS hive_metastore.{self._inventory_schema}") | ||
|
||
def delete_schema(self): | ||
logger.info(f"deleting {self._inventory_schema} database") | ||
def deploy_schema(self) -> None: | ||
schema_full_name = f"{self._catalog}.{self._schema}" | ||
logger.info(f"Ensuring {schema_full_name} database exists") | ||
self._sql_backend.execute(f"CREATE SCHEMA IF NOT EXISTS {schema_full_name}") | ||
|
||
self._sql_backend.execute(f"DROP SCHEMA IF EXISTS hive_metastore.{self._inventory_schema} CASCADE") | ||
def delete_schema(self) -> None: | ||
schema_full_name = f"{self._catalog}.{self._schema}" | ||
logger.info(f"Deleting {schema_full_name} database") | ||
self._sql_backend.execute(f"DROP SCHEMA IF EXISTS {schema_full_name} CASCADE") | ||
|
||
def deploy_table(self, name: str, klass: Dataclass): | ||
logger.info(f"Ensuring {self._inventory_schema}.{name} table exists") | ||
self._sql_backend.create_table(f"hive_metastore.{self._inventory_schema}.{name}", klass) | ||
def deploy_table(self, name: str, klass: Dataclass) -> None: | ||
table_full_name = f"{self._catalog}.{self._schema}.{name}" | ||
logger.info(f"Ensuring {table_full_name} table exists") | ||
self._sql_backend.create_table(table_full_name, klass) | ||
|
||
def deploy_view(self, name: str, relative_filename: str): | ||
def deploy_view(self, name: str, relative_filename: str) -> None: | ||
query = self._load(relative_filename) | ||
logger.info(f"Ensuring {self._inventory_schema}.{name} view matches {relative_filename} contents") | ||
ddl = f"CREATE OR REPLACE VIEW hive_metastore.{self._inventory_schema}.{name} AS {query}" | ||
view_full_name = f"{self._catalog}.{self._schema}.{name}" | ||
logger.info(f"Ensuring {view_full_name} view matches {relative_filename} contents") | ||
ddl = f"CREATE OR REPLACE VIEW {view_full_name} AS {query}" | ||
self._sql_backend.execute(ddl) | ||
|
||
def _load(self, relative_filename: str) -> str: | ||
data = pkgutil.get_data(self._module.__name__, relative_filename) | ||
assert data is not None | ||
sql = data.decode("utf-8") | ||
sql = sql.replace("$inventory", f"hive_metastore.{self._inventory_schema}") | ||
sql = sql.replace("$inventory", f"{self._catalog}.{self._schema}") | ||
return sql |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,37 +1,24 @@ | ||
import pytest | ||
|
||
from databricks.labs.lsql import Row | ||
from databricks.labs.lsql.backends import StatementExecutionBackend | ||
from databricks.labs.lsql.deployment import SchemaDeployer | ||
|
||
from . import views | ||
|
||
|
||
@pytest.mark.xfail | ||
def test_deploys_database(ws, env_or_skip, make_random): | ||
# TODO: create per-project/per-scope catalog | ||
schema = "default" | ||
sql_backend = StatementExecutionBackend(ws, env_or_skip("TEST_DEFAULT_WAREHOUSE_ID")) | ||
@pytest.mark.xfail(reason="Identity used in CI misses privileges to create UC resources") | ||
def test_deploys_schema(ws, sql_backend, make_random, make_catalog) -> None: | ||
"""Test deploying a full, minimal inventory schema with a single schema, table and view.""" | ||
catalog = make_catalog(name=f"lsql_test_{make_random()}") | ||
schema_name = "lsql_test" | ||
table_full_name = f"{catalog.name}.{schema_name}.foo" | ||
|
||
deployer = SchemaDeployer(sql_backend, schema, views) | ||
deployer = SchemaDeployer(sql_backend, schema_name, views, catalog=catalog.name) | ||
deployer.deploy_schema() | ||
deployer.deploy_table("foo", views.Foo) | ||
deployer.deploy_view("some", "some.sql") | ||
|
||
sql_backend.save_table(f"{schema}.foo", [views.Foo("abc", True)], views.Foo) | ||
rows = list(sql_backend.fetch(f"SELECT * FROM {schema}.some")) | ||
sql_backend.save_table(table_full_name, [views.Foo("abc", True)], views.Foo) | ||
|
||
assert rows == [Row(name="abc", id=1)] | ||
|
||
|
||
def test_overwrite(ws, env_or_skip, make_random): | ||
schema = "default" | ||
sql_backend = StatementExecutionBackend(ws, env_or_skip("TEST_DEFAULT_WAREHOUSE_ID")) | ||
catalog = env_or_skip("TEST_CATALOG") | ||
schema = env_or_skip("TEST_SCHEMA") | ||
|
||
sql_backend.save_table(f"{catalog}.{schema}.foo", [views.Foo("abc", True)], views.Foo, "append") | ||
sql_backend.save_table(f"{catalog}.{schema}.foo", [views.Foo("xyz", True)], views.Foo, "overwrite") | ||
rows = list(sql_backend.fetch(f"SELECT * FROM {catalog}.{schema}.foo")) | ||
|
||
assert rows == [Row(first="xyz", second=True)] | ||
rows = list(sql_backend.fetch(f"SELECT * FROM {table_full_name}")) | ||
assert rows == [Row(first="abc", second=1)] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters