Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Group migration workflow improvements #3391

Merged
merged 27 commits into from
Dec 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
6b197eb
Test group existence in migrate groups integration test
JCZuurmond Nov 27, 2024
e517c6c
Type hint test
JCZuurmond Nov 27, 2024
5bdd43a
Add TODO
JCZuurmond Nov 27, 2024
45c59e8
Add validate workflow
JCZuurmond Nov 27, 2024
dc3175f
Add skip job wait
JCZuurmond Nov 27, 2024
130a3b2
Add setup tacl to migrate-groups-legacy workflow
JCZuurmond Nov 27, 2024
833a3aa
Let workspace and account group function group manager
JCZuurmond Nov 27, 2024
37d34b5
Move check for original workspace group up
JCZuurmond Nov 27, 2024
f6bb2b2
Fix test
JCZuurmond Nov 27, 2024
cb4394e
Add TODO's
JCZuurmond Nov 27, 2024
7e4319c
Name variables more clear and fix condition
JCZuurmond Nov 27, 2024
8117e0a
Remove redundant check of account group to exist
JCZuurmond Nov 27, 2024
9877360
Move include databases check out of try-except
JCZuurmond Nov 27, 2024
266e658
Fix test
JCZuurmond Nov 27, 2024
3dcd550
Check if all group permission validation came back positive
JCZuurmond Nov 27, 2024
1961746
Raise value error in validate group permission steps when not succesful
JCZuurmond Nov 27, 2024
7790465
Format
JCZuurmond Nov 27, 2024
20f7266
Merge duplicate tests
JCZuurmond Nov 27, 2024
5388911
Remove redundant include group names
JCZuurmond Nov 27, 2024
552789c
Remove redundant import
JCZuurmond Nov 27, 2024
f0ba6a5
Fix logging verify group permissions
JCZuurmond Nov 27, 2024
294a0f0
Fix threads.gather
JCZuurmond Nov 27, 2024
45feace
Make log error a warning
JCZuurmond Nov 27, 2024
f40b780
Format
JCZuurmond Nov 27, 2024
8b04f2f
Reference `validate-group-permissions` workflow when validation fails…
JCZuurmond Nov 28, 2024
d94b775
Improve log messages
JCZuurmond Dec 6, 2024
da79392
Document integration test
JCZuurmond Dec 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions src/databricks/labs/ucx/hive_metastore/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,10 +539,10 @@ def _option_as_python(scala_option: typing.Any):
return scala_option.get() if scala_option.isDefined() else None

def _all_databases(self) -> list[str]:
try:
if not self._include_database:
return list(self._iterator(self._external_catalog.listDatabases()))
if self._include_database:
return self._include_database
try:
return list(self._iterator(self._external_catalog.listDatabases()))
except Exception as err: # pylint: disable=broad-exception-caught
if "py4j.security.Py4JSecurityException" in str(err):
logger.error(
Expand Down
10 changes: 8 additions & 2 deletions src/databricks/labs/ucx/workspace_access/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,15 @@ def verify_group_permissions(self) -> bool:
verifier_tasks.extend(tasks_for_support)

logger.info(f"Starting to verify permissions. Total tasks: {len(verifier_tasks)}")
Threads.strict("verify group permissions", verifier_tasks)
verifications, errors = Threads.gather("verify group permissions", verifier_tasks)
if errors:
logger.error(f"Detected {len(errors)} errors while verifying permissions")
raise ManyError(errors)
if not all(verifications):
unsuccessful_verifications = len(verifications) - sum(verifications)
logger.warning(f"Detected {unsuccessful_verifications} out of {len(verifications)} invalidated permissions")
return False
logger.info("All permissions validated successfully. No issues found.")

return True

def object_type_support(self) -> dict[str, AclSupport]:
Expand Down
19 changes: 16 additions & 3 deletions src/databricks/labs/ucx/workspace_access/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,12 @@ def reflect_account_groups_on_workspace(self, ctx: RuntimeContext):
return
ctx.group_manager.reflect_account_groups_on_workspace()

@job_task(depends_on=[reflect_account_groups_on_workspace], job_cluster="tacl")
@job_task(job_cluster="tacl")
def setup_tacl(self, ctx: RuntimeContext):
"""(Optimization) Allow the TACL job cluster to be started while we're verifying the prerequisites for
refreshing everything."""

@job_task(depends_on=[reflect_account_groups_on_workspace, setup_tacl], job_cluster="tacl")
def apply_permissions_to_account_groups(self, ctx: RuntimeContext):
"""Fourth phase of the workspace-local group migration process. It does the following:
- Assigns the full set of permissions of the original group to the account-level one
Expand All @@ -65,7 +70,12 @@ def validate_groups_permissions(self, ctx: RuntimeContext):
if not ctx.config.use_legacy_permission_migration:
logger.info("Use `migrate-groups` job, or set `use_legacy_permission_migration: true` in config.yml.")
return
ctx.permission_manager.verify_group_permissions()
if not ctx.permission_manager.verify_group_permissions():
raise ValueError(
"Some group permissions were not migrated successfully. Wait for an hour then use the "
"`validate-group-permissions` workflow to validate the permissions after the API caught up. "
f"Run `databricks labs ucx logs --workflow '{self._name}' --debug` for more details."
)


class PermissionsMigrationAPI(Workflow):
Expand Down Expand Up @@ -142,7 +152,10 @@ def validate_groups_permissions(self, ctx: RuntimeContext):
if not ctx.config.use_legacy_permission_migration:
logger.info("Use `migrate-groups` job, or set `use_legacy_permission_migration: true` in config.yml.")
return
ctx.permission_manager.verify_group_permissions()
if not ctx.permission_manager.verify_group_permissions():
raise ValueError(
f"Some group permissions were not migrated successfully. Run `databricks labs ucx logs --workflow '{self._name}' --debug` for more details."
)


class RemoveWorkspaceLocalGroups(Workflow):
Expand Down
127 changes: 69 additions & 58 deletions tests/integration/workspace_access/test_workflows.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,105 @@
import datetime as dt
from dataclasses import replace


import pytest
from databricks.sdk.retries import retried
from databricks.sdk.service import sql
from databricks.sdk.service.iam import PermissionLevel
from databricks.sdk.service.workspace import AclPermission

from databricks.labs.ucx.workspace_access.groups import MigratedGroup


def test_running_real_migrate_groups_job(
installation_ctx,
make_cluster_policy,
make_cluster_policy_permissions,
make_secret_scope,
make_secret_scope_acl,
):
ws_group_a, acc_group_a = installation_ctx.make_ucx_group(wait_for_provisioning=True)

) -> None:
"""Test the migrate groups workflow.

We have many asserts in a single integration tests as we minimize the number of integration tests that run workflows
to minimize the number of long-running integration tests.

For testing, we require:
- UCX installation (as always)
- A workspace group as the source for migration
- A account group as the target for migration
- Permissions to migrate:
- Cluster policy
- Schema and table access permissions
- Secret scope permissions
- Inventory tables used by the migrate groups workflow

We test:
- The workflow to complete successfully
- The workspace group to be renamed
- The permissions to be transferred to the account group
"""
ws_group, acc_group = installation_ctx.make_ucx_group(wait_for_provisioning=True)

# TODO: Move `make_cluster_policy` and `make_cluster_policy_permissions` to context like other `make_` methods
cluster_policy = make_cluster_policy()
make_cluster_policy_permissions(
object_id=cluster_policy.policy_id,
permission_level=PermissionLevel.CAN_USE,
group_name=ws_group_a.display_name,
group_name=ws_group.display_name,
)

table = installation_ctx.make_table()
installation_ctx.make_grant(ws_group_a.display_name, "SELECT", table_info=table)
schema = installation_ctx.make_schema()
table = installation_ctx.make_table(schema_name=schema.name)
installation_ctx.make_grant(ws_group.display_name, 'USAGE', schema_info=schema)
installation_ctx.make_grant(ws_group.display_name, 'OWN', schema_info=schema)
installation_ctx.make_grant(ws_group.display_name, 'SELECT', table_info=table)

# TODO: Move `make_secret_scope` and `make_secret_scope_acl` to context like other `make_` methods
secret_scope = make_secret_scope()
make_secret_scope_acl(scope=secret_scope, principal=ws_group_a.display_name, permission=AclPermission.WRITE)
make_secret_scope_acl(scope=secret_scope, principal=ws_group.display_name, permission=AclPermission.WRITE)

installation_ctx.__dict__['include_group_names'] = [ws_group_a.display_name]
# TODO: Move `include_object_permissions` to context like other `include_` attributes
# Limit the considered permissions to the following objects:
installation_ctx.__dict__['include_object_permissions'] = [
f"cluster-policies:{cluster_policy.policy_id}",
f"TABLE:{table.full_name}",
f"secrets:{secret_scope}",
]

installation_ctx.workspace_installation.run()

installation_ctx.deployed_workflows.run_workflow("migrate-groups")
# The crawlers should run as part of the assessment. To minimize the crawling here, we only crawl what is necessary
# Tables crawler fails on `tacl` cluster used by the apply and validate permission tasks
installation_ctx.tables_crawler.snapshot(force_refresh=True)

workflow = "migrate-groups"
installation_ctx.deployed_workflows.run_workflow(workflow, skip_job_wait=True)
assert installation_ctx.deployed_workflows.validate_step(workflow), f"Workflow failed: {workflow}"

# Wrapper functions to wait for eventual consistency of API
@retried(on=[KeyError], timeout=dt.timedelta(minutes=1))
def wait_for_workspace_group_to_exists(display_name: str) -> bool:
if installation_ctx.group_manager.has_workspace_group(display_name):
return True
raise KeyError(f"Group not found {display_name}")

# The original workspace group should be renamed
renamed_workspace_group_name = installation_ctx.renamed_group_prefix + ws_group.display_name
assert wait_for_workspace_group_to_exists(
renamed_workspace_group_name
), f"Workspace group not found: {renamed_workspace_group_name}"
if installation_ctx.group_manager.has_workspace_group(ws_group.display_name): # Avoid wait on timeout
with pytest.raises(TimeoutError):
wait_for_workspace_group_to_exists(ws_group.display_name) # Expect to NOT exists

schema_grants = installation_ctx.grants_crawler.for_schema_info(schema)
assert {"USAGE", "OWN"} == schema_grants[acc_group.display_name], "Incorrect schema grants for migrated group"

# specific permissions api migrations are checked in different and smaller integration tests
found = installation_ctx.generic_permissions_support.load_as_dict("cluster-policies", cluster_policy.policy_id)
assert acc_group_a.display_name in found, "Group not found in cluster policies"
assert found[acc_group_a.display_name] == PermissionLevel.CAN_USE
object_permissions = installation_ctx.generic_permissions_support.load_as_dict(
"cluster-policies", cluster_policy.policy_id
)
assert acc_group.display_name in object_permissions, "Group not found in cluster policies"
assert object_permissions[acc_group.display_name] == PermissionLevel.CAN_USE

scope_permission = installation_ctx.secret_scope_acl_support.secret_scope_permission(
secret_scope, acc_group_a.display_name
secret_scope, acc_group.display_name
)
assert scope_permission == AclPermission.WRITE

Expand Down Expand Up @@ -96,44 +148,3 @@ def test_running_legacy_validate_groups_permissions_job(

# assert the job does not throw any exception
installation_ctx.deployed_workflows.run_workflow("validate-groups-permissions")


def test_permissions_migration_for_group_with_same_name(
installation_ctx,
make_cluster_policy,
make_cluster_policy_permissions,
):
ws_group, acc_group = installation_ctx.make_ucx_group()
migrated_group = MigratedGroup.partial_info(ws_group, acc_group)
cluster_policy = make_cluster_policy()
make_cluster_policy_permissions(
object_id=cluster_policy.policy_id,
permission_level=PermissionLevel.CAN_USE,
group_name=migrated_group.name_in_workspace,
)

schema_a = installation_ctx.make_schema()
table_a = installation_ctx.make_table(schema_name=schema_a.name)
installation_ctx.make_grant(migrated_group.name_in_workspace, 'USAGE', schema_info=schema_a)
installation_ctx.make_grant(migrated_group.name_in_workspace, 'OWN', schema_info=schema_a)
installation_ctx.make_grant(migrated_group.name_in_workspace, 'SELECT', table_info=table_a)

installation_ctx.workspace_installation.run()

installation_ctx.deployed_workflows.run_workflow("migrate-groups")

object_permissions = installation_ctx.generic_permissions_support.load_as_dict(
"cluster-policies", cluster_policy.policy_id
)
new_schema_grants = installation_ctx.grants_crawler.for_schema_info(schema_a)

if {"USAGE", "OWN"} != new_schema_grants[migrated_group.name_in_account] or object_permissions[
migrated_group.name_in_account
] != PermissionLevel.CAN_USE:
installation_ctx.deployed_workflows.relay_logs("migrate-groups")
assert {"USAGE", "OWN"} == new_schema_grants[
migrated_group.name_in_account
], "Incorrect schema grants for migrated group"
assert (
object_permissions[migrated_group.name_in_account] == PermissionLevel.CAN_USE
), "Incorrect permissions for migrated group"
Loading