From ccf20c55617bd5909e5ac27b758c70cf7f9bbd7e Mon Sep 17 00:00:00 2001 From: Serge Smertin Date: Thu, 24 Oct 2024 20:42:44 +0200 Subject: [PATCH] Use table ownership heuristics for migrating ACLs This PR prepends `OWN` grants when migrating ACLs for tables. For detailed logic on ownership heuristics, see #3066 --- .../labs/ucx/contexts/application.py | 12 ++++- .../labs/ucx/hive_metastore/ownership.py | 28 ++++++++++- tests/unit/progress/test_ownership.py | 49 +++++++++++++++++++ 3 files changed, 87 insertions(+), 2 deletions(-) create mode 100644 tests/unit/progress/test_ownership.py diff --git a/src/databricks/labs/ucx/contexts/application.py b/src/databricks/labs/ucx/contexts/application.py index 889fba1eec..acf2c3d6b9 100644 --- a/src/databricks/labs/ucx/contexts/application.py +++ b/src/databricks/labs/ucx/contexts/application.py @@ -44,7 +44,11 @@ ) from databricks.labs.ucx.hive_metastore.mapping import TableMapping from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex -from databricks.labs.ucx.hive_metastore.ownership import TableMigrationOwnership, TableOwnership +from databricks.labs.ucx.hive_metastore.ownership import ( + TableMigrationOwnership, + TableOwnership, + TableOwnershipGrantLoader, +) from databricks.labs.ucx.hive_metastore.table_migrate import ( TableMigrationStatusRefresher, TablesMigrator, @@ -316,9 +320,15 @@ def acl_migrator(self): self.config.inventory_database, ) + @cached_property + def table_ownership_grant_loader(self) -> TableOwnershipGrantLoader: + return TableOwnershipGrantLoader(self.tables_crawler, self.table_ownership) + @cached_property def migrate_grants(self) -> MigrateGrants: + # owner grants have to come first grant_loaders: list[Callable[[], Iterable[Grant]]] = [ + self.table_ownership_grant_loader.load, self.grants_crawler.snapshot, self.principal_acl.get_interactive_cluster_grants, ] diff --git a/src/databricks/labs/ucx/hive_metastore/ownership.py b/src/databricks/labs/ucx/hive_metastore/ownership.py index b11f5f6e81..1738d0441d 100644 --- a/src/databricks/labs/ucx/hive_metastore/ownership.py +++ b/src/databricks/labs/ucx/hive_metastore/ownership.py @@ -1,4 +1,5 @@ import logging +from collections.abc import Iterable from functools import cached_property from databricks.labs.ucx.framework.owners import ( @@ -8,7 +9,7 @@ WorkspacePathOwnership, ) from databricks.labs.ucx.hive_metastore import TablesCrawler -from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler +from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler, Grant from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationStatus from databricks.labs.ucx.hive_metastore.tables import Table from databricks.labs.ucx.source_code.base import UsedTable @@ -85,6 +86,31 @@ def _grants_snapshot(self): return self._grants_crawler.snapshot() +class TableOwnershipGrantLoader: + def __init__(self, tables_crawler: TablesCrawler, table_ownership: TableOwnership) -> None: + self._tables_crawler = tables_crawler + self._table_ownership = table_ownership + + def load(self) -> Iterable[Grant]: + for table in self._tables_crawler.snapshot(): + owner = self._table_ownership.owner_of(table) + table_name, view_name = self._names(table) + yield Grant( + principal=owner, + action_type='OWN', + catalog=table.catalog, + database=table.database, + table=table_name, + view=view_name, + ) + + @staticmethod + def _names(table: Table) -> tuple[str | None, str | None]: + if table.view_text: + return None, table.name + return table.name, None + + class TableMigrationOwnership(Ownership[TableMigrationStatus]): """Determine ownership of table migration records in the inventory. diff --git a/tests/unit/progress/test_ownership.py b/tests/unit/progress/test_ownership.py new file mode 100644 index 0000000000..5e990f3f1f --- /dev/null +++ b/tests/unit/progress/test_ownership.py @@ -0,0 +1,49 @@ +from unittest.mock import create_autospec + +from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.labs.ucx.hive_metastore.ownership import TableOwnership, TableOwnershipGrantLoader +from databricks.labs.ucx.hive_metastore.tables import Table, TablesCrawler + + +def test_table_ownership_grants(): + tables_crawler = create_autospec(TablesCrawler) + tables_crawler.snapshot.return_value = [ + Table( + catalog='a', + database='b', + name='c', + object_type='EXTERNAL', + table_format='PARQUET', + ), + Table( + catalog='b', + database='c', + name='d', + object_type='VIEW', + table_format='...', + view_text='...', + ), + ] + table_ownership = create_autospec(TableOwnership) + table_ownership.owner_of.return_value = 'somebody' + + loader = TableOwnershipGrantLoader(tables_crawler, table_ownership) + + grants = list(loader.load()) + + assert grants == [ + Grant( + principal='somebody', + action_type='OWN', + catalog='a', + database='b', + table='c', + ), + Grant( + principal='somebody', + action_type='OWN', + catalog='b', + database='c', + view='d', + ), + ]