From 4fd9b2134c09ce28f65caba05205a173a1588754 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:05:54 +0200 Subject: [PATCH 01/15] Assign TODO @JCZuurmond --- src/databricks/labs/ucx/contexts/workflow_task.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index 7243567fdf..e72928258e 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -233,7 +233,7 @@ def tables_progress(self) -> ProgressEncoder[Table]: @cached_property def historical_table_migration_log(self) -> ProgressEncoder[TableMigrationStatus]: - # TODO: merge into tables_progress + # TODO: @JCZuurmond merge into tables_progress return ProgressEncoder( self.sql_backend, self.table_migration_ownership, From f06f7ff2db706a7b5bd334f9d12faf23b0257521 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:12:12 +0200 Subject: [PATCH 02/15] Add table progress encoder --- src/databricks/labs/ucx/progress/tables.py | 41 ++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 src/databricks/labs/ucx/progress/tables.py diff --git a/src/databricks/labs/ucx/progress/tables.py b/src/databricks/labs/ucx/progress/tables.py new file mode 100644 index 0000000000..168ce5e53e --- /dev/null +++ b/src/databricks/labs/ucx/progress/tables.py @@ -0,0 +1,41 @@ +from databricks.labs.lsql.backends import SqlBackend + +from databricks.labs.ucx.hive_metastore.tables import Table +from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex +from databricks.labs.ucx.hive_metastore.ownership import TableOwnership +from databricks.labs.ucx.progress.history import ProgressEncoder +from databricks.labs.ucx.progress.grants import GrantsProgressEncoder + + +class TableProgressEncoder(ProgressEncoder[Table]): + """Encoder class:Table to class:History. + + A progress failure for a table means: + - the table is not migrated yet + - the associated grants have a failure + """ + + def __init__( + self, + sql_backend: SqlBackend, + ownership: TableOwnership, + table_migration_index: TableMigrationIndex, + grants_progress_encoder: GrantsProgressEncoder, + run_id: int, + workspace_id: int, + catalog: str, + schema: str = "multiworkspace", + table: str = "historical", + ) -> None: + super().__init__( + sql_backend, + ownership, + Table, + run_id, + workspace_id, + catalog, + schema, + table, + ) + self._table_migration_index = table_migration_index + self._grants_progress_encoder = grants_progress_encoder From 480ac92f02e7e5d147e0b2c3e6c2c144174ad60b Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:15:35 +0200 Subject: [PATCH 03/15] Add TableProgressEncoder to workflow task --- src/databricks/labs/ucx/contexts/workflow_task.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index e72928258e..620896f803 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -23,11 +23,13 @@ from databricks.labs.ucx.hive_metastore import TablesInMounts, TablesCrawler from databricks.labs.ucx.hive_metastore.grants import Grant from databricks.labs.ucx.hive_metastore.table_size import TableSizeCrawler -from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler, Table +from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler from databricks.labs.ucx.hive_metastore.udfs import Udf from databricks.labs.ucx.installer.logs import TaskRunWarningRecorder +from databricks.labs.ucx.progress.grants import GrantsProgressEncoder from databricks.labs.ucx.progress.history import ProgressEncoder from databricks.labs.ucx.progress.jobs import JobsProgressEncoder +from databricks.labs.ucx.progress.tables import TableProgressEncoder from databricks.labs.ucx.progress.workflow_runs import WorkflowRunRecorder # As with GlobalContext, service factories unavoidably have a lot of public methods. @@ -188,8 +190,8 @@ def policies_progress(self) -> ProgressEncoder[PolicyInfo]: ) @cached_property - def grants_progress(self) -> ProgressEncoder[Grant]: - return ProgressEncoder( + def grants_progress(self) -> GrantsProgressEncoder: + return GrantsProgressEncoder( self.sql_backend, self.grant_ownership, Grant, @@ -221,11 +223,12 @@ def pipelines_progress(self) -> ProgressEncoder[PipelineInfo]: ) @cached_property - def tables_progress(self) -> ProgressEncoder[Table]: - return ProgressEncoder( + def tables_progress(self) -> TableProgressEncoder: + return TableProgressEncoder( self.sql_backend, self.table_ownership, - Table, + self.migration_status_refresher.index(force_refresh=False), + self.grants_progress, self.parent_run_id, self.workspace_id, self.config.ucx_catalog, From 4dc8bd44104d94270fe0981a72b0dae6f2844410 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:16:11 +0200 Subject: [PATCH 04/15] Disable too many arguments --- src/databricks/labs/ucx/progress/tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/tables.py b/src/databricks/labs/ucx/progress/tables.py index 168ce5e53e..237dd6d370 100644 --- a/src/databricks/labs/ucx/progress/tables.py +++ b/src/databricks/labs/ucx/progress/tables.py @@ -15,7 +15,7 @@ class TableProgressEncoder(ProgressEncoder[Table]): - the associated grants have a failure """ - def __init__( + def __init__( # pylint disable=too-many-arguments self, sql_backend: SqlBackend, ownership: TableOwnership, From 4a7b2fefa6aeb2e71040555628c81ecb85f39308 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:19:42 +0200 Subject: [PATCH 05/15] Keep consistent singular/plural --- src/databricks/labs/ucx/contexts/workflow_task.py | 6 +++--- src/databricks/labs/ucx/progress/grants.py | 2 +- src/databricks/labs/ucx/progress/tables.py | 6 +++--- tests/unit/progress/test_grants.py | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index 620896f803..2678123475 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -26,7 +26,7 @@ from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler from databricks.labs.ucx.hive_metastore.udfs import Udf from databricks.labs.ucx.installer.logs import TaskRunWarningRecorder -from databricks.labs.ucx.progress.grants import GrantsProgressEncoder +from databricks.labs.ucx.progress.grants import GrantProgressEncoder from databricks.labs.ucx.progress.history import ProgressEncoder from databricks.labs.ucx.progress.jobs import JobsProgressEncoder from databricks.labs.ucx.progress.tables import TableProgressEncoder @@ -190,8 +190,8 @@ def policies_progress(self) -> ProgressEncoder[PolicyInfo]: ) @cached_property - def grants_progress(self) -> GrantsProgressEncoder: - return GrantsProgressEncoder( + def grants_progress(self) -> GrantProgressEncoder: + return GrantProgressEncoder( self.sql_backend, self.grant_ownership, Grant, diff --git a/src/databricks/labs/ucx/progress/grants.py b/src/databricks/labs/ucx/progress/grants.py index 47828dfaec..a8053a4986 100644 --- a/src/databricks/labs/ucx/progress/grants.py +++ b/src/databricks/labs/ucx/progress/grants.py @@ -5,7 +5,7 @@ from databricks.labs.ucx.progress.install import Historical -class GrantsProgressEncoder(ProgressEncoder[Grant]): +class GrantProgressEncoder(ProgressEncoder[Grant]): """Encoder class:Grant to class:History. A failure for a grants implies it cannot be mapped to Unity Catalog. diff --git a/src/databricks/labs/ucx/progress/tables.py b/src/databricks/labs/ucx/progress/tables.py index 237dd6d370..52736dd140 100644 --- a/src/databricks/labs/ucx/progress/tables.py +++ b/src/databricks/labs/ucx/progress/tables.py @@ -4,7 +4,7 @@ from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex from databricks.labs.ucx.hive_metastore.ownership import TableOwnership from databricks.labs.ucx.progress.history import ProgressEncoder -from databricks.labs.ucx.progress.grants import GrantsProgressEncoder +from databricks.labs.ucx.progress.grants import GrantProgressEncoder class TableProgressEncoder(ProgressEncoder[Table]): @@ -20,7 +20,7 @@ def __init__( # pylint disable=too-many-arguments sql_backend: SqlBackend, ownership: TableOwnership, table_migration_index: TableMigrationIndex, - grants_progress_encoder: GrantsProgressEncoder, + grant_progress_encoder: GrantProgressEncoder, run_id: int, workspace_id: int, catalog: str, @@ -38,4 +38,4 @@ def __init__( # pylint disable=too-many-arguments table, ) self._table_migration_index = table_migration_index - self._grants_progress_encoder = grants_progress_encoder + self._grant_progress_encoder = grant_progress_encoder diff --git a/tests/unit/progress/test_grants.py b/tests/unit/progress/test_grants.py index 0d7c3a5957..bafcc85aca 100644 --- a/tests/unit/progress/test_grants.py +++ b/tests/unit/progress/test_grants.py @@ -5,7 +5,7 @@ from databricks.labs.ucx.framework.owners import Ownership from databricks.labs.ucx.framework.utils import escape_sql_identifier from databricks.labs.ucx.hive_metastore.grants import Grant -from databricks.labs.ucx.progress.grants import GrantsProgressEncoder +from databricks.labs.ucx.progress.grants import GrantProgressEncoder @pytest.mark.parametrize( @@ -20,7 +20,7 @@ def test_grants_progress_encoder_no_failures(mock_backend, grant: Grant) -> None: ownership = create_autospec(Ownership) ownership.owner_of.return_value = "user" - encoder = GrantsProgressEncoder( + encoder = GrantProgressEncoder( mock_backend, ownership, Grant, @@ -53,7 +53,7 @@ def test_grants_progress_encoder_no_failures(mock_backend, grant: Grant) -> None def test_grants_progress_encoder_failures(mock_backend, grant: Grant, failure: str) -> None: ownership = create_autospec(Ownership) ownership.owner_of.return_value = "user" - encoder = GrantsProgressEncoder( + encoder = GrantProgressEncoder( mock_backend, ownership, Grant, From 246ed8124dee414439892307a60131b94b1764d3 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:29:41 +0200 Subject: [PATCH 06/15] Test table migration index is used to encode table --- src/databricks/labs/ucx/progress/tables.py | 12 ++++++- tests/unit/progress/test_tables.py | 42 ++++++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 tests/unit/progress/test_tables.py diff --git a/src/databricks/labs/ucx/progress/tables.py b/src/databricks/labs/ucx/progress/tables.py index 52736dd140..02fcb8ba6a 100644 --- a/src/databricks/labs/ucx/progress/tables.py +++ b/src/databricks/labs/ucx/progress/tables.py @@ -1,10 +1,13 @@ +from dataclasses import replace + from databricks.labs.lsql.backends import SqlBackend from databricks.labs.ucx.hive_metastore.tables import Table from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex from databricks.labs.ucx.hive_metastore.ownership import TableOwnership -from databricks.labs.ucx.progress.history import ProgressEncoder from databricks.labs.ucx.progress.grants import GrantProgressEncoder +from databricks.labs.ucx.progress.history import ProgressEncoder +from databricks.labs.ucx.progress.install import Historical class TableProgressEncoder(ProgressEncoder[Table]): @@ -39,3 +42,10 @@ def __init__( # pylint disable=too-many-arguments ) self._table_migration_index = table_migration_index self._grant_progress_encoder = grant_progress_encoder + + def _encode_record_as_historical(self, record: Table) -> Historical: + historical = super()._encode_record_as_historical(record) + failures = [] + if not self._table_migration_index.is_migrated(record.database, record.name): + failures.append(["Pending migration"]) + return replace(historical, failures=historical.failures + failures) diff --git a/tests/unit/progress/test_tables.py b/tests/unit/progress/test_tables.py new file mode 100644 index 0000000000..638ed96e18 --- /dev/null +++ b/tests/unit/progress/test_tables.py @@ -0,0 +1,42 @@ +from unittest.mock import create_autospec + +import pytest + +from databricks.labs.ucx.framework.owners import Ownership +from databricks.labs.ucx.framework.utils import escape_sql_identifier +from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex +from databricks.labs.ucx.hive_metastore.tables import Table +from databricks.labs.ucx.progress.grants import GrantProgressEncoder +from databricks.labs.ucx.progress.tables import TableProgressEncoder + + +@pytest.mark.parametrize( + "table", + [ + Table("hive_metastore", "schema", "table", "MANAGED", "DELTA"), + ], +) +def test_table_progress_encoder_no_failures(mock_backend, table: Table) -> None: + ownership = create_autospec(Ownership) + ownership.owner_of.return_value = "user" + table_migration_index = create_autospec(TableMigrationIndex) + table_migration_index.is_migrated.return_value = True + grant_progress_encoder = create_autospec(GrantProgressEncoder) + encoder = TableProgressEncoder( + mock_backend, + ownership, + table_migration_index, + grant_progress_encoder, + run_id=1, + workspace_id=123456789, + catalog="test", + ) + + encoder.append_inventory_snapshot([table]) + + rows = mock_backend.rows_written_for(escape_sql_identifier(encoder.full_name), "append") + assert len(rows) > 0, f"No rows written for: {encoder.full_name}" + assert len(rows[0].failures) == 0 + ownership.owner_of.assert_called_once() + table_migration_index.is_migrated.assert_called_with(table.database, table.name) + grant_progress_encoder.assert_not_called() From a7e329cc75ca36ee03c75afca43351b8ccab9be9 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:31:18 +0200 Subject: [PATCH 07/15] Test pending migration failure --- src/databricks/labs/ucx/progress/tables.py | 2 +- tests/unit/progress/test_tables.py | 32 ++++++++++++++++++++++ 2 files changed, 33 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/tables.py b/src/databricks/labs/ucx/progress/tables.py index 02fcb8ba6a..5a02610563 100644 --- a/src/databricks/labs/ucx/progress/tables.py +++ b/src/databricks/labs/ucx/progress/tables.py @@ -47,5 +47,5 @@ def _encode_record_as_historical(self, record: Table) -> Historical: historical = super()._encode_record_as_historical(record) failures = [] if not self._table_migration_index.is_migrated(record.database, record.name): - failures.append(["Pending migration"]) + failures.append("Pending migration") return replace(historical, failures=historical.failures + failures) diff --git a/tests/unit/progress/test_tables.py b/tests/unit/progress/test_tables.py index 638ed96e18..3fa44ac98c 100644 --- a/tests/unit/progress/test_tables.py +++ b/tests/unit/progress/test_tables.py @@ -40,3 +40,35 @@ def test_table_progress_encoder_no_failures(mock_backend, table: Table) -> None: ownership.owner_of.assert_called_once() table_migration_index.is_migrated.assert_called_with(table.database, table.name) grant_progress_encoder.assert_not_called() + + +@pytest.mark.parametrize( + "table", + [ + Table("hive_metastore", "schema", "table", "MANAGED", "DELTA"), + ], +) +def test_table_progress_encoder_pending_migration_failure(mock_backend, table: Table) -> None: + ownership = create_autospec(Ownership) + ownership.owner_of.return_value = "user" + table_migration_index = create_autospec(TableMigrationIndex) + table_migration_index.is_migrated.return_value = False + grant_progress_encoder = create_autospec(GrantProgressEncoder) + encoder = TableProgressEncoder( + mock_backend, + ownership, + table_migration_index, + grant_progress_encoder, + run_id=1, + workspace_id=123456789, + catalog="test", + ) + + encoder.append_inventory_snapshot([table]) + + rows = mock_backend.rows_written_for(escape_sql_identifier(encoder.full_name), "append") + assert len(rows) > 0, f"No rows written for: {encoder.full_name}" + assert rows[0].failures == ["Pending migration"] + ownership.owner_of.assert_called_once() + table_migration_index.is_migrated.assert_called_with(table.database, table.name) + grant_progress_encoder.assert_not_called() From 396c40c3762b0a5ab7b36f729c22eb5a755f137e Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:45:23 +0200 Subject: [PATCH 08/15] Add init to GrantProgressEncoder --- .../labs/ucx/contexts/workflow_task.py | 2 -- src/databricks/labs/ucx/progress/grants.py | 16 +++++++++++++++- tests/unit/progress/test_grants.py | 2 -- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index 2678123475..690dde4b54 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -21,7 +21,6 @@ from databricks.labs.ucx.config import WorkspaceConfig from databricks.labs.ucx.contexts.application import GlobalContext from databricks.labs.ucx.hive_metastore import TablesInMounts, TablesCrawler -from databricks.labs.ucx.hive_metastore.grants import Grant from databricks.labs.ucx.hive_metastore.table_size import TableSizeCrawler from databricks.labs.ucx.hive_metastore.tables import FasterTableScanCrawler from databricks.labs.ucx.hive_metastore.udfs import Udf @@ -194,7 +193,6 @@ def grants_progress(self) -> GrantProgressEncoder: return GrantProgressEncoder( self.sql_backend, self.grant_ownership, - Grant, self.parent_run_id, self.workspace_id, self.config.ucx_catalog, diff --git a/src/databricks/labs/ucx/progress/grants.py b/src/databricks/labs/ucx/progress/grants.py index a8053a4986..5807799fa2 100644 --- a/src/databricks/labs/ucx/progress/grants.py +++ b/src/databricks/labs/ucx/progress/grants.py @@ -1,6 +1,8 @@ from dataclasses import replace -from databricks.labs.ucx.hive_metastore.grants import Grant +from databricks.labs.lsql.backends import SqlBackend + +from databricks.labs.ucx.hive_metastore.grants import Grant, GrantOwnership from databricks.labs.ucx.progress.history import ProgressEncoder from databricks.labs.ucx.progress.install import Historical @@ -11,6 +13,18 @@ class GrantProgressEncoder(ProgressEncoder[Grant]): A failure for a grants implies it cannot be mapped to Unity Catalog. """ + def __init__( + self, + sql_backend: SqlBackend, + ownership: GrantOwnership, + run_id: int, + workspace_id: int, + catalog: str, + schema: str = "multiworkspace", + table: str = "historical", + ) -> None: + super().__init__(sql_backend, ownership, Grant, run_id, workspace_id, catalog, schema, table) + def _encode_record_as_historical(self, record: Grant) -> Historical: historical = super()._encode_record_as_historical(record) failures = [] diff --git a/tests/unit/progress/test_grants.py b/tests/unit/progress/test_grants.py index bafcc85aca..45dda396cb 100644 --- a/tests/unit/progress/test_grants.py +++ b/tests/unit/progress/test_grants.py @@ -23,7 +23,6 @@ def test_grants_progress_encoder_no_failures(mock_backend, grant: Grant) -> None encoder = GrantProgressEncoder( mock_backend, ownership, - Grant, run_id=1, workspace_id=123456789, catalog="test", @@ -56,7 +55,6 @@ def test_grants_progress_encoder_failures(mock_backend, grant: Grant, failure: s encoder = GrantProgressEncoder( mock_backend, ownership, - Grant, run_id=1, workspace_id=123456789, catalog="test", From ae4bdc4eae5901f540868b20909adc8bab4bdf51 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:45:44 +0200 Subject: [PATCH 09/15] Fix test name --- tests/unit/progress/test_grants.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/progress/test_grants.py b/tests/unit/progress/test_grants.py index 45dda396cb..35ab48f8ea 100644 --- a/tests/unit/progress/test_grants.py +++ b/tests/unit/progress/test_grants.py @@ -17,7 +17,7 @@ Grant("principal", "USAGE", "catalog"), ], ) -def test_grants_progress_encoder_no_failures(mock_backend, grant: Grant) -> None: +def test_grant_progress_encoder_no_failures(mock_backend, grant: Grant) -> None: ownership = create_autospec(Ownership) ownership.owner_of.return_value = "user" encoder = GrantProgressEncoder( @@ -49,7 +49,7 @@ def test_grants_progress_encoder_no_failures(mock_backend, grant: Grant) -> None ), ], ) -def test_grants_progress_encoder_failures(mock_backend, grant: Grant, failure: str) -> None: +def test_grant_progress_encoder_failures(mock_backend, grant: Grant, failure: str) -> None: ownership = create_autospec(Ownership) ownership.owner_of.return_value = "user" encoder = GrantProgressEncoder( From c0e58a98bb03932519f169de4767b15c6c132789 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:46:09 +0200 Subject: [PATCH 10/15] Fix disable too many arguments comment --- src/databricks/labs/ucx/progress/tables.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/progress/tables.py b/src/databricks/labs/ucx/progress/tables.py index 5a02610563..ec05761ffb 100644 --- a/src/databricks/labs/ucx/progress/tables.py +++ b/src/databricks/labs/ucx/progress/tables.py @@ -18,7 +18,7 @@ class TableProgressEncoder(ProgressEncoder[Table]): - the associated grants have a failure """ - def __init__( # pylint disable=too-many-arguments + def __init__( # pylint: disable=too-many-arguments self, sql_backend: SqlBackend, ownership: TableOwnership, From 63e080da26583cd4cfc0f3d01ec1b5663f5be64c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:52:10 +0200 Subject: [PATCH 11/15] Move refresh table migration status before update tables history log --- src/databricks/labs/ucx/progress/workflows.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/databricks/labs/ucx/progress/workflows.py b/src/databricks/labs/ucx/progress/workflows.py index ec172d6682..7ebb8af15b 100644 --- a/src/databricks/labs/ucx/progress/workflows.py +++ b/src/databricks/labs/ucx/progress/workflows.py @@ -47,6 +47,11 @@ def crawl_tables(self, ctx: RuntimeContext) -> None: ctx.tables_crawler.snapshot(force_refresh=True) @job_task(depends_on=[verify_prerequisites, crawl_tables], job_cluster="table_migration") + def refresh_table_migration_status(self, ctx: RuntimeContext) -> None: + """Scan the tables (and views) in the inventory and record whether each has been migrated or not.""" + ctx.migration_status_refresher.snapshot(force_refresh=True) + + @job_task(depends_on=[verify_prerequisites, crawl_tables, refresh_table_migration_status], job_cluster="table_migration") def update_tables_history_log(self, ctx: RuntimeContext) -> None: """Update the history log with the latest tables inventory snapshot.""" # The table migration cluster is not legacy-ACL enabled, so we can't crawl from here. @@ -136,16 +141,6 @@ def crawl_cluster_policies(self, ctx: RuntimeContext) -> None: cluster_policies_snapshot = ctx.policies_crawler.snapshot(force_refresh=True) history_log.append_inventory_snapshot(cluster_policies_snapshot) - @job_task(depends_on=[verify_prerequisites, crawl_tables, verify_prerequisites], job_cluster="table_migration") - def refresh_table_migration_status(self, ctx: RuntimeContext) -> None: - """Scan the tables (and views) in the inventory and record whether each has been migrated or not. - - The results of the scan are stored in the `$inventory.migration_status` inventory table. - """ - history_log = ctx.historical_table_migration_log - migration_status_snapshot = ctx.migration_status_refresher.snapshot(force_refresh=True) - history_log.append_inventory_snapshot(migration_status_snapshot) - @job_task(depends_on=[verify_prerequisites]) def assess_dashboards(self, ctx: RuntimeContext): """Scans all dashboards for migration issues in SQL code of embedded widgets. From db2a3e203c2c80e5c7c6db7b0b66951408cabc70 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 15:52:58 +0200 Subject: [PATCH 12/15] Remove table migration status historical encoder --- src/databricks/labs/ucx/contexts/workflow_task.py | 12 ------------ tests/unit/progress/test_workflows.py | 5 ----- 2 files changed, 17 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index 690dde4b54..e60e646668 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -232,18 +232,6 @@ def tables_progress(self) -> TableProgressEncoder: self.config.ucx_catalog, ) - @cached_property - def historical_table_migration_log(self) -> ProgressEncoder[TableMigrationStatus]: - # TODO: @JCZuurmond merge into tables_progress - return ProgressEncoder( - self.sql_backend, - self.table_migration_ownership, - TableMigrationStatus, - self.parent_run_id, - self.workspace_id, - self.config.ucx_catalog, - ) - @cached_property def udfs_progress(self) -> ProgressEncoder[Udf]: return ProgressEncoder( diff --git a/tests/unit/progress/test_workflows.py b/tests/unit/progress/test_workflows.py index f7f7a81b21..6ce5174847 100644 --- a/tests/unit/progress/test_workflows.py +++ b/tests/unit/progress/test_workflows.py @@ -26,11 +26,6 @@ RuntimeContext.policies_crawler, RuntimeContext.policies_progress, ), - ( - MigrationProgress.refresh_table_migration_status, - RuntimeContext.migration_status_refresher, - RuntimeContext.historical_table_migration_log, - ), ), ) def test_migration_progress_runtime_refresh(run_workflow, task, crawler, history_log) -> None: From 5c39208d3287e05cf6fc829b39646db631fbce1c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Fri, 25 Oct 2024 16:26:28 +0200 Subject: [PATCH 13/15] Format --- src/databricks/labs/ucx/contexts/workflow_task.py | 1 - src/databricks/labs/ucx/progress/workflows.py | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index e60e646668..387a0858b9 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -3,7 +3,6 @@ from databricks.labs.blueprint.installation import Installation from databricks.labs.lsql.backends import RuntimeBackend, SqlBackend -from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationStatus from databricks.sdk import WorkspaceClient, core from databricks.labs.ucx.__about__ import __version__ diff --git a/src/databricks/labs/ucx/progress/workflows.py b/src/databricks/labs/ucx/progress/workflows.py index 7ebb8af15b..060b2fdccf 100644 --- a/src/databricks/labs/ucx/progress/workflows.py +++ b/src/databricks/labs/ucx/progress/workflows.py @@ -51,7 +51,9 @@ def refresh_table_migration_status(self, ctx: RuntimeContext) -> None: """Scan the tables (and views) in the inventory and record whether each has been migrated or not.""" ctx.migration_status_refresher.snapshot(force_refresh=True) - @job_task(depends_on=[verify_prerequisites, crawl_tables, refresh_table_migration_status], job_cluster="table_migration") + @job_task( + depends_on=[verify_prerequisites, crawl_tables, refresh_table_migration_status], job_cluster="table_migration" + ) def update_tables_history_log(self, ctx: RuntimeContext) -> None: """Update the history log with the latest tables inventory snapshot.""" # The table migration cluster is not legacy-ACL enabled, so we can't crawl from here. From 50347f5e28302a664799defab116c19e76515b2d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 28 Oct 2024 13:30:35 +0100 Subject: [PATCH 14/15] Remove table progress encoder dependency on grants --- .../labs/ucx/contexts/workflow_task.py | 1 - src/databricks/labs/ucx/progress/tables.py | 5 +---- tests/unit/progress/test_tables.py | 16 ++-------------- 3 files changed, 3 insertions(+), 19 deletions(-) diff --git a/src/databricks/labs/ucx/contexts/workflow_task.py b/src/databricks/labs/ucx/contexts/workflow_task.py index 387a0858b9..d840a6b087 100644 --- a/src/databricks/labs/ucx/contexts/workflow_task.py +++ b/src/databricks/labs/ucx/contexts/workflow_task.py @@ -225,7 +225,6 @@ def tables_progress(self) -> TableProgressEncoder: self.sql_backend, self.table_ownership, self.migration_status_refresher.index(force_refresh=False), - self.grants_progress, self.parent_run_id, self.workspace_id, self.config.ucx_catalog, diff --git a/src/databricks/labs/ucx/progress/tables.py b/src/databricks/labs/ucx/progress/tables.py index ec05761ffb..dfda0552d8 100644 --- a/src/databricks/labs/ucx/progress/tables.py +++ b/src/databricks/labs/ucx/progress/tables.py @@ -5,7 +5,6 @@ from databricks.labs.ucx.hive_metastore.tables import Table from databricks.labs.ucx.hive_metastore.table_migration_status import TableMigrationIndex from databricks.labs.ucx.hive_metastore.ownership import TableOwnership -from databricks.labs.ucx.progress.grants import GrantProgressEncoder from databricks.labs.ucx.progress.history import ProgressEncoder from databricks.labs.ucx.progress.install import Historical @@ -18,12 +17,11 @@ class TableProgressEncoder(ProgressEncoder[Table]): - the associated grants have a failure """ - def __init__( # pylint: disable=too-many-arguments + def __init__( self, sql_backend: SqlBackend, ownership: TableOwnership, table_migration_index: TableMigrationIndex, - grant_progress_encoder: GrantProgressEncoder, run_id: int, workspace_id: int, catalog: str, @@ -41,7 +39,6 @@ def __init__( # pylint: disable=too-many-arguments table, ) self._table_migration_index = table_migration_index - self._grant_progress_encoder = grant_progress_encoder def _encode_record_as_historical(self, record: Table) -> Historical: historical = super()._encode_record_as_historical(record) diff --git a/tests/unit/progress/test_tables.py b/tests/unit/progress/test_tables.py index 3fa44ac98c..a859bf5c03 100644 --- a/tests/unit/progress/test_tables.py +++ b/tests/unit/progress/test_tables.py @@ -23,13 +23,7 @@ def test_table_progress_encoder_no_failures(mock_backend, table: Table) -> None: table_migration_index.is_migrated.return_value = True grant_progress_encoder = create_autospec(GrantProgressEncoder) encoder = TableProgressEncoder( - mock_backend, - ownership, - table_migration_index, - grant_progress_encoder, - run_id=1, - workspace_id=123456789, - catalog="test", + mock_backend, ownership, table_migration_index, run_id=1, workspace_id=123456789, catalog="test" ) encoder.append_inventory_snapshot([table]) @@ -55,13 +49,7 @@ def test_table_progress_encoder_pending_migration_failure(mock_backend, table: T table_migration_index.is_migrated.return_value = False grant_progress_encoder = create_autospec(GrantProgressEncoder) encoder = TableProgressEncoder( - mock_backend, - ownership, - table_migration_index, - grant_progress_encoder, - run_id=1, - workspace_id=123456789, - catalog="test", + mock_backend, ownership, table_migration_index, run_id=1, workspace_id=123456789, catalog="test" ) encoder.append_inventory_snapshot([table]) From 92f4e4b3774313766c8a569194949bb5e5fd1044 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 28 Oct 2024 13:34:44 +0100 Subject: [PATCH 15/15] Document why we encode Table to historical --- src/databricks/labs/ucx/progress/tables.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/databricks/labs/ucx/progress/tables.py b/src/databricks/labs/ucx/progress/tables.py index dfda0552d8..6dc76132e2 100644 --- a/src/databricks/labs/ucx/progress/tables.py +++ b/src/databricks/labs/ucx/progress/tables.py @@ -41,6 +41,12 @@ def __init__( self._table_migration_index = table_migration_index def _encode_record_as_historical(self, record: Table) -> Historical: + """Encode record as historical. + + A table failure means that the table is pending migration. Grants are purposefully lef out, because a grant + might not be mappable to UC, like `READ_METADATA`, thus possibly resulting in false "pending migration" failure + for tables that are migrated to UC with their relevant grants also being migrated. + """ historical = super()._encode_record_as_historical(record) failures = [] if not self._table_migration_index.is_migrated(record.database, record.name):