From 6545a01d552974413947b1d42c3dff33a20783fc Mon Sep 17 00:00:00 2001 From: renardeinside Date: Wed, 26 Jul 2023 11:53:25 +0200 Subject: [PATCH 1/5] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 6d67ffac2b..e3d9c6b81b 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ Compute infrastructure: Workflows: - [x] Delta Live Tables -- [ ] Jobs +- [x] Jobs ML: From 356a325a69227837ec93914f9d039d1364f19cc4 Mon Sep 17 00:00:00 2001 From: renardeinside Date: Wed, 26 Jul 2023 13:02:05 +0200 Subject: [PATCH 2/5] add tests --- README.md | 2 +- .../managers/inventory/permissions.py | 6 +++ .../managers/inventory/types.py | 1 + tests/integration/conftest.py | 37 ++++++++++++++++++- 4 files changed, 44 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e3d9c6b81b..d60cc96ba9 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ Workflows: ML: -- [ ] MLflow experiments +- [x] MLflow experiments - [ ] MLflow registry - [ ] Legacy Mlflow model endpoints (?) diff --git a/src/uc_migration_toolkit/managers/inventory/permissions.py b/src/uc_migration_toolkit/managers/inventory/permissions.py index a38a8dce45..56939ea1c5 100644 --- a/src/uc_migration_toolkit/managers/inventory/permissions.py +++ b/src/uc_migration_toolkit/managers/inventory/permissions.py @@ -70,6 +70,12 @@ def get_inventorizers(): listing_function=provider.ws.jobs.list, id_attribute="job_id", ), + StandardInventorizer( + logical_object_type=LogicalObjectType.EXPERIMENT, + request_object_type=RequestObjectType.EXPERIMENTS, + listing_function=provider.ws.experiments.list_experiments, + id_attribute="experiment_id", + ), ] def inventorize_permissions(self): diff --git a/src/uc_migration_toolkit/managers/inventory/types.py b/src/uc_migration_toolkit/managers/inventory/types.py index bb592215a8..e42cd69fcf 100644 --- a/src/uc_migration_toolkit/managers/inventory/types.py +++ b/src/uc_migration_toolkit/managers/inventory/types.py @@ -39,6 +39,7 @@ def __repr__(self): class LogicalObjectType(StrEnum): + EXPERIMENT = "EXPERIMENT" JOB = "JOB" PIPELINE = "PIPELINE" CLUSTER = "CLUSTER" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 008c1c879a..0a311072aa 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -7,6 +7,7 @@ import pytest from _pytest.fixtures import SubRequest from databricks.sdk import AccountClient +from databricks.sdk.core import DatabricksError from databricks.sdk.service.compute import ( ClusterDetails, CreateInstancePoolResponse, @@ -14,6 +15,7 @@ ) from databricks.sdk.service.iam import PermissionLevel from databricks.sdk.service.jobs import CreateResponse +from databricks.sdk.service.ml import CreateExperimentResponse from databricks.sdk.service.pipelines import ( CreatePipelineResponse, NotebookLibrary, @@ -50,6 +52,7 @@ NUM_TEST_CLUSTER_POLICIES = os.environ.get("NUM_TEST_CLUSTER_POLICIES", 3) NUM_TEST_PIPELINES = os.environ.get("NUM_TEST_PIPELINES", 3) NUM_TEST_JOBS = os.environ.get("NUM_TEST_JOBS", 3) +NUM_TEST_EXPERIMENTS = os.environ.get("NUM_TEST_EXPERIMENTS", 3) NUM_THREADS = os.environ.get("NUM_TEST_THREADS", 20) DB_CONNECT_CLUSTER_NAME = os.environ.get("DB_CONNECT_CLUSTER_NAME", "ucx-integration-testing") @@ -332,9 +335,40 @@ def clusters(env: EnvironmentInfo, ws: ImprovedWorkspaceClient) -> list[ClusterD logger.debug("Test clusters deleted") +@pytest.fixture(scope="session", autouse=True) +def experiments(ws: ImprovedWorkspaceClient, env: EnvironmentInfo) -> list[CreateExperimentResponse]: + logger.debug("Creating test experiments") + + try: + ws.workspace.mkdirs("/experiments") + except DatabricksError: + pass + + test_experiments = [ + ws.experiments.create_experiment(name=f"/experiments/{env.test_uid}-test-{i}") + for i in range(NUM_TEST_EXPERIMENTS) + ] + + _set_random_permissions( + test_experiments, + "experiment_id", + RequestObjectType.EXPERIMENTS, + env, + ws, + permission_levels=[PermissionLevel.CAN_MANAGE, PermissionLevel.CAN_READ, PermissionLevel.CAN_EDIT], + ) + + yield test_experiments + + logger.debug("Deleting test experiments") + executables = [partial(ws.experiments.delete_experiment, e.experiment_id) for e in test_experiments] + Threader(executables).run() + logger.debug("Test experiments deleted") + + @pytest.fixture(scope="session", autouse=True) def verifiable_objects( - clusters, instance_pools, cluster_policies, pipelines, jobs + clusters, instance_pools, cluster_policies, pipelines, jobs, experiments ) -> tuple[list, str, RequestObjectType]: _verifiable_objects = [ (clusters, "cluster_id", RequestObjectType.CLUSTERS), @@ -342,6 +376,7 @@ def verifiable_objects( (cluster_policies, "policy_id", RequestObjectType.CLUSTER_POLICIES), (pipelines, "pipeline_id", RequestObjectType.PIPELINES), (jobs, "job_id", RequestObjectType.JOBS), + (experiments, "experiment_id", RequestObjectType.EXPERIMENTS), ] yield _verifiable_objects From 572620c80551bc54c18ce0a2279dd29082d4a2f9 Mon Sep 17 00:00:00 2001 From: renardeinside Date: Wed, 26 Jul 2023 13:45:02 +0200 Subject: [PATCH 3/5] add impl for models --- README.md | 3 +- .../managers/inventory/listing.py | 17 ++++++++ .../managers/inventory/permissions.py | 7 ++++ .../managers/inventory/types.py | 1 + tests/integration/conftest.py | 40 ++++++++++++++++++- tests/integration/test_e2e.py | 2 +- 6 files changed, 65 insertions(+), 5 deletions(-) create mode 100644 src/uc_migration_toolkit/managers/inventory/listing.py diff --git a/README.md b/README.md index d60cc96ba9..69eca232b2 100644 --- a/README.md +++ b/README.md @@ -65,8 +65,7 @@ Workflows: ML: - [x] MLflow experiments -- [ ] MLflow registry -- [ ] Legacy Mlflow model endpoints (?) +- [x] MLflow registry SQL: diff --git a/src/uc_migration_toolkit/managers/inventory/listing.py b/src/uc_migration_toolkit/managers/inventory/listing.py new file mode 100644 index 0000000000..6ca99ee848 --- /dev/null +++ b/src/uc_migration_toolkit/managers/inventory/listing.py @@ -0,0 +1,17 @@ +from collections.abc import Iterator + +from databricks.sdk.service.ml import ModelDatabricks + +from uc_migration_toolkit.providers.client import provider + + +class CustomListing: + """ + Provides utility functions for custom listing operations + """ + + @staticmethod + def list_models() -> Iterator[ModelDatabricks]: + for model in provider.ws.model_registry.list_models(): + model_with_id = provider.ws.model_registry.get_model(model.name).registered_model_databricks + yield model_with_id diff --git a/src/uc_migration_toolkit/managers/inventory/permissions.py b/src/uc_migration_toolkit/managers/inventory/permissions.py index 56939ea1c5..4d18ef8afa 100644 --- a/src/uc_migration_toolkit/managers/inventory/permissions.py +++ b/src/uc_migration_toolkit/managers/inventory/permissions.py @@ -6,6 +6,7 @@ from uc_migration_toolkit.managers.group import MigrationGroupsProvider from uc_migration_toolkit.managers.inventory.inventorizer import StandardInventorizer +from uc_migration_toolkit.managers.inventory.listing import CustomListing from uc_migration_toolkit.managers.inventory.table import InventoryTableManager from uc_migration_toolkit.managers.inventory.types import ( LogicalObjectType, @@ -76,6 +77,12 @@ def get_inventorizers(): listing_function=provider.ws.experiments.list_experiments, id_attribute="experiment_id", ), + StandardInventorizer( + logical_object_type=LogicalObjectType.MODEL, + request_object_type=RequestObjectType.REGISTERED_MODELS, + listing_function=CustomListing.list_models, + id_attribute="id", + ), ] def inventorize_permissions(self): diff --git a/src/uc_migration_toolkit/managers/inventory/types.py b/src/uc_migration_toolkit/managers/inventory/types.py index e42cd69fcf..7d29bea43d 100644 --- a/src/uc_migration_toolkit/managers/inventory/types.py +++ b/src/uc_migration_toolkit/managers/inventory/types.py @@ -39,6 +39,7 @@ def __repr__(self): class LogicalObjectType(StrEnum): + MODEL = "MODEL" EXPERIMENT = "EXPERIMENT" JOB = "JOB" PIPELINE = "PIPELINE" diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index 0a311072aa..a1f09578ec 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -15,7 +15,8 @@ ) from databricks.sdk.service.iam import PermissionLevel from databricks.sdk.service.jobs import CreateResponse -from databricks.sdk.service.ml import CreateExperimentResponse +from databricks.sdk.service.ml import CreateExperimentResponse, ModelDatabricks +from databricks.sdk.service.ml import PermissionLevel as ModelPermissionLevel from databricks.sdk.service.pipelines import ( CreatePipelineResponse, NotebookLibrary, @@ -53,6 +54,7 @@ NUM_TEST_PIPELINES = os.environ.get("NUM_TEST_PIPELINES", 3) NUM_TEST_JOBS = os.environ.get("NUM_TEST_JOBS", 3) NUM_TEST_EXPERIMENTS = os.environ.get("NUM_TEST_EXPERIMENTS", 3) +NUM_TEST_MODELS = os.environ.get("NUM_TEST_MODELS", 3) NUM_THREADS = os.environ.get("NUM_TEST_THREADS", 20) DB_CONNECT_CLUSTER_NAME = os.environ.get("DB_CONNECT_CLUSTER_NAME", "ucx-integration-testing") @@ -366,9 +368,42 @@ def experiments(ws: ImprovedWorkspaceClient, env: EnvironmentInfo) -> list[Creat logger.debug("Test experiments deleted") +@pytest.fixture(scope="session", autouse=True) +def models(ws: ImprovedWorkspaceClient, env: EnvironmentInfo) -> list[ModelDatabricks]: + logger.debug("Creating models") + + test_models: list[ModelDatabricks] = [ + ws.model_registry.get_model( + ws.model_registry.create_model(f"{env.test_uid}-test-{i}").registered_model.name + ).registered_model_databricks + for i in range(NUM_TEST_MODELS) + ] + + _set_random_permissions( + test_models, + "id", + RequestObjectType.REGISTERED_MODELS, + env, + ws, + permission_levels=[ + ModelPermissionLevel.CAN_READ, + ModelPermissionLevel.CAN_MANAGE, + ModelPermissionLevel.CAN_MANAGE_PRODUCTION_VERSIONS, + ModelPermissionLevel.CAN_MANAGE_STAGING_VERSIONS, + ], + ) + + yield test_models + + logger.debug("Deleting test models") + executables = [partial(provider.ws.model_registry.delete_model, m.name) for m in test_models] + Threader(executables).run() + logger.debug("Test models deleted") + + @pytest.fixture(scope="session", autouse=True) def verifiable_objects( - clusters, instance_pools, cluster_policies, pipelines, jobs, experiments + clusters, instance_pools, cluster_policies, pipelines, jobs, experiments, models ) -> tuple[list, str, RequestObjectType]: _verifiable_objects = [ (clusters, "cluster_id", RequestObjectType.CLUSTERS), @@ -377,6 +412,7 @@ def verifiable_objects( (pipelines, "pipeline_id", RequestObjectType.PIPELINES), (jobs, "job_id", RequestObjectType.JOBS), (experiments, "experiment_id", RequestObjectType.EXPERIMENTS), + (models, "id", RequestObjectType.REGISTERED_MODELS), ] yield _verifiable_objects diff --git a/tests/integration/test_e2e.py b/tests/integration/test_e2e.py index f5737eee3c..307be674d3 100644 --- a/tests/integration/test_e2e.py +++ b/tests/integration/test_e2e.py @@ -25,7 +25,7 @@ def _verify_group_permissions( toolkit: GroupMigrationToolkit, target: Literal["backup", "account"], ): - logger.debug("Verifying that the permissions were applied to backup groups") + logger.debug(f"Verifying that the permissions of object {request_object_type} were applied to {target} groups") for _object in objects: _object_permissions = ws.permissions.get(request_object_type, getattr(_object, id_attribute)) From ca06e34afbd1f72a5825c9389b28068e5b7b4ec8 Mon Sep 17 00:00:00 2001 From: renardeinside Date: Wed, 26 Jul 2023 13:46:56 +0200 Subject: [PATCH 4/5] switch to getattr function --- src/uc_migration_toolkit/managers/inventory/inventorizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/uc_migration_toolkit/managers/inventory/inventorizer.py b/src/uc_migration_toolkit/managers/inventory/inventorizer.py index 02c00f2fc0..0c1ded5768 100644 --- a/src/uc_migration_toolkit/managers/inventory/inventorizer.py +++ b/src/uc_migration_toolkit/managers/inventory/inventorizer.py @@ -49,7 +49,7 @@ def preload(self): def _process_single_object(self, _object: InventoryObject) -> PermissionsInventoryItem: permissions = self._permissions_function( - self._request_object_type, _object.__getattribute__(self._id_attribute) + self._request_object_type, getattr(_object, self._id_attribute) ) inventory_item = PermissionsInventoryItem( object_id=str(_object.__getattribute__(self._id_attribute)), From acb26a918ce36654b5c5b755a2d0bdf3e2a0f026 Mon Sep 17 00:00:00 2001 From: renardeinside Date: Wed, 26 Jul 2023 13:51:51 +0200 Subject: [PATCH 5/5] switch to getattr --- .../managers/inventory/inventorizer.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/uc_migration_toolkit/managers/inventory/inventorizer.py b/src/uc_migration_toolkit/managers/inventory/inventorizer.py index 0c1ded5768..a6996280c7 100644 --- a/src/uc_migration_toolkit/managers/inventory/inventorizer.py +++ b/src/uc_migration_toolkit/managers/inventory/inventorizer.py @@ -48,11 +48,10 @@ def preload(self): logger.info(f"Object metadata prepared for {len(self._objects)} objects.") def _process_single_object(self, _object: InventoryObject) -> PermissionsInventoryItem: - permissions = self._permissions_function( - self._request_object_type, getattr(_object, self._id_attribute) - ) + object_id = str(getattr(_object, self._id_attribute)) + permissions = self._permissions_function(self._request_object_type, object_id) inventory_item = PermissionsInventoryItem( - object_id=str(_object.__getattribute__(self._id_attribute)), + object_id=object_id, logical_object_type=self._logical_object_type, request_object_type=self._request_object_type, object_permissions=permissions.as_dict(),