Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More ownership classes #2999

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion src/databricks/labs/ucx/assessment/clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def _try_fetch(self) -> Iterable[ClusterInfo]:
yield ClusterInfo(*row)


class ClusterOwnership(Ownership[ClusterInfo]):
class ClusterInfoOwnership(Ownership[ClusterInfo]):
"""Determine ownership of clusters in the inventory.

This is the cluster creator (if known).
Expand All @@ -192,6 +192,16 @@ def _maybe_direct_owner(self, record: ClusterInfo) -> str | None:
return record.creator


class ClusterDetailsOwnership(Ownership[ClusterDetails]):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

where is it coming from? we don't have a concept of cluster details in our object model - we just have a cluster.

"""Determine ownership of clusters in the workspace.

This is the cluster creator (if known).
"""

def _maybe_direct_owner(self, record: ClusterDetails) -> str | None:
return record.creator_user_name


@dataclass
class PolicyInfo:
policy_id: str
Expand Down
13 changes: 12 additions & 1 deletion src/databricks/labs/ucx/assessment/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
RunType,
SparkJarTask,
SqlTask,
Job,
)

from databricks.labs.ucx.assessment.clusters import CheckClusterMixin
Expand Down Expand Up @@ -149,7 +150,7 @@ def _check_jar_task(self, all_task: list[RunTask]) -> list[str]:
return task_failures


class JobOwnership(Ownership[JobInfo]):
class JobInfoOwnership(Ownership[JobInfo]):
"""Determine ownership of jobs (workflows) in the inventory.

This is the job creator (if known).
Expand All @@ -159,6 +160,16 @@ def _maybe_direct_owner(self, record: JobInfo) -> str | None:
return record.creator


class JobOwnership(Ownership[Job]):
"""Determine ownership of jobs (workflows) in the workspace.

This is the job creator (if known).
"""

def _maybe_direct_owner(self, record: Job) -> str | None:
return record.creator_user_name


@dataclass
class SubmitRunInfo:
run_ids: str # JSON-encoded list of run ids
Expand Down
12 changes: 8 additions & 4 deletions tests/integration/assessment/test_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
from databricks.labs.ucx.assessment.clusters import (
ClustersCrawler,
PoliciesCrawler,
ClusterOwnership,
ClusterDetailsOwnership,
ClusterInfoOwnership,
ClusterPolicyOwnership,
)

Expand Down Expand Up @@ -76,9 +77,12 @@ def test_cluster_ownership(ws, runtime_ctx, make_cluster, make_user, inventory_s

# Verify ownership is as expected.
administrator_locator = runtime_ctx.administrator_locator
ownership = ClusterOwnership(administrator_locator)
assert ownership.owner_of(my_cluster_record) == ws.current_user.me().user_name
assert ownership.owner_of(their_cluster_record) == another_user.user_name
info_ownership = ClusterInfoOwnership(administrator_locator)
assert info_ownership.owner_of(my_cluster_record) == ws.current_user.me().user_name
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ws.current_user.me() is a REST call, so it's probably polite to factor that out as a local variable rather than call it twice.

assert info_ownership.owner_of(their_cluster_record) == another_user.user_name
details_ownership = ClusterDetailsOwnership(administrator_locator)
assert details_ownership.owner_of(ws.clusters.get(my_cluster.cluster_id)) == ws.current_user.me().user_name
assert details_ownership.owner_of(ws.clusters.get(their_cluster.cluster_id)) == another_user.user_name


def test_cluster_crawler_mlr_no_isolation(ws, make_cluster, inventory_schema, sql_backend):
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/assessment/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from databricks.sdk.service.jobs import NotebookTask, RunTask
from databricks.sdk.service.workspace import ImportFormat

from databricks.labs.ucx.assessment.jobs import JobOwnership, JobsCrawler, SubmitRunsCrawler
from databricks.labs.ucx.assessment.jobs import JobInfoOwnership, JobsCrawler, SubmitRunsCrawler

from .test_assessment import _SPARK_CONF

Expand Down Expand Up @@ -80,5 +80,5 @@ def test_job_ownership(ws, runtime_ctx, make_job, inventory_schema, sql_backend)
job_record = next(record for record in records if record.job_id == str(job.job_id))

# Verify ownership is as expected.
ownership = JobOwnership(runtime_ctx.administrator_locator)
ownership = JobInfoOwnership(runtime_ctx.administrator_locator)
assert ownership.owner_of(job_record) == ws.current_user.me().user_name
34 changes: 28 additions & 6 deletions tests/unit/assessment/test_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
from databricks.labs.ucx.assessment.azure import AzureServicePrincipalCrawler
from databricks.labs.ucx.assessment.clusters import (
ClustersCrawler,
PoliciesCrawler,
ClusterOwnership,
ClusterDetailsOwnership,
ClusterInfoOwnership,
ClusterInfo,
ClusterPolicyOwnership,
PoliciesCrawler,
PolicyInfo,
)
from databricks.labs.ucx.framework.crawlers import SqlBackend
Expand Down Expand Up @@ -185,27 +186,48 @@ def test_unsupported_clusters():
assert result_set[0].failures == '["cluster type not supported : LEGACY_PASSTHROUGH"]'


def test_cluster_owner_creator() -> None:
def test_cluster_info_owner_creator() -> None:
admin_locator = create_autospec(AdministratorLocator)

ownership = ClusterOwnership(admin_locator)
ownership = ClusterInfoOwnership(admin_locator)
owner = ownership.owner_of(ClusterInfo(creator="bob", cluster_id="1", success=1, failures="[]"))

assert owner == "bob"
admin_locator.get_workspace_administrator.assert_not_called()


def test_cluster_owner_creator_unknown() -> None:
def test_cluster_info_owner_creator_unknown() -> None:
admin_locator = create_autospec(AdministratorLocator)
admin_locator.get_workspace_administrator.return_value = "an_admin"

ownership = ClusterOwnership(admin_locator)
ownership = ClusterInfoOwnership(admin_locator)
owner = ownership.owner_of(ClusterInfo(creator=None, cluster_id="1", success=1, failures="[]"))

assert owner == "an_admin"
admin_locator.get_workspace_administrator.assert_called_once()


def test_cluster_details_owner_creator() -> None:
admin_locator = create_autospec(AdministratorLocator)

ownership = ClusterDetailsOwnership(admin_locator)
owner = ownership.owner_of(ClusterDetails(creator_user_name="bob", cluster_id="1"))

assert owner == "bob"
admin_locator.get_workspace_administrator.assert_not_called()


def test_cluster_details_owner_creator_unknown() -> None:
admin_locator = create_autospec(AdministratorLocator)
admin_locator.get_workspace_administrator.return_value = "an_admin"

ownership = ClusterDetailsOwnership(admin_locator)
owner = ownership.owner_of(ClusterDetails(cluster_id="1"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know the default for creator_name is None, but I think I prefer to explicitly provide it because it's an important detail for what we're testing.


assert owner == "an_admin"
admin_locator.get_workspace_administrator.assert_called_once()


def test_policy_crawler():
ws = mock_workspace_client(
policy_ids=['single-user-with-spn', 'single-user-with-spn-policyid', 'single-user-with-spn-no-sparkversion'],
Expand Down
33 changes: 27 additions & 6 deletions tests/unit/assessment/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

import pytest
from databricks.labs.lsql.backends import MockBackend
from databricks.sdk.service.jobs import BaseJob, JobSettings
from databricks.sdk.service.jobs import BaseJob, JobSettings, Job

from databricks.labs.ucx.assessment.jobs import JobInfo, JobOwnership, JobsCrawler, SubmitRunsCrawler
from databricks.labs.ucx.assessment.jobs import JobInfo, JobInfoOwnership, JobsCrawler, SubmitRunsCrawler, JobOwnership
from databricks.labs.ucx.framework.owners import AdministratorLocator

from .. import mock_workspace_client
Expand Down Expand Up @@ -135,22 +135,43 @@ def test_job_run_crawler(jobruns_ids, cluster_ids, run_ids, failures):
assert result[0].failures == failures


def test_pipeline_owner_creator() -> None:
def test_jobinfo_owner_creator() -> None:
admin_locator = create_autospec(AdministratorLocator)

ownership = JobOwnership(admin_locator)
ownership = JobInfoOwnership(admin_locator)
owner = ownership.owner_of(JobInfo(creator="bob", job_id="1", success=1, failures="[]"))

assert owner == "bob"
admin_locator.get_workspace_administrator.assert_not_called()


def test_pipeline_owner_creator_unknown() -> None:
def test_jobinfo_owner_creator_unknown() -> None:
admin_locator = create_autospec(AdministratorLocator)
admin_locator.get_workspace_administrator.return_value = "an_admin"

ownership = JobOwnership(admin_locator)
ownership = JobInfoOwnership(admin_locator)
owner = ownership.owner_of(JobInfo(creator=None, job_id="1", success=1, failures="[]"))

assert owner == "an_admin"
admin_locator.get_workspace_administrator.assert_called_once()


def test_job_owner_creator() -> None:
admin_locator = create_autospec(AdministratorLocator)

ownership = JobOwnership(admin_locator)
owner = ownership.owner_of(Job(creator_user_name="bob", job_id=1))

assert owner == "bob"
admin_locator.get_workspace_administrator.assert_not_called()


def test_job_owner_creator_unknown() -> None:
admin_locator = create_autospec(AdministratorLocator)
admin_locator.get_workspace_administrator.return_value = "an_admin"

ownership = JobOwnership(admin_locator)
owner = ownership.owner_of(Job(job_id=1))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise here with creator_user_name=None.


assert owner == "an_admin"
admin_locator.get_workspace_administrator.assert_called_once()