Skip to content

Commit

Permalink
Organise modules by domain (#197)
Browse files Browse the repository at this point in the history
This simplifies the code and organises modules and classes by their respective
domains/components, roughly mapped onto folders. This is a change from the
previous diamond-shaped cross-module dependency layout, which is harder
to evolve.

The main tenets are:

- any component can depend on `mixins` (a staging area for the SDK) and
`framework` (common modules).
- tight coupling is allowed within a single component. An example is
local group migration, which exposes a single high-level migration
toolkit (`from databricks.labs.ucx.workspace_access import
GroupMigrationToolkit`) and leaves implementation details private.
Another example is a toolkit to migrate from HMS to UC DDL.
- all execution is either triggered from `install` (which also handles
updates) or `runtime` (as the entrypoint for jobs). Multiple toolkits may
be combined into a single Databricks Workflow (aka Job) - e.g. pulling
permissions and tables inventory is necessary for the assessment step,
but later the Workflows may branch out.
- if two classes are used together in 90% of cases, they have to be
defined in a single file (aka Python Module). We don't define `types`
and `managers` modules/packages, as this is an example of
diamond-shaped dependencies, which are more difficult to evolve over
time.
- methods and fields have to be private by default. Make them public
only when necessary.
- classes must have the following order - `__init__`, overridden methods,
public methods, other methods. The most important logic has to come first.
  • Loading branch information
nfx authored and william-conti committed Sep 15, 2023
1 parent 5fb624d commit b66ce3e
Show file tree
Hide file tree
Showing 82 changed files with 1,114 additions and 1,298 deletions.
4 changes: 2 additions & 2 deletions notebooks/toolkit.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
MigrationConfig,
TaclConfig,
)
from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit
from databricks.labs.ucx.toolkits.table_acls import TaclToolkit
from databricks.labs.ucx.workspace_access import GroupMigrationToolkit
from databricks.labs.ucx.hive_metastore import TaclToolkit

# COMMAND ----------

Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -163,14 +163,14 @@ ban-relative-imports = "all"
"ARG001" # tests may not use the provided fixtures
]

"src/databricks/labs/ucx/providers/mixins/redash.py" = ["A002", "A003", "N815"]
"src/databricks/labs/ucx/mixins/redash.py" = ["A002", "A003", "N815"]

[tool.coverage.run]
branch = true
parallel = true

[tool.coverage.report]
omit = ["src/databricks/labs/ucx/providers/mixins/*"]
omit = ["src/databricks/labs/ucx/mixins/*"]
exclude_lines = [
"no cov",
"if __name__ == .__main__.:",
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
from databricks.labs.ucx.logger import _install
from databricks.labs.ucx.framework.logger import _install

_install()
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from databricks.sdk.service.compute import Language

from databricks.labs.ucx.assessment import commands
from databricks.labs.ucx.providers.mixins.compute import CommandExecutor
from databricks.labs.ucx.mixins.compute import CommandExecutor

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from databricks.sdk import WorkspaceClient

from databricks.labs.ucx.providers.mixins.sql import StatementExecutionExt
from databricks.labs.ucx.mixins.sql import StatementExecutionExt

logger = logging.getLogger(__name__)

Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor
from typing import Generic, TypeVar

from databricks.labs.ucx.generic import StrEnum

ExecutableResult = TypeVar("ExecutableResult")
ExecutableFunction = Callable[..., ExecutableResult]
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -66,20 +64,5 @@ def run(self) -> list[ExecutableResult]:
return collected


class Request:
def __init__(self, req: dict):
self.request = req

def as_dict(self) -> dict:
return self.request


class WorkspaceLevelEntitlement(StrEnum):
WORKSPACE_ACCESS = "workspace-access"
DATABRICKS_SQL_ACCESS = "databricks-sql-access"
ALLOW_CLUSTER_CREATE = "allow-cluster-create"
ALLOW_INSTANCE_POOL_CREATE = "allow-instance-pool-create"


def noop():
pass
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pathlib import Path

from databricks.labs.ucx.config import MigrationConfig
from databricks.labs.ucx.logger import _install
from databricks.labs.ucx.framework.logger import _install

_TASKS: dict[str, "Task"] = {}

Expand Down
15 changes: 0 additions & 15 deletions src/databricks/labs/ucx/generic.py

This file was deleted.

3 changes: 3 additions & 0 deletions src/databricks/labs/ucx/hive_metastore/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from databricks.labs.ucx.hive_metastore.table_acls import TaclToolkit

__all__ = ["TaclToolkit"]
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
from dataclasses import dataclass
from functools import partial

from databricks.labs.ucx.tacl._internal import CrawlerBase
from databricks.labs.ucx.tacl.tables import TablesCrawler
from databricks.labs.ucx.utils import ThreadedExecution
from databricks.labs.ucx.framework.crawlers import CrawlerBase
from databricks.labs.ucx.framework.parallel import ThreadedExecution
from databricks.labs.ucx.hive_metastore.tables import TablesCrawler


@dataclass(frozen=True)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,13 @@

from databricks.sdk import WorkspaceClient

from databricks.labs.ucx.tacl._internal import (
from databricks.labs.ucx.framework.crawlers import (
RuntimeBackend,
SqlBackend,
StatementExecutionBackend,
)
from databricks.labs.ucx.tacl.grants import GrantsCrawler
from databricks.labs.ucx.tacl.tables import TablesCrawler
from databricks.labs.ucx.hive_metastore.grants import GrantsCrawler
from databricks.labs.ucx.hive_metastore.tables import TablesCrawler

logger = logging.getLogger(__name__)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@
from dataclasses import dataclass
from functools import partial

from databricks.labs.ucx.providers.mixins.sql import Row
from databricks.labs.ucx.tacl._internal import CrawlerBase, SqlBackend
from databricks.labs.ucx.utils import ThreadedExecution
from databricks.labs.ucx.framework.crawlers import CrawlerBase, SqlBackend
from databricks.labs.ucx.framework.parallel import ThreadedExecution
from databricks.labs.ucx.mixins.sql import Row

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -75,8 +75,7 @@ def __init__(self, backend: SqlBackend, catalog, schema):
Initializes a TablesCrawler instance.
Args:
ws (WorkspaceClient): The WorkspaceClient instance.
warehouse_id: The warehouse ID.
backend (SqlBackend): The SQL Execution Backend abstraction (either REST API or Spark)
catalog (str): The catalog name for the inventory persistence.
schema: The schema name for the inventory persistence.
"""
Expand Down
2 changes: 1 addition & 1 deletion src/databricks/labs/ucx/install.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@

from databricks.labs.ucx.__about__ import __version__
from databricks.labs.ucx.config import GroupsConfig, MigrationConfig, TaclConfig
from databricks.labs.ucx.framework.tasks import _TASKS
from databricks.labs.ucx.runtime import main
from databricks.labs.ucx.tasks import _TASKS

TAG_STEP = "step"
TAG_APP = "App"
Expand Down
1 change: 0 additions & 1 deletion src/databricks/labs/ucx/inventory/__init__.py

This file was deleted.

64 changes: 0 additions & 64 deletions src/databricks/labs/ucx/inventory/permissions.py

This file was deleted.

31 changes: 0 additions & 31 deletions src/databricks/labs/ucx/inventory/permissions_inventory.py

This file was deleted.

32 changes: 0 additions & 32 deletions src/databricks/labs/ucx/inventory/types.py

This file was deleted.

File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
33 changes: 0 additions & 33 deletions src/databricks/labs/ucx/providers/groups_info.py

This file was deleted.

6 changes: 3 additions & 3 deletions src/databricks/labs/ucx/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
from databricks.sdk import WorkspaceClient

from databricks.labs.ucx.config import MigrationConfig
from databricks.labs.ucx.tasks import task, trigger
from databricks.labs.ucx.toolkits.group_migration import GroupMigrationToolkit
from databricks.labs.ucx.toolkits.table_acls import TaclToolkit
from databricks.labs.ucx.framework.tasks import task, trigger
from databricks.labs.ucx.hive_metastore import TaclToolkit
from databricks.labs.ucx.workspace_access import GroupMigrationToolkit

logger = logging.getLogger(__name__)

Expand Down
Loading

0 comments on commit b66ce3e

Please sign in to comment.