[chore] Rewrite multiline list comprehensions as for-loops and enforce it as a linter rule (#2820)

Improve readability of parts of the codebase
nfx authored Oct 3, 2024
1 parent 352126d commit 674e39f
Showing 21 changed files with 143 additions and 151 deletions.
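
Every file in this commit follows the same mechanical rewrite: a multiline list comprehension (usually with an embedded filter) becomes an explicit for-loop with early continue guards and an append, and the databricks.labs.pylint.readability plugin added below is, per the commit title, meant to flag the comprehension form going forward. A minimal, self-contained sketch of the before/after shape, using a hypothetical Record dataclass rather than code taken from this diff:

from dataclasses import dataclass

@dataclass
class Record:
    name: str | None

records = [Record("alpha"), Record(None), Record("beta")]

# Before: a multiline list comprehension with an embedded filter.
names = [
    record.name.upper()
    for record in records
    if record.name is not None
]

# After: an explicit for-loop with an early continue guard, which is the
# shape this commit rewrites the codebase into.
names = []
for record in records:
    if record.name is None:
        continue
    names.append(record.name.upper())

assert names == ["ALPHA", "BETA"]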
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -76,7 +76,7 @@ dependencies = [
"mypy~=1.9.0",
"pylint~=3.3.1",
"pylint-pytest==2.0.0a0",
"databricks-labs-pylint~=0.4.0",
"databricks-labs-pylint~=0.5",
"databricks-labs-pytester>=0.2.1",
"pytest~=8.3.3",
"pytest-cov~=4.1.0",
@@ -241,6 +241,7 @@ limit-inference-results = 100
load-plugins = [
"databricks.labs.pylint.mocking",
"databricks.labs.pylint.eradicate",
"databricks.labs.pylint.readability",
"pylint_pytest",
"pylint.extensions.bad_builtin",
"pylint.extensions.broad_try_clause",
13 changes: 8 additions & 5 deletions src/databricks/labs/ucx/cli.py
@@ -550,11 +550,14 @@ def revert_cluster_remap(w: WorkspaceClient, prompts: Prompts):
"""Reverting Re-mapping of clusters from UC"""
logger.info("Reverting the Remapping of the Clusters from UC")
ctx = WorkspaceContext(w)
cluster_ids = [
cluster_files.path.split("/")[-1].split(".")[0]
for cluster_files in ctx.installation.files()
if cluster_files.path is not None and cluster_files.path.find("backup/clusters") > 0
]
cluster_ids = []
for cluster_files in ctx.installation.files():
if cluster_files.path is None:
continue
if cluster_files.path.find("backup/clusters") <= 0:
continue
cluster_id = cluster_files.path.split("/")[-1].split(".")[0]
cluster_ids.append(cluster_id)
if not cluster_ids:
logger.info("There is no cluster files in the backup folder. Skipping the reverting process")
return
33 changes: 12 additions & 21 deletions src/databricks/labs/ucx/hive_metastore/grants.py
@@ -623,9 +623,12 @@ def _get_privilege(self, table: Table, locations: dict[str, str], mounts: list[M

def _get_database_grants(self, tables: list[Table], principals: list[str]) -> list[Grant]:
databases = {table.database for table in tables}
return [
Grant(principal, "USAGE", "hive_metastore", database) for database in databases for principal in principals
]
grants = []
for database in databases:
for principal in principals:
grant = Grant(principal, "USAGE", "hive_metastore", database)
grants.append(grant)
return grants

def _get_grants(
self, locations: dict[str, str], principals: list[str], tables: list[Table], mounts: list[Mount]
@@ -635,33 +638,21 @@ def _get_grants(
for table in tables:
privilege = self._get_privilege(table, locations, mounts)
if privilege == "READ_FILES":
grants.extend(
[Grant(principal, "SELECT", table.catalog, table.database, table.name) for principal in principals]
)
for principal in principals:
grants.append(Grant(principal, "SELECT", table.catalog, table.database, table.name))
filtered_tables.append(table)
continue
if privilege == "WRITE_FILES":
grants.extend(
[
Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, table.name)
for principal in principals
]
)
for principal in principals:
grants.append(Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, table.name))
filtered_tables.append(table)
continue
if table.view_text is not None:
grants.extend(
[
Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, view=table.name)
for principal in principals
]
)
for principal in principals:
grants.append(Grant(principal, "ALL PRIVILEGES", table.catalog, table.database, view=table.name))
filtered_tables.append(table)

database_grants = self._get_database_grants(filtered_tables, principals)

grants.extend(database_grants)

return grants

def _get_cluster_principal_mapping(self, cluster_id: str, object_type: str) -> list[str]:
11 changes: 6 additions & 5 deletions src/databricks/labs/ucx/installer/logs.py
@@ -139,8 +139,11 @@ def _record_task(self, task_name: str, log: TextIO, log_creation_timestamp: dt.d
log (TextIO): The task log
log_creation_timestamp (datetime.datetime): The log creation timestamp.
"""
log_records = [
LogRecord(
log_records = []
for partial_log_record in parse_logs(log):
if logging.getLevelName(partial_log_record.level) < logging.WARNING:
continue
log_record = LogRecord(
timestamp=int(
log_creation_timestamp.replace(
hour=partial_log_record.time.hour,
@@ -156,9 +159,7 @@ def _record_task(self, task_name: str, log: TextIO, log_creation_timestamp: dt.d
component=partial_log_record.component,
message=partial_log_record.message,
)
for partial_log_record in parse_logs(log)
if logging.getLevelName(partial_log_record.level) >= logging.WARNING
]
log_records.append(log_record)
return log_records

def snapshot(self) -> list[LogRecord]:
17 changes: 6 additions & 11 deletions src/databricks/labs/ucx/source_code/graph.py
@@ -91,17 +91,12 @@ def register_dependency(self, dependency: Dependency) -> MaybeGraph:
if not container:
problem = DependencyProblem('dependency-register-failed', 'Failed to register dependency', dependency.path)
return MaybeGraph(child_graph, [problem])
problems = container.build_dependency_graph(child_graph)
return MaybeGraph(
child_graph,
[
dataclasses.replace(
problem,
source_path=dependency.path if problem.is_path_missing() else problem.source_path,
)
for problem in problems
],
)
problems = []
for problem in container.build_dependency_graph(child_graph):
if problem.is_path_missing():
problem = dataclasses.replace(problem, source_path=dependency.path)
problems.append(problem)
return MaybeGraph(child_graph, problems)

def locate_dependency(self, path: Path) -> MaybeGraph:
# need a list since unlike JS, Python won't let you assign closure variables
29 changes: 18 additions & 11 deletions src/databricks/labs/ucx/source_code/notebooks/cells.py
@@ -103,16 +103,17 @@ def is_runnable(self) -> bool:
def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProblem]:
context = parent.new_dependency_graph_context()
analyzer = PythonCodeAnalyzer(context, self._original_code)
python_dependency_problems = analyzer.build_graph()
# Position information for the Python code is within the code and needs to be mapped to the location within the parent notebook.
return [
dataclasses.replace(
# Position information for the Python code is within the code and needs to be mapped to the location within
# the parent notebook.
problems = []
for problem in analyzer.build_graph():
problem = dataclasses.replace(
problem,
start_line=self.original_offset + problem.start_line,
end_line=self.original_offset + problem.end_line,
)
for problem in python_dependency_problems
]
problems.append(problem)
return problems

def build_inherited_context(self, graph: DependencyGraph, child_path: Path) -> InheritedContext:
context = graph.new_dependency_graph_context()
@@ -178,11 +179,17 @@ def build_dependency_graph(self, parent: DependencyGraph) -> list[DependencyProb
path, idx, line = self._read_notebook_path()
if path is not None:
start_line = self._original_offset + idx
problems = parent.register_notebook(path, True)
return [
dataclasses.replace(problem, start_line=start_line, start_col=0, end_line=start_line, end_col=len(line))
for problem in problems
]
problems = []
for problem in parent.register_notebook(path, True):
problem = dataclasses.replace(
problem,
start_line=start_line,
start_col=0,
end_line=start_line,
end_col=len(line),
)
problems.append(problem)
return problems
start_line = self._original_offset
problem = DependencyProblem(
'invalid-run-cell',
14 changes: 8 additions & 6 deletions src/databricks/labs/ucx/source_code/queries.py
@@ -78,13 +78,15 @@ def refresh_report(self, sql_backend: SqlBackend, inventory_database: str):
)
# dump dfsas
assessment_end = datetime.now(timezone.utc)
all_dfsas = [
dataclasses.replace(
dfsa, assessment_start_timestamp=assessment_start, assessment_end_timestamp=assessment_end
processed = []
for dfsa in all_dfsas:
dfsa = dataclasses.replace(
dfsa,
assessment_start_timestamp=assessment_start,
assessment_end_timestamp=assessment_end,
)
for dfsa in all_dfsas
]
self._directfs_crawler.dump_all(all_dfsas)
processed.append(dfsa)
self._directfs_crawler.dump_all(processed)

def _dashboard_ids_in_scope(self) -> list[str]:
if self._include_dashboard_ids is not None: # an empty list is accepted
12 changes: 7 additions & 5 deletions src/databricks/labs/ucx/workspace_access/clusters.py
@@ -16,11 +16,13 @@ def __init__(self, installation: Installation, ws: WorkspaceClient, prompts: Pro
self._installation = installation

def list_cluster(self):
clusters = [
clusters
for clusters in self._ws.clusters.list()
if clusters.cluster_source is not None and clusters.cluster_source.name != "JOB"
]
clusters = []
for cluster_info in self._ws.clusters.list():
if cluster_info.cluster_source is None:
continue
if cluster_info.cluster_source.name == "JOB":
continue
clusters.append(cluster_info)
return clusters

def _get_access_mode(self, access_mode: str):
11 changes: 8 additions & 3 deletions src/databricks/labs/ucx/workspace_access/generic.py
@@ -414,16 +414,21 @@ def inner() -> Iterator[ml.ModelDatabricks]:


def experiments_listing(ws: WorkspaceClient):
def _get_repo_nb_tag(experiment):
repo_nb_tag = []
for tag in experiment.tags:
if tag.key == "mlflow.experiment.sourceType" and tag.value == "REPO_NOTEBOOK":
repo_nb_tag.append(tag)
return repo_nb_tag

def inner() -> Iterator[ml.Experiment]:
for experiment in ws.experiments.list_experiments():
# We filter-out notebook-based experiments, because they are covered by notebooks listing in
# workspace-based notebook experiment
if experiment.tags:
nb_tag = [t for t in experiment.tags if t.key == "mlflow.experimentType" and t.value == "NOTEBOOK"]
# repo-based notebook experiment
repo_nb_tag = [
t for t in experiment.tags if t.key == "mlflow.experiment.sourceType" and t.value == "REPO_NOTEBOOK"
]
repo_nb_tag = _get_repo_nb_tag(experiment)
if nb_tag or repo_nb_tag:
continue

9 changes: 4 additions & 5 deletions src/databricks/labs/ucx/workspace_access/groups.py
@@ -857,11 +857,10 @@ def _check_for_deleted_workspace_groups(
) -> None:
attributes = "id,displayName"
expected_deletions = {group.id_in_workspace for group in deleted_workspace_groups}
pending_deletions = [
GroupDeletionIncompleteError(group.id, group.display_name)
for group in self._list_workspace_groups("WorkspaceGroup", attributes)
if group.id in expected_deletions
]
pending_deletions = []
for group in self._list_workspace_groups("WorkspaceGroup", attributes):
if group.id in expected_deletions:
pending_deletions.append(GroupDeletionIncompleteError(group.id, group.display_name))
if pending_deletions:
if logger.isEnabledFor(still_present_log_level):
logger.log(
15 changes: 7 additions & 8 deletions tests/integration/aws/test_access.py
@@ -56,14 +56,13 @@ def test_create_external_location(ws, env_or_skip, make_random, inventory_schema
aws_cli_ctx.principal_acl,
)
external_location_migration.run()
external_location = [
external_location
for external_location in list(ws.external_locations.list())
if external_location.name == f"bucket{rand}_folder1"
]
assert len(external_location) == 1
assert external_location[0].url == f"s3://bucket{rand}/FOLDER1"
assert external_location[0].credential_name == f"ucx_{rand}"
out = []
for external_location in list(ws.external_locations.list()):
if external_location.name == f"bucket{rand}_folder1":
out.append(external_location)
assert len(out) == 1
assert out[0].url == f"s3://bucket{rand}/FOLDER1"
assert out[0].credential_name == f"ucx_{rand}"


def test_create_uber_instance_profile(ws, env_or_skip, make_random, make_cluster_policy, aws_cli_ctx):
25 changes: 13 additions & 12 deletions tests/integration/conftest.py
@@ -334,19 +334,20 @@ def get_azure_spark_conf():
class StaticTablesCrawler(TablesCrawler):
def __init__(self, sb: SqlBackend, schema: str, tables: list[TableInfo]):
super().__init__(sb, schema)
self._tables = [
Table(
catalog=_.catalog_name,
database=_.schema_name,
name=_.name,
object_type=f"{_.table_type.value}",
view_text=_.view_definition,
location=_.storage_location,
table_format=f"{_.data_source_format.value}" if _.table_type.value != "VIEW" else "",
# type: ignore[arg-type]
self._tables = []
for _ in tables:
self._tables.append(
Table(
catalog=_.catalog_name,
database=_.schema_name,
name=_.name,
object_type=f"{_.table_type.value}",
view_text=_.view_definition,
location=_.storage_location,
table_format=f"{_.data_source_format.value}" if _.table_type.value != "VIEW" else "",
# type: ignore[arg-type]
)
)
for _ in tables
]

def snapshot(self, *, force_refresh: bool = False) -> list[Table]:
return self._tables
6 changes: 3 additions & 3 deletions tests/integration/hive_metastore/test_migrate.py
@@ -457,9 +457,9 @@ def test_mapping_skips_tables_databases(ws, sql_backend, runtime_ctx, make_catal
)

dst_catalog = make_catalog()
dst_schemas = [
runtime_ctx.make_schema(catalog_name=dst_catalog.name, name=src_schema.name) for src_schema in src_schemas
]
dst_schemas = []
for src_schema in src_schemas:
dst_schemas.append(runtime_ctx.make_schema(catalog_name=dst_catalog.name, name=src_schema.name))

rules = [
Rule.from_src_dst(src_tables[0], dst_schemas[0]),
9 changes: 4 additions & 5 deletions tests/integration/hive_metastore/test_table_move.py
@@ -18,11 +18,10 @@ def test_move_tables_no_from_schema(ws, sql_backend, make_random, make_catalog,
to_catalog = make_catalog()
table_move = TableMove(ws, sql_backend)
table_move.move(from_catalog.name, from_schema, "*", to_catalog.name, from_schema, del_table=False)
rec_results = [
rec.message
for rec in caplog.records
if f"schema {from_schema} not found in catalog {from_catalog.name}" in rec.message
]
rec_results = []
for rec in caplog.records:
if f"schema {from_schema} not found in catalog {from_catalog.name}" in rec.message:
rec_results.append(rec.message)
assert len(rec_results) == 1


15 changes: 5 additions & 10 deletions tests/integration/source_code/test_jobs.py
@@ -681,13 +681,8 @@ def test_job_dependency_problem_egg_dbr14plus(make_job, make_directory, simple_c
j = make_job(libraries=[library])

problems, *_ = simple_ctx.workflow_linter.lint_job(j.job_id)
assert (
len(
[
problem
for problem in problems
if problem.message == "Installing eggs is no longer supported on Databricks 14.0 or higher"
]
)
== 1
)
actual = []
for problem in problems:
if problem.message == "Installing eggs is no longer supported on Databricks 14.0 or higher":
actual.append(problem)
assert len(actual) == 1
17 changes: 8 additions & 9 deletions tests/integration/workspace_access/test_permissions_manager.py
@@ -27,13 +27,12 @@ def object_types(self) -> set[str]:
]
assert snapshot == expected

saved = [
Permissions(*row)
for row in sql_backend.fetch(
f"SELECT object_id, object_type, raw\n"
f"FROM {inventory_schema}.permissions\n"
f"ORDER BY object_id\n"
f"LIMIT {len(expected)+1}"
)
]
saved = []
for row in sql_backend.fetch(
f"SELECT object_id, object_type, raw\n"
f"FROM {inventory_schema}.permissions\n"
f"ORDER BY object_id\n"
f"LIMIT {len(expected)+1}"
):
saved.append(Permissions(**row))
assert saved == expected