From e44630d598d11d9dd7cc08f50e5fff5d7d7ea338 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 08:38:04 +0200 Subject: [PATCH 01/11] Replace finished TODO with docstring --- src/databricks/labs/ucx/source_code/python_libraries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/source_code/python_libraries.py b/src/databricks/labs/ucx/source_code/python_libraries.py index 4313e7feeb..60d6eb8fbf 100644 --- a/src/databricks/labs/ucx/source_code/python_libraries.py +++ b/src/databricks/labs/ucx/source_code/python_libraries.py @@ -22,7 +22,7 @@ class PythonLibraryResolver(LibraryResolver): - # TODO: https://github.com/databrickslabs/ucx/issues/1640 + """Resolve python libraries by registering and installing Python libraries.""" def __init__( self, From 2177ec563c729a5a1007d3ff27d7490e1ac7edc3 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 09:01:59 +0200 Subject: [PATCH 02/11] Add notebook that install pytest twice --- .../source_code/samples/pip_install_pytest_twice.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/unit/source_code/samples/pip_install_pytest_twice.py diff --git a/tests/unit/source_code/samples/pip_install_pytest_twice.py b/tests/unit/source_code/samples/pip_install_pytest_twice.py new file mode 100644 index 0000000000..bf805b633a --- /dev/null +++ b/tests/unit/source_code/samples/pip_install_pytest_twice.py @@ -0,0 +1,13 @@ +# Databricks notebook source + +# COMMAND ---------- + +# MAGIC %pip install pytest + +# COMMAND ---------- + +# MAGIC %pip install pytest + +# COMMAND ---------- + +import pytest From 9cf8fee296cd02fccfa166a62906b19f01db87f3 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 09:08:30 +0200 Subject: [PATCH 03/11] Install python twice with index url The index url is required to avoid local pytest --- tests/integration/source_code/test_libraries.py | 17 +++++++++++++++++ .../samples/pip_install_pytest_twice.py | 4 ++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/tests/integration/source_code/test_libraries.py b/tests/integration/source_code/test_libraries.py index 39192f226f..27db817a9b 100644 --- a/tests/integration/source_code/test_libraries.py +++ b/tests/integration/source_code/test_libraries.py @@ -56,3 +56,20 @@ def test_build_notebook_dependency_graphs_fails_installing_when_spaces(simple_ct assert not maybe.problems assert maybe.graph.all_relative_names() == {f"{notebook}.py", "thingy/__init__.py"} + + +def test_build_notebook_dependency_graphs_when_installing_pytest_twice(simple_ctx) -> None: + ctx = simple_ctx.replace(path_lookup=MockPathLookup()) + maybe = ctx.dependency_resolver.build_notebook_dependency_graph( + Path("pip_install_pytest_twice"), CurrentSessionState() + ) + assert not maybe.problems + + +def test_build_notebook_dependency_graphs_when_installing_pytest_twice_alternative(simple_ctx): + ctx = simple_ctx.replace(path_lookup=MockPathLookup()) + for _ in range(2): + maybe = ctx.dependency_resolver.build_notebook_dependency_graph( + Path("pip_install_pytest_with_index_url"), CurrentSessionState() + ) + assert not maybe.problems diff --git a/tests/unit/source_code/samples/pip_install_pytest_twice.py b/tests/unit/source_code/samples/pip_install_pytest_twice.py index bf805b633a..dd7a7ca107 100644 --- a/tests/unit/source_code/samples/pip_install_pytest_twice.py +++ b/tests/unit/source_code/samples/pip_install_pytest_twice.py @@ -2,11 +2,11 @@ # COMMAND ---------- -# MAGIC %pip install pytest +# MAGIC %pip install pytest --index-url https://pypi.python.org/simple # COMMAND ---------- -# MAGIC %pip install pytest +# MAGIC %pip install pytest --index-url https://pypi.python.org/simple # COMMAND ---------- From bbf81eee4ecfc001c06de17602f337f18e1fe082 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 09:30:01 +0200 Subject: [PATCH 04/11] Handle installing libraries twice --- .../labs/ucx/source_code/python_libraries.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/src/databricks/labs/ucx/source_code/python_libraries.py b/src/databricks/labs/ucx/source_code/python_libraries.py index 60d6eb8fbf..c086604055 100644 --- a/src/databricks/labs/ucx/source_code/python_libraries.py +++ b/src/databricks/labs/ucx/source_code/python_libraries.py @@ -61,13 +61,22 @@ def _install_library(self, path_lookup: PathLookup, *libraries: str) -> list[Dep return self._install_egg(*resolved_libraries) return self._install_pip(*resolved_libraries) - @staticmethod - def _resolve_libraries(path_lookup: PathLookup, *libraries: str) -> list[str]: - # Resolve relative pip installs from notebooks: %pip install ../../foo.whl + def _resolve_libraries(self, path_lookup: PathLookup, *libraries: str) -> list[str]: + """Resolve pip installs as library (pip install databricks-labs-ucx) or as path (pip install ../../ucx.whl). + + A library is defined as library, i.e. *not* as path, when the library: + 1. Is not found in the path lookup. + 2. Is not located in the temporary virtual environment where this resolver install libraries. If this is the + case, it signals the library is installed for a second time. + + Otherwise, we consider the library to be a path. + + Note: The above works given our design choice to *only* build dependency graphs when all dependencies are found. + """ libs = [] for library in libraries: maybe_library = path_lookup.resolve(Path(library)) - if maybe_library is None: + if maybe_library is None or maybe_library.is_relative_to(self._temporary_virtual_environment): libs.append(library) else: libs.append(maybe_library.as_posix()) From 77b0ee3a0d34f839ee0f0b80e51ed148e2396ec1 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 09:35:42 +0200 Subject: [PATCH 05/11] Test installing library twice with different example notebooks --- tests/integration/source_code/test_libraries.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/integration/source_code/test_libraries.py b/tests/integration/source_code/test_libraries.py index 27db817a9b..f8c2a400d8 100644 --- a/tests/integration/source_code/test_libraries.py +++ b/tests/integration/source_code/test_libraries.py @@ -66,10 +66,18 @@ def test_build_notebook_dependency_graphs_when_installing_pytest_twice(simple_ct assert not maybe.problems -def test_build_notebook_dependency_graphs_when_installing_pytest_twice_alternative(simple_ctx): +@pytest.mark.parametrize( + "notebook", + ( + "pip_install_demo_wheel", + "pip_install_multiple_packages", + "pip_install_pytest_with_index_url", + ), +) +def test_build_notebook_dependency_graphs_when_installing_notebooks_twice(simple_ctx, notebook) -> None: ctx = simple_ctx.replace(path_lookup=MockPathLookup()) for _ in range(2): maybe = ctx.dependency_resolver.build_notebook_dependency_graph( - Path("pip_install_pytest_with_index_url"), CurrentSessionState() + Path(notebook), CurrentSessionState() ) assert not maybe.problems From 6c9e4211d4736deac823b4f44076d1bff53ad1e0 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 09:45:01 +0200 Subject: [PATCH 06/11] Format --- tests/integration/source_code/test_libraries.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/source_code/test_libraries.py b/tests/integration/source_code/test_libraries.py index f8c2a400d8..c5b13ce821 100644 --- a/tests/integration/source_code/test_libraries.py +++ b/tests/integration/source_code/test_libraries.py @@ -81,3 +81,6 @@ def test_build_notebook_dependency_graphs_when_installing_notebooks_twice(simple Path(notebook), CurrentSessionState() ) assert not maybe.problems + for _ in range(2): + maybe = ctx.dependency_resolver.build_notebook_dependency_graph(Path(notebook), CurrentSessionState()) + assert not maybe.problems From 2cc256328d650640e2bfffd5dfd7cff34fc6aafe Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 10:02:16 +0200 Subject: [PATCH 07/11] Find pip already exists warning --- tests/integration/source_code/test_libraries.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tests/integration/source_code/test_libraries.py b/tests/integration/source_code/test_libraries.py index c5b13ce821..10ff33146b 100644 --- a/tests/integration/source_code/test_libraries.py +++ b/tests/integration/source_code/test_libraries.py @@ -3,6 +3,8 @@ because it uses the context and the time it takes to run the test. """ +import logging +import re from pathlib import Path import pytest @@ -66,6 +68,11 @@ def test_build_notebook_dependency_graphs_when_installing_pytest_twice(simple_ct assert not maybe.problems +PIP_ALREADY_EXISTS_WARNING = re.compile( + r".*WARNING: Target directory .+ already exists\. Specify --upgrade to force replacement.*" +) + + @pytest.mark.parametrize( "notebook", ( @@ -74,13 +81,10 @@ def test_build_notebook_dependency_graphs_when_installing_pytest_twice(simple_ct "pip_install_pytest_with_index_url", ), ) -def test_build_notebook_dependency_graphs_when_installing_notebooks_twice(simple_ctx, notebook) -> None: +def test_build_notebook_dependency_graphs_when_installing_notebooks_twice(caplog, simple_ctx, notebook) -> None: ctx = simple_ctx.replace(path_lookup=MockPathLookup()) - for _ in range(2): - maybe = ctx.dependency_resolver.build_notebook_dependency_graph( - Path(notebook), CurrentSessionState() - ) - assert not maybe.problems + with caplog.at_level(logging.DEBUG, logger="databricks.labs.ucx.source_code.python_libraries"): for _ in range(2): maybe = ctx.dependency_resolver.build_notebook_dependency_graph(Path(notebook), CurrentSessionState()) assert not maybe.problems + assert not PIP_ALREADY_EXISTS_WARNING.match(caplog.text.replace("\n", " ")), "Pip already exists warning detected" From 0cc766a2602e24a27118ba8bd5b5561ac2a9518d Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 10:09:25 +0200 Subject: [PATCH 08/11] Add upgrade flag to pip install command --- src/databricks/labs/ucx/source_code/python_libraries.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/databricks/labs/ucx/source_code/python_libraries.py b/src/databricks/labs/ucx/source_code/python_libraries.py index c086604055..6c33b6941d 100644 --- a/src/databricks/labs/ucx/source_code/python_libraries.py +++ b/src/databricks/labs/ucx/source_code/python_libraries.py @@ -90,6 +90,7 @@ def _install_pip(self, *libraries: str) -> list[DependencyProblem]: *libraries, "-t", str(self._temporary_virtual_environment), + "--upgrade", # Upgrades when library already installed ] return_code, stdout, stderr = self._runner(args) logger.debug(f"pip output:\n{stdout}\n{stderr}") From 8896b21240fb23b5512ebd480f623dfda2d9a14c Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 10:25:13 +0200 Subject: [PATCH 09/11] Print pip output only when there is any --- src/databricks/labs/ucx/source_code/python_libraries.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/databricks/labs/ucx/source_code/python_libraries.py b/src/databricks/labs/ucx/source_code/python_libraries.py index 6c33b6941d..d470cee209 100644 --- a/src/databricks/labs/ucx/source_code/python_libraries.py +++ b/src/databricks/labs/ucx/source_code/python_libraries.py @@ -93,7 +93,8 @@ def _install_pip(self, *libraries: str) -> list[DependencyProblem]: "--upgrade", # Upgrades when library already installed ] return_code, stdout, stderr = self._runner(args) - logger.debug(f"pip output:\n{stdout}\n{stderr}") + if stdout or stderr: + logger.debug(f"pip output:\n{stdout}\n{stderr}") if return_code != 0: command = " ".join(args) problem = DependencyProblem("library-install-failed", f"'{command}' failed with '{stderr}'") From 958d4492e52167e1038b85cda29a541d3cc45c86 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 10:29:42 +0200 Subject: [PATCH 10/11] Move pip already exists warning check to other test --- .../integration/source_code/test_libraries.py | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/tests/integration/source_code/test_libraries.py b/tests/integration/source_code/test_libraries.py index 10ff33146b..457d65239b 100644 --- a/tests/integration/source_code/test_libraries.py +++ b/tests/integration/source_code/test_libraries.py @@ -60,17 +60,18 @@ def test_build_notebook_dependency_graphs_fails_installing_when_spaces(simple_ct assert maybe.graph.all_relative_names() == {f"{notebook}.py", "thingy/__init__.py"} -def test_build_notebook_dependency_graphs_when_installing_pytest_twice(simple_ctx) -> None: - ctx = simple_ctx.replace(path_lookup=MockPathLookup()) - maybe = ctx.dependency_resolver.build_notebook_dependency_graph( - Path("pip_install_pytest_twice"), CurrentSessionState() +def test_build_notebook_dependency_graphs_when_installing_pytest_twice(caplog, simple_ctx) -> None: + pip_already_exists_warning = re.compile( + r".*WARNING: Target directory .+ already exists\. Specify --upgrade to force replacement.*" ) + ctx = simple_ctx.replace(path_lookup=MockPathLookup()) + with caplog.at_level(logging.DEBUG, logger="databricks.labs.ucx.source_code.python_libraries"): + maybe = ctx.dependency_resolver.build_notebook_dependency_graph( + Path("pip_install_pytest_twice"), CurrentSessionState() + ) assert not maybe.problems - - -PIP_ALREADY_EXISTS_WARNING = re.compile( - r".*WARNING: Target directory .+ already exists\. Specify --upgrade to force replacement.*" -) + assert maybe.graph.path_lookup.resolve(Path("pytest")) + assert not pip_already_exists_warning.match(caplog.text.replace("\n", " ")), "Pip already exists warning detected" @pytest.mark.parametrize( @@ -83,8 +84,6 @@ def test_build_notebook_dependency_graphs_when_installing_pytest_twice(simple_ct ) def test_build_notebook_dependency_graphs_when_installing_notebooks_twice(caplog, simple_ctx, notebook) -> None: ctx = simple_ctx.replace(path_lookup=MockPathLookup()) - with caplog.at_level(logging.DEBUG, logger="databricks.labs.ucx.source_code.python_libraries"): - for _ in range(2): - maybe = ctx.dependency_resolver.build_notebook_dependency_graph(Path(notebook), CurrentSessionState()) - assert not maybe.problems - assert not PIP_ALREADY_EXISTS_WARNING.match(caplog.text.replace("\n", " ")), "Pip already exists warning detected" + for _ in range(2): + maybe = ctx.dependency_resolver.build_notebook_dependency_graph(Path(notebook), CurrentSessionState()) + assert not maybe.problems From 9a4e00cd140acd9ea779337be6ca97a2c9e773f4 Mon Sep 17 00:00:00 2001 From: Cor Zuurmond Date: Mon, 21 Oct 2024 10:34:31 +0200 Subject: [PATCH 11/11] Use quiet flag instead of index url when install pip twice --- tests/unit/source_code/samples/pip_install_pytest_twice.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/source_code/samples/pip_install_pytest_twice.py b/tests/unit/source_code/samples/pip_install_pytest_twice.py index dd7a7ca107..194321d6a9 100644 --- a/tests/unit/source_code/samples/pip_install_pytest_twice.py +++ b/tests/unit/source_code/samples/pip_install_pytest_twice.py @@ -2,11 +2,11 @@ # COMMAND ---------- -# MAGIC %pip install pytest --index-url https://pypi.python.org/simple +# MAGIC %pip install pytest --quiet # COMMAND ---------- -# MAGIC %pip install pytest --index-url https://pypi.python.org/simple +# MAGIC %pip install pytest --quiet # COMMAND ----------