From f5cb9288f17fa0be386bdacaa5e31cfc7b66c626 Mon Sep 17 00:00:00 2001 From: Lars George Date: Mon, 28 Aug 2023 20:11:05 +0200 Subject: [PATCH 1/6] Install script --- bin/install.py | 112 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 bin/install.py diff --git a/bin/install.py b/bin/install.py new file mode 100644 index 0000000000..7cf64133cb --- /dev/null +++ b/bin/install.py @@ -0,0 +1,112 @@ +import os +import sys +from io import BytesIO +import shutil +import argparse +import subprocess +import tempfile + +from databricks.sdk import WorkspaceClient +from databricks.sdk.service.workspace import ImportFormat + +INSTALL_NOTEBOOK = """ +# Databricks notebook source +# MAGIC %md +# MAGIC # UCX - The UC Migration Toolkit +# MAGIC +# MAGIC This notebook installs `ucx` as a Wheel package locally. +# MAGIC and restart the Python interpreter. + +# COMMAND ---------- + +# MAGIC %pip install {remote_wheel_file} +dbutils.library.restartPython() + +""" + +parser = argparse.ArgumentParser(prog="ucx", + description="Builds and installs ucx.") +parser.add_argument("--folder", "-f", default="ucx", + help="name of folder in workspace, default: ucx") +parser.add_argument("--verbose", "-v", action="store_true", + help="increase output verbosity") +parser.add_argument("--debug", action="store_true", + help="enable debug mode") +args = parser.parse_args() + + +def delete_local_dir(dir): + try: + shutil.rmtree(dir) + except OSError as e: + if args.verbose: + print(f"Error: {e.filename} - {e.strerror}.") + + +def main(): + # build wheel in temp directory + tmp_dir = tempfile.TemporaryDirectory() + if args.verbose: + print(f"Created temporary directory: {tmp_dir.name}") + if args.verbose: + subprocess.run([ + "python3", "-m", "pip", + "wheel", "--no-deps", + "--wheel-dir", tmp_dir.name, + ".."], + check=True) + else: + subprocess.run([ + "python3", "-m", "pip", + "wheel", "--no-deps", "--quiet", + "--wheel-dir", tmp_dir.name, + ".."], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True) + # get wheel name as first file in the temp directory + files = os.listdir(tmp_dir.name) + wheel_file_name = files[0] + local_wheel_file = tmp_dir.name + '/' + wheel_file_name + if args.verbose: + print(f"Wheel file: {wheel_file_name}") + # upload wheel and starer notebook to workspace + ws = WorkspaceClient() + folder_base = f"/Users/{ws.current_user.me().user_name}/{args.folder}" + remote_wheel_file = f"{folder_base}/{wheel_file_name}" + remote_notebook_file = f"{folder_base}/install_ucx.py" + if args.verbose: + print(f"Remote wheel file: {remote_wheel_file}") + print(f"Remote notebook file: {remote_notebook_file}") + print("Uploading...") + try: + folder_files = [] + for f in ws.workspace.list(folder_base): + folder_files.append(f.path) + print(f"ERROR: Remote folder '{folder_base}' already exists!") + print(f"Found: {folder_files} - ABORTING!") + sys.exit(-1) + except: + pass + ws.workspace.mkdirs(folder_base) + with open(local_wheel_file, "rb") as fh: + buf = BytesIO(fh.read()) + ws.workspace.upload( + path=remote_wheel_file, + content=buf, + format=ImportFormat.AUTO + ) + buf = BytesIO(INSTALL_NOTEBOOK.format( + remote_wheel_file=remote_wheel_file).encode()) + ws.workspace.upload( + path=remote_notebook_file, + content=buf + ) + # cleanup + delete_local_dir(tmp_dir.name) + if args.verbose: + print("DONE.") + + +if __name__ == "__main__": + main() \ No newline at end of file From 2078e4062763327fcb8cbcb8b2c229cecdb17524 Mon Sep 17 00:00:00 2001 From: Lars George Date: Mon, 28 Aug 2023 22:30:21 +0200 Subject: [PATCH 2/6] Fixed path and text --- bin/install.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/install.py b/bin/install.py index 7cf64133cb..b665181365 100644 --- a/bin/install.py +++ b/bin/install.py @@ -14,12 +14,12 @@ # MAGIC %md # MAGIC # UCX - The UC Migration Toolkit # MAGIC -# MAGIC This notebook installs `ucx` as a Wheel package locally. -# MAGIC and restart the Python interpreter. +# MAGIC This notebook installs `ucx` as a wheel package locally +# MAGIC and then restarts the Python interpreter. # COMMAND ---------- -# MAGIC %pip install {remote_wheel_file} +# MAGIC %pip install /Workspace{remote_wheel_file} dbutils.library.restartPython() """ From 5af99e2b041cd4ce7aabb77d42f97c14b77cb4d2 Mon Sep 17 00:00:00 2001 From: Lars George Date: Tue, 29 Aug 2023 14:38:45 +0200 Subject: [PATCH 3/6] Added PR feedback, logging etc. --- bin/install.py | 126 +++++++++++++++++++++++++++++-------------------- 1 file changed, 75 insertions(+), 51 deletions(-) diff --git a/bin/install.py b/bin/install.py index b665181365..154044e56f 100644 --- a/bin/install.py +++ b/bin/install.py @@ -1,6 +1,7 @@ import os import sys from io import BytesIO +import logging import shutil import argparse import subprocess @@ -9,6 +10,9 @@ from databricks.sdk import WorkspaceClient from databricks.sdk.service.workspace import ImportFormat +from databricks.labs.ucx.logger import _install + + INSTALL_NOTEBOOK = """ # Databricks notebook source # MAGIC %md @@ -24,76 +28,78 @@ """ +# install logging backend +_install() +logger = logging.getLogger(__name__) + +# parse command line parameters parser = argparse.ArgumentParser(prog="ucx", description="Builds and installs ucx.") parser.add_argument("--folder", "-f", default="ucx", help="name of folder in workspace, default: ucx") -parser.add_argument("--verbose", "-v", action="store_true", - help="increase output verbosity") +parser.add_argument("--quiet", action="store_true", + help="suppress extraneous information") parser.add_argument("--debug", action="store_true", help="enable debug mode") args = parser.parse_args() +# adjust logging levels as needed +if args.debug: + logging.getLogger("databricks").setLevel("DEBUG") + def delete_local_dir(dir): + """Helper to delete a directory""" try: shutil.rmtree(dir) except OSError as e: - if args.verbose: - print(f"Error: {e.filename} - {e.strerror}.") + logger.error(f"Error: {e.filename} - {e.strerror}.") -def main(): - # build wheel in temp directory - tmp_dir = tempfile.TemporaryDirectory() - if args.verbose: - print(f"Created temporary directory: {tmp_dir.name}") - if args.verbose: - subprocess.run([ - "python3", "-m", "pip", - "wheel", "--no-deps", - "--wheel-dir", tmp_dir.name, - ".."], - check=True) - else: - subprocess.run([ - "python3", "-m", "pip", - "wheel", "--no-deps", "--quiet", - "--wheel-dir", tmp_dir.name, - ".."], - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - check=True) - # get wheel name as first file in the temp directory - files = os.listdir(tmp_dir.name) - wheel_file_name = files[0] - local_wheel_file = tmp_dir.name + '/' + wheel_file_name - if args.verbose: - print(f"Wheel file: {wheel_file_name}") - # upload wheel and starer notebook to workspace - ws = WorkspaceClient() - folder_base = f"/Users/{ws.current_user.me().user_name}/{args.folder}" - remote_wheel_file = f"{folder_base}/{wheel_file_name}" - remote_notebook_file = f"{folder_base}/install_ucx.py" - if args.verbose: - print(f"Remote wheel file: {remote_wheel_file}") - print(f"Remote notebook file: {remote_notebook_file}") - print("Uploading...") +def folder_exists(folder_base, ws): + """Helper to check if a workspace folder exists""" + folder_files = [] try: - folder_files = [] for f in ws.workspace.list(folder_base): folder_files.append(f.path) - print(f"ERROR: Remote folder '{folder_base}' already exists!") - print(f"Found: {folder_files} - ABORTING!") - sys.exit(-1) + logger.debug(f"Folder files: {folder_files}") + return True except: - pass + return False + + +def build_wheel(): + """Helper to build the wheel package""" + tmp_dir = tempfile.TemporaryDirectory() + logger.debug(f"Created temporary directory: {tmp_dir.name}") + streams = {} + if args.quiet: + streams = { + "stdout": subprocess.DEVNULL, + "stderr": subprocess.DEVNULL, + } + subprocess.run([ + "python3", "-m", "pip", + "wheel", "--no-deps", + "--wheel-dir", tmp_dir.name, + ".."], + **streams, + check=True) + return tmp_dir.name + + +def upload_artifacts(folder_base, local_wheel_file, wheel_file_name, ws): + """Helper to upload artifacts into a workspace folder""" + remote_wheel_file = f"{folder_base}/{wheel_file_name}" + remote_notebook_file = f"{folder_base}/install_ucx.py" + logger.info(f"Remote wheel file: {remote_wheel_file}") + logger.info(f"Remote notebook file: {remote_notebook_file}") + logger.info("Uploading...") ws.workspace.mkdirs(folder_base) with open(local_wheel_file, "rb") as fh: - buf = BytesIO(fh.read()) ws.workspace.upload( path=remote_wheel_file, - content=buf, + content=fh.read(), format=ImportFormat.AUTO ) buf = BytesIO(INSTALL_NOTEBOOK.format( @@ -102,11 +108,29 @@ def main(): path=remote_notebook_file, content=buf ) + + +def main(): + # preflight check + ws = WorkspaceClient() + folder_base = f"/Users/{ws.current_user.me().user_name}/{args.folder}" + if folder_exists(folder_base, ws): + logger.error( + f"ERROR: Remote folder '{folder_base}' already exists, aborting!") + sys.exit(-1) + # build wheel in temp directory + tmp_dir = build_wheel() + # get wheel name as first file in the temp directory + files = os.listdir(tmp_dir) + wheel_file_name = files[0] + local_wheel_file = tmp_dir + '/' + wheel_file_name + logger.info(f"Wheel file: {wheel_file_name}") + # upload wheel and starer notebook to workspace + upload_artifacts(folder_base, local_wheel_file, wheel_file_name, ws) # cleanup - delete_local_dir(tmp_dir.name) - if args.verbose: - print("DONE.") + delete_local_dir(tmp_dir) + logger.info("DONE.") if __name__ == "__main__": - main() \ No newline at end of file + main() From c35fa4dd0c66f1874411e7093f08bb6e104cadfa Mon Sep 17 00:00:00 2001 From: Lars George Date: Tue, 29 Aug 2023 14:49:22 +0200 Subject: [PATCH 4/6] Fixed linter warnings --- bin/install.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/bin/install.py b/bin/install.py index 154044e56f..d0f2117268 100644 --- a/bin/install.py +++ b/bin/install.py @@ -8,6 +8,7 @@ import tempfile from databricks.sdk import WorkspaceClient +from databricks.sdk.core import DatabricksError from databricks.sdk.service.workspace import ImportFormat from databricks.labs.ucx.logger import _install @@ -48,10 +49,10 @@ logging.getLogger("databricks").setLevel("DEBUG") -def delete_local_dir(dir): +def delete_local_dir(dir_name): """Helper to delete a directory""" try: - shutil.rmtree(dir) + shutil.rmtree(dir_name) except OSError as e: logger.error(f"Error: {e.filename} - {e.strerror}.") @@ -64,7 +65,7 @@ def folder_exists(folder_base, ws): folder_files.append(f.path) logger.debug(f"Folder files: {folder_files}") return True - except: + except DatabricksError: return False From 5ad24b9ad349dbe05c5ceb311b26b44bef6f4475 Mon Sep 17 00:00:00 2001 From: Lars George Date: Tue, 29 Aug 2023 14:58:18 +0200 Subject: [PATCH 5/6] Fixed linter warnings --- bin/install.py | 50 ++++++++++++++++---------------------------------- 1 file changed, 16 insertions(+), 34 deletions(-) diff --git a/bin/install.py b/bin/install.py index d0f2117268..fc408ba77e 100644 --- a/bin/install.py +++ b/bin/install.py @@ -1,11 +1,11 @@ -import os -import sys -from io import BytesIO +import argparse import logging +import os import shutil -import argparse import subprocess +import sys import tempfile +from io import BytesIO from databricks.sdk import WorkspaceClient from databricks.sdk.core import DatabricksError @@ -13,7 +13,6 @@ from databricks.labs.ucx.logger import _install - INSTALL_NOTEBOOK = """ # Databricks notebook source # MAGIC %md @@ -34,14 +33,10 @@ logger = logging.getLogger(__name__) # parse command line parameters -parser = argparse.ArgumentParser(prog="ucx", - description="Builds and installs ucx.") -parser.add_argument("--folder", "-f", default="ucx", - help="name of folder in workspace, default: ucx") -parser.add_argument("--quiet", action="store_true", - help="suppress extraneous information") -parser.add_argument("--debug", action="store_true", - help="enable debug mode") +parser = argparse.ArgumentParser(prog="ucx", description="Builds and installs ucx.") +parser.add_argument("--folder", "-f", default="ucx", help="name of folder in workspace, default: ucx") +parser.add_argument("--quiet", action="store_true", help="suppress extraneous information") +parser.add_argument("--debug", action="store_true", help="enable debug mode") args = parser.parse_args() # adjust logging levels as needed @@ -79,13 +74,9 @@ def build_wheel(): "stdout": subprocess.DEVNULL, "stderr": subprocess.DEVNULL, } - subprocess.run([ - "python3", "-m", "pip", - "wheel", "--no-deps", - "--wheel-dir", tmp_dir.name, - ".."], - **streams, - check=True) + subprocess.run( + ["python3", "-m", "pip", "wheel", "--no-deps", "--wheel-dir", tmp_dir.name, ".."], **streams, check=True + ) return tmp_dir.name @@ -98,17 +89,9 @@ def upload_artifacts(folder_base, local_wheel_file, wheel_file_name, ws): logger.info("Uploading...") ws.workspace.mkdirs(folder_base) with open(local_wheel_file, "rb") as fh: - ws.workspace.upload( - path=remote_wheel_file, - content=fh.read(), - format=ImportFormat.AUTO - ) - buf = BytesIO(INSTALL_NOTEBOOK.format( - remote_wheel_file=remote_wheel_file).encode()) - ws.workspace.upload( - path=remote_notebook_file, - content=buf - ) + ws.workspace.upload(path=remote_wheel_file, content=fh.read(), format=ImportFormat.AUTO) + buf = BytesIO(INSTALL_NOTEBOOK.format(remote_wheel_file=remote_wheel_file).encode()) + ws.workspace.upload(path=remote_notebook_file, content=buf) def main(): @@ -116,15 +99,14 @@ def main(): ws = WorkspaceClient() folder_base = f"/Users/{ws.current_user.me().user_name}/{args.folder}" if folder_exists(folder_base, ws): - logger.error( - f"ERROR: Remote folder '{folder_base}' already exists, aborting!") + logger.error(f"ERROR: Remote folder '{folder_base}' already exists, aborting!") sys.exit(-1) # build wheel in temp directory tmp_dir = build_wheel() # get wheel name as first file in the temp directory files = os.listdir(tmp_dir) wheel_file_name = files[0] - local_wheel_file = tmp_dir + '/' + wheel_file_name + local_wheel_file = tmp_dir + "/" + wheel_file_name logger.info(f"Wheel file: {wheel_file_name}") # upload wheel and starer notebook to workspace upload_artifacts(folder_base, local_wheel_file, wheel_file_name, ws) From d67d3c61e80e9b6492625bd726ccfa367ffe05d4 Mon Sep 17 00:00:00 2001 From: Lars George Date: Tue, 29 Aug 2023 15:01:58 +0200 Subject: [PATCH 6/6] Fixed linter warnings --- bin/install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bin/install.py b/bin/install.py index fc408ba77e..fa8575e2a5 100644 --- a/bin/install.py +++ b/bin/install.py @@ -75,7 +75,7 @@ def build_wheel(): "stderr": subprocess.DEVNULL, } subprocess.run( - ["python3", "-m", "pip", "wheel", "--no-deps", "--wheel-dir", tmp_dir.name, ".."], **streams, check=True + [sys.executable, "-m", "pip", "wheel", "--no-deps", "--wheel-dir", tmp_dir.name, ".."], **streams, check=True ) return tmp_dir.name