diff --git a/.gitignore b/.gitignore
index 2d5462d1..51520b99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,11 +14,16 @@ data8assets/
 summer/
 test-repo/
 venv/
+.idea/
 .ipynb_checkpoints
 docs/_build
+jupyterhub.sqlite
+jupyterhub_cookie_secret
+/jupyterhub-proxy.pid
+
 node_modules/
 package-lock.json
+nbgitpuller/static/dist
-nbgitpuller/static/dist
\ No newline at end of file
diff --git a/dev-requirements.txt b/dev-requirements.txt
index f797739a..bb21db7a 100644
--- a/dev-requirements.txt
+++ b/dev-requirements.txt
@@ -2,4 +2,4 @@ jupyter-packaging>=0.10
 pytest
 pytest-cov
 flake8
-nbclassic
+nbclassic
\ No newline at end of file
diff --git a/nbgitpuller/handlers.py b/nbgitpuller/handlers.py
index f83ad7d5..07b3a279 100644
--- a/nbgitpuller/handlers.py
+++ b/nbgitpuller/handlers.py
@@ -6,13 +6,14 @@ import threading
 import json
 import os
-from queue import Queue, Empty
+from queue import Queue
 import jinja2
 
 from .pull import GitPuller
 from .version import __version__
 
+
 class SyncHandler(IPythonHandler):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
@@ -38,6 +39,37 @@ def emit(self, data):
         self.write('data: {}\n\n'.format(serialized_data))
         yield self.flush()
 
+    @gen.coroutine
+    def _wait_for_sync_progress_queue(self, queue):
+        """
+        Poll the queue for progress messages and relay them to the UI, so the
+        user is kept aware of progress while archives are downloaded and files
+        are merged into the user's home folder.
+
+        :param queue: the download queue or the original pull queue
+        """
+        while True:
+            if queue.empty():
+                yield gen.sleep(0.5)
+                continue
+            progress = queue.get_nowait()
+            if progress is None:
+                return
+            if isinstance(progress, Exception):
+                self.emit({
+                    'phase': 'error',
+                    'message': str(progress),
+                    'output': '\n'.join([
+                        line.strip()
+                        for line in traceback.format_exception(
+                            type(progress), progress, progress.__traceback__
+                        )
+                    ])
+                })
+                return
+
+            self.emit({'output': progress, 'phase': 'syncing'})
+
     @web.authenticated
     @gen.coroutine
     def get(self):
@@ -51,8 +83,11 @@ def get(self):
            return
 
        try:
+            q = Queue()
+
            repo = self.get_argument('repo')
            branch = self.get_argument('branch', None)
+            content_provider = self.get_argument('contentProvider', None)
            depth = self.get_argument('depth', None)
            if depth:
                depth = int(depth)
@@ -73,11 +108,8 @@ def get(self):
            self.set_header('content-type', 'text/event-stream')
            self.set_header('cache-control', 'no-cache')
 
-            gp = GitPuller(repo, repo_dir, branch=branch, depth=depth, parent=self.settings['nbapp'])
-
-            q = Queue()
-
            def pull():
+                gp = GitPuller(repo, repo_dir, branch=branch, depth=depth, parent=self.settings['nbapp'], content_provider=content_provider, repo_parent_dir=repo_parent_dir, other_kw_args=self.request.arguments.items())
                try:
                    for line in gp.pull():
                        q.put_nowait(line)
@@ -86,34 +118,12 @@ def pull():
                except Exception as e:
                    q.put_nowait(e)
                    raise e
-            self.gp_thread = threading.Thread(target=pull)
+            self.gp_thread = threading.Thread(target=pull)
            self.gp_thread.start()
-
-            while True:
-                try:
-                    progress = q.get_nowait()
-                except Empty:
-                    yield gen.sleep(0.5)
-                    continue
-                if progress is None:
-                    break
-                if isinstance(progress, Exception):
-                    self.emit({
-                        'phase': 'error',
-                        'message': str(progress),
-                        'output': '\n'.join([
-                            line.strip()
-                            for line in traceback.format_exception(
-                                type(progress), progress, progress.__traceback__
-                            )
-                        ])
-                    })
-                    return
-
-                self.emit({'output': progress, 'phase': 'syncing'})
-
+            yield self._wait_for_sync_progress_queue(q)
            self.emit({'phase': 'finished'})
+
        except Exception as e:
            self.emit({
                'phase': 'error',
@@ -151,6 +161,7 @@ def get(self):
        repo = self.get_argument('repo')
        branch = self.get_argument('branch', None)
        depth = self.get_argument('depth', None)
+        content_provider = self.get_argument('contentProvider', None)
        urlPath = self.get_argument('urlpath', None) or \
            self.get_argument('urlPath', None)
        subPath = self.get_argument('subpath', None) or \
@@ -171,10 +182,19 @@ def get(self):
        else:
            path = 'tree/' + path
 
+        if content_provider is not None:
+            path = "tree/"
+
        self.write(
            self.render_template(
                'status.html',
-                repo=repo, branch=branch, path=path, depth=depth, targetpath=targetpath, version=__version__
+                repo=repo,
+                branch=branch,
+                path=path,
+                depth=depth,
+                contentProvider=content_provider,
+                targetpath=targetpath,
+                version=__version__
            ))
        self.flush()
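
To make the data flow above concrete, here is a minimal sketch (not part of the patch) of how the query parameters that `SyncHandler` now forwards as `other_kw_args` reach a downloader plugin; the example URL and the `folder` parameter are hypothetical.

```python
# Hypothetical request handled by SyncHandler.get(), e.g.
#   /git-pull/api?repo=https://example.com/lab1.zip&contentProvider=dropbox&folder=lab1
# Tornado exposes the query string as a dict of byte-string lists:
arguments = {
    'repo': [b'https://example.com/lab1.zip'],
    'contentProvider': [b'dropbox'],
    'folder': [b'lab1'],
}

# GitPuller receives arguments.items() as other_kw_args and decodes it exactly
# the way handle_archive_download() does before invoking the plugin hook:
other_kw_args = {k: v[0].decode() for k, v in arguments.items()}
print(other_kw_args)
# {'repo': 'https://example.com/lab1.zip', 'contentProvider': 'dropbox', 'folder': 'lab1'}
```
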
diff --git a/nbgitpuller/plugin_hook_specs.py b/nbgitpuller/plugin_hook_specs.py
new file mode 100644
index 00000000..e6703ad9
--- /dev/null
+++ b/nbgitpuller/plugin_hook_specs.py
@@ -0,0 +1,48 @@
+import pluggy
+
+# This hookspec decorates the handle_files function below. The decorator defines the
+# interface (hook specification) that any implementing content-provider plugin must follow.
+# The project name, nbgitpuller, is passed to the constructors of HookspecMarker and
+# HookimplMarker, as well as to the PluginManager created in pull.py, so that
+# PluginManager.add_hookspecs can automatically discover all marked functions.
+hookspec = pluggy.HookspecMarker("nbgitpuller")
+
+# As a convenience, content-provider plugins can use the hookimpl field to decorate their
+# implementations of the handle_files function. A content-provider plugin could create the
+# HookimplMarker itself, but to register with the PluginManager it must use the same
+# project name ('nbgitpuller') as we do here.
+hookimpl = pluggy.HookimplMarker("nbgitpuller")
+
+
+@hookspec(firstresult=True)
+def handle_files(repo_parent_dir, other_kw_args):
+    """
+    This function must be implemented by content-provider plugins in order to handle the
+    downloading and decompression of a compressed archive that does not come from a git source.
+
+    The repo_parent_dir is where you will save your downloaded archive.
+
+    The parameter other_kw_args contains all the arguments you put on the nbgitpuller URL or
+    passed to GitPuller via the CLI. This gives you the flexibility to pass whatever information
+    your content-provider download plugin needs to successfully download the source files.
+
+    This function needs to return two pieces of information as a json object:
+    - output_dir -- this is the name of the directory that will hold all the files you want
+      GitPuller to expose for comparison; when git is the source, this is the name of the git
+      repository being pulled
+    - origin_repo_path -- this is the path to the local git repo that "acts" as the remote
+      origin you would use if the content provider were git
+
+    Once the files are saved to the directory, GitPuller can handle all the standard functions
+    needed to make sure source files are updated or created as needed.
+
+    I suggest you study the function handle_files_helper in the file plugin_helper.py found in
+    the nbgitpuller-downloader-plugins repository to get a deep sense of how we handle the
+    downloading of compressed archives. There is also more documentation in the docs section of
+    nbgitpuller. Finally, you can always implement the entire download process yourself and not
+    use the handle_files_helper function, but please be sure to understand what is being passed
+    into and back to the nbgitpuller handlers.
+
+    :param str repo_parent_dir: save your downloaded archive here
+    :param dict other_kw_args: any arguments you put on the nbgitpuller URL or pass via the CLI, as a dict
+    :return: a json object with the two fields output_dir and origin_repo_path
+    :rtype: json object (dict)
+    """
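
To make the hook contract concrete, here is a minimal, hypothetical content-provider plugin; the module name, class name, and download steps are illustrative and not part of this patch. It follows the shape the handlers expect: handle_files is a generator that yields progress strings, and the final output_dir/origin_repo_path pair is stored on the instance as handle_files_results, which GitPuller.handle_archive_download() reads back.

```python
# my_downloader.py -- hypothetical plugin module (not part of this patch)
import os
import subprocess

from nbgitpuller.plugin_hook_specs import hookimpl


class MyDownloader:
    @hookimpl
    def handle_files(self, repo_parent_dir, other_kw_args):
        # Directory (relative to repo_parent_dir) that GitPuller will sync from.
        output_dir = "example-archive"
        target = os.path.join(repo_parent_dir, output_dir)
        os.makedirs(target, exist_ok=True)

        yield "Downloading archive..."
        # ... fetch and decompress the archive into `target` here, using
        # whatever extra values arrived in other_kw_args ...

        yield "Creating local origin repository..."
        subprocess.run(["git", "init"], cwd=target, check=True)
        subprocess.run(["git", "add", "."], cwd=target, check=True)
        subprocess.run(["git", "commit", "--allow-empty", "-m", "import archive"], cwd=target, check=True)

        # nbgitpuller reads this attribute once the hook generator is exhausted.
        self.handle_files_results = {
            "output_dir": output_dir,
            "origin_repo_path": target,
        }
```

The plugin still has to be discoverable through an entry point named after its content provider; a sketch of that registration follows the setup.py hunk at the end of this patch.
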
diff --git a/nbgitpuller/pull.py b/nbgitpuller/pull.py
index cc18ac97..6ffc34c1 100644
--- a/nbgitpuller/pull.py
+++ b/nbgitpuller/pull.py
@@ -4,9 +4,23 @@ import time
 import argparse
 import datetime
+import pluggy
+import importlib_metadata
+import inspect
 from traitlets import Integer, default
 from traitlets.config import Configurable
 from functools import partial
+from nbgitpuller import plugin_hook_specs
+
+
+class ContentProviderException(Exception):
+    """
+    Custom exception thrown when the downloader plugin specified by the
+    content_provider key is not installed or cannot be found under the
+    name given.
+    """
+    def __init__(self, response=None):
+        self.response = response
 
 
 def execute_cmd(cmd, **kwargs):
@@ -45,6 +59,40 @@ def flush():
         raise subprocess.CalledProcessError(ret, cmd)
 
 
+def load_downloader_plugin_classes_from_entrypoints(group, content_provider):
+    # Scan every installed distribution for an entry point in the given group whose
+    # name matches the requested content_provider and whose class implements handle_files.
+    for dist in list(importlib_metadata.distributions()):
+        for ep in dist.entry_points:
+            if ep.group == group:
+                plugin = ep.load()
+                for name, cls in inspect.getmembers(plugin, inspect.isclass):
+                    if cls.__module__ == ep.value and ep.name == content_provider:
+                        for fn_name, fn in inspect.getmembers(cls, inspect.isfunction):
+                            if fn_name == "handle_files":
+                                return cls
+    return None
+
+
+def setup_plugins(content_provider):
+    """
+    Searches for and loads any installed package that registers an nbgitpuller entry point.
+    If a plugin matching content_provider is found, the plugin manager is configured with it
+    and used to execute the hook implemented by the plugin.
+
+    :param content_provider: the name of the content provider; each plugin is named to identify
+        the content provider of the archive to be loaded (e.g. googledrive, dropbox, etc.)
+    :return: a dict holding the PluginManager ("plugin_manager") used to call the implemented
+        hooks and the plugin instance ("downloader_obj")
+    :raises ContentProviderException: when no plugin matching content_provider can be found
+    """
+    plugin_manager = pluggy.PluginManager("nbgitpuller")
+    plugin_manager.add_hookspecs(plugin_hook_specs)
+    download_class = load_downloader_plugin_classes_from_entrypoints("nbgitpuller", content_provider)
+    if download_class is None:
+        raise ContentProviderException(f"The content_provider key you supplied in the URL could not be found: {content_provider}")
+    downloader_obj = download_class()
+    plugin_manager.register(downloader_obj)
+    return {"plugin_manager": plugin_manager, "downloader_obj": downloader_obj}
+
+
 class GitPuller(Configurable):
     depth = Integer(
         config=True,
@@ -71,12 +119,9 @@ def __init__(self, git_url, repo_dir, **kwargs):
        self.git_url = git_url
        self.branch_name = kwargs.pop("branch")
-
-        if self.branch_name is None:
-            self.branch_name = self.resolve_default_branch()
-        elif not self.branch_exists(self.branch_name):
-            raise ValueError(f"Branch: {self.branch_name} -- not found in repo: {self.git_url}")
-
+        self.content_provider = kwargs.pop("content_provider")
+        self.repo_parent_dir = kwargs.pop("repo_parent_dir")
+        self.other_kw_args = kwargs.pop("other_kw_args")
        self.repo_dir = repo_dir
        newargs = {k: v for k, v in kwargs.items() if v is not None}
        super(GitPuller, self).__init__(**newargs)
@@ -135,11 +180,37 @@ def resolve_default_branch(self):
            logging.exception(m)
            raise ValueError(m)
 
+    def handle_archive_download(self):
+        try:
+            plugin_info = setup_plugins(self.content_provider)
+            plugin_manager = plugin_info["plugin_manager"]
+            downloader_obj = plugin_info["downloader_obj"]
+            other_kw_args = {k: v[0].decode() for k, v in self.other_kw_args}
+            yield from plugin_manager.hook.handle_files(repo_parent_dir=self.repo_parent_dir, other_kw_args=other_kw_args)
+            results = downloader_obj.handle_files_results
+            self.repo_dir = self.repo_parent_dir + results["output_dir"]
+            self.git_url = "file://" + results["origin_repo_path"]
+        except ContentProviderException as c:
+            raise c
+
+    def handle_branch_name(self):
+        if self.branch_name is None:
+            self.branch_name = self.resolve_default_branch()
+        elif not self.branch_exists(self.branch_name):
+            raise ValueError(f"Branch: {self.branch_name} -- not found in repo: {self.git_url}")
+
     def pull(self):
         """
-        Pull selected repo from a remote git repository,
+        If a content provider is specified, download and unpack the compressed archive first.
+        Then pull the repo from a git repository (remote, or a temporary local repo created for the archive),
         while preserving user changes
         """
+        # if content_provider is specified we are dealing with a compressed archive, not a git repo
+        if self.content_provider is not None:
+            yield from self.handle_archive_download()
+
+        self.handle_branch_name()
+
        if not os.path.exists(self.repo_dir):
            yield from self.initialize_repo()
        else:
@@ -303,14 +374,20 @@ def main():
    parser = argparse.ArgumentParser(description='Synchronizes a github repository with a local repository.')
    parser.add_argument('git_url', help='Url of the repo to sync')
-    parser.add_argument('branch_name', default=None, help='Branch of repo to sync', nargs='?')
-    parser.add_argument('repo_dir', default='.', help='Path to clone repo under', nargs='?')
+    parser.add_argument('repo_dir', help='Path to clone repo under', nargs='?')
+    parser.add_argument('--branch_name', default=None, help='Branch of repo to sync', nargs='?')
+    parser.add_argument('--content_provider', default=None, help='Set this when downloading a compressed archive instead of pulling from a git repo (e.g. dropbox, googledrive, generic_web)', nargs='?')
+    parser.add_argument('--repo_parent_dir', default='.', help='Only used when downloading a compressed archive; the location the archive is downloaded to', nargs='?')
+    parser.add_argument('--other_kw_args', default=None, help='Any extra keyword args you want to pass, as a dict, e.g. {"arg1":"value1","arg2":"value2"} -- may be used by downloader plugins', nargs='?')
    args = parser.parse_args()
 
    for line in GitPuller(
        args.git_url,
        args.repo_dir,
-        branch=args.branch_name if args.branch_name else None
+        branch=args.branch_name if args.branch_name else None,
+        content_provider=args.content_provider if args.content_provider else None,
+        repo_parent_dir=args.repo_parent_dir if args.repo_parent_dir else None,
+        other_kw_args=args.other_kw_args if args.other_kw_args else None
    ).pull():
        print(line)
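
A short usage sketch of the extended GitPuller constructor, mirroring what main() now does; the URL, provider name, and paths are placeholders, and a downloader plugin registered under the name used here is assumed to be installed.

```python
# Hypothetical programmatic use of the new keyword arguments (placeholders throughout).
from nbgitpuller.pull import GitPuller

puller = GitPuller(
    "https://example.com/materials.zip",   # non-git source; the plugin downloads it
    "materials",                           # replaced by the plugin's output_dir when content_provider is set
    branch=None,
    content_provider="generic_web",        # must match an installed plugin's entry-point name
    repo_parent_dir="/home/jovyan/",
    # same shape as Tornado's request.arguments.items(): (name, [bytes]) pairs
    other_kw_args={"repo": [b"https://example.com/materials.zip"]}.items(),
)

for line in puller.pull():
    print(line)
```
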
diff --git a/nbgitpuller/static/js/index.js b/nbgitpuller/static/js/index.js
index 042aefd6..f51332d1 100644
--- a/nbgitpuller/static/js/index.js
+++ b/nbgitpuller/static/js/index.js
@@ -2,15 +2,15 @@ import { Terminal } from 'xterm';
 import { FitAddon } from 'xterm-addon-fit';
 import css from '../../../node_modules/xterm/css/xterm.css';
 
-function GitSync(baseUrl, repo, branch, depth, targetpath, path) {
+function GitSync(baseUrl, repo, branch, depth, targetpath, path, contentProvider) {
    // Class that talks to the API backend & emits events as appropriate
    this.baseUrl = baseUrl;
    this.repo = repo;
    this.branch = branch;
    this.depth = depth;
    this.targetpath = targetpath;
+    this.contentProvider = contentProvider;
    this.redirectUrl = baseUrl + path;
-
    this.callbacks = {};
 }
@@ -42,6 +42,9 @@ GitSync.prototype.start = function() {
    if (typeof this.branch !== 'undefined' && this.branch != undefined) {
        syncUrlParams['branch'] = this.branch;
    }
+    if (typeof this.contentProvider !== 'undefined' && this.contentProvider != undefined) {
+        syncUrlParams['contentProvider'] = this.contentProvider;
+    }
    var syncUrl = this.baseUrl + 'git-pull/api?' + $.param(syncUrlParams);
 
    this.eventSource = new EventSource(syncUrl);
@@ -139,7 +142,8 @@ var gs = new GitSync(
    get_body_data('branch'),
    get_body_data('depth'),
    get_body_data('targetpath'),
-    get_body_data('path')
+    get_body_data('path'),
+    get_body_data('contentProvider')
 );
 
 var gsv = new GitSyncView(
diff --git a/nbgitpuller/templates/status.html b/nbgitpuller/templates/status.html
index f4388510..3d8a0e18 100644
--- a/nbgitpuller/templates/status.html
+++ b/nbgitpuller/templates/status.html
@@ -7,11 +7,12 @@ data-path="{{ path | urlencode }}"
 {% if branch %}data-branch="{{ branch | urlencode }}"{% endif %}
 {% if depth %}data-depth="{{ depth | urlencode }}"{% endif %}
+{% if contentProvider %}data-content-provider="{{ contentProvider | urlencode }}"{% endif %}
 data-targetpath="{{ targetpath | urlencode }}"
 {% endblock %}
 
 {% block site %}
 
-
+
diff --git a/setup.py b/setup.py
index 3efef420..5c3691b3 100644
--- a/setup.py
+++ b/setup.py
@@ -38,7 +38,7 @@
    packages=find_packages(),
    include_package_data=True,
    platforms='any',
-    install_requires=['notebook>=5.5.0', 'jupyter_server>=1.10.1', 'tornado'],
+    install_requires=['notebook>=5.5.0', 'jupyter_server>=1.10.1', 'tornado', 'pluggy'],
    data_files=[
        ('etc/jupyter/jupyter_server_config.d', ['nbgitpuller/etc/jupyter_server_config.d/nbgitpuller.json']),
        ('etc/jupyter/jupyter_notebook_config.d', ['nbgitpuller/etc/jupyter_notebook_config.d/nbgitpuller.json'])
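
For completeness, a sketch of how a separately packaged downloader plugin might register itself so that load_downloader_plugin_classes_from_entrypoints can discover it; the package, module, and provider names are hypothetical.

```python
# setup.py of a hypothetical nbgitpuller-downloader-mydownloader package
from setuptools import setup, find_packages

setup(
    name="nbgitpuller-downloader-mydownloader",
    version="0.1.0",
    packages=find_packages(),
    install_requires=["nbgitpuller"],
    entry_points={
        # "nbgitpuller" is the group pull.py scans; the entry-point name is what users
        # pass as contentProvider / --content_provider, and the value is the module
        # whose class implements handle_files.
        "nbgitpuller": [
            "mydownloader = my_downloader",
        ],
    },
)
```

With this in place, a link carrying contentProvider=mydownloader (or `--content_provider mydownloader` on the CLI) resolves to the MyDownloader class sketched earlier.
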