Added non-git source puller functionality
Handles compressed non-git source
archives from Google Drive, Dropbox, and any publicly
available web address.
sean-morris committed Jul 21, 2021
1 parent 1e57904 commit d02f6cd
Showing 11 changed files with 1,738 additions and 7 deletions.
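For context, a link that exercises this change might look like the sketch below. The hub and archive URLs are placeholders, and the /hub/user-redirect/git-pull prefix is only the usual nbgitpuller link form for a JupyterHub deployment; the repo and compressed query arguments are the ones this commit introduces and reads in handlers.py.

from urllib.parse import urlencode

# hypothetical hub and archive URLs; 'compressed=true' routes the request
# through the new non-git source path instead of a git clone
params = {
    "repo": "https://example.com/materials.zip",
    "compressed": "true",
}
link = "https://hub.example.com/hub/user-redirect/git-pull?" + urlencode(params)
print(link)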
1 change: 1 addition & 0 deletions MANIFEST.in
@@ -1,5 +1,6 @@
include *.md
include LICENSE
include setup.cfg
recursive-include nbgitpuller/plugins *
recursive-include nbgitpuller/static *
recursive-include nbgitpuller/templates *
43 changes: 38 additions & 5 deletions nbgitpuller/handlers.py
@@ -11,6 +11,12 @@

from .pull import GitPuller
from .version import __version__
from .hookspecs import NonGitSourceSpec
from .plugins.zip_puller import ZipSourceGoogleDriveDownloader
from .plugins.zip_puller import ZipSourceDropBoxDownloader
from .plugins.zip_puller import ZipSourceWebDownloader
import pluggy
import distutils.util


class SyncHandler(IPythonHandler):
@@ -38,6 +44,17 @@ def emit(self, data):
self.write('data: {}\n\n'.format(serialized_data))
yield self.flush()

def setup_plugins(self, repo):
pm = pluggy.PluginManager("nbgitpuller")
pm.add_hookspecs(NonGitSourceSpec)
if "drive.google.com" in repo:
pm.register(ZipSourceGoogleDriveDownloader())
elif "dropbox.com" in repo:
pm.register(ZipSourceDropBoxDownloader())
else:
pm.register(ZipSourceWebDownloader())
return pm

@web.authenticated
@gen.coroutine
def get(self):
@@ -53,6 +70,7 @@ def get(self):
try:
repo = self.get_argument('repo')
branch = self.get_argument('branch', None)
compressed = self.get_argument('compressed', "false")
depth = self.get_argument('depth', None)
if depth:
depth = int(depth)
@@ -73,6 +91,12 @@
self.set_header('content-type', 'text/event-stream')
self.set_header('cache-control', 'no-cache')

if distutils.util.strtobool(compressed):
pm = self.setup_plugins(repo)
results = pm.hook.handle_files(repo=repo, repo_parent_dir=repo_parent_dir)[0]
repo_dir = repo_parent_dir + results["unzip_dir"]
repo = "file://" + results["origin_repo_path"]

gp = GitPuller(repo, repo_dir, branch=branch, depth=depth, parent=self.settings['nbapp'])

q = Queue()
@@ -151,16 +175,19 @@ def get(self):
repo = self.get_argument('repo')
branch = self.get_argument('branch', None)
depth = self.get_argument('depth', None)
compressed = self.get_argument('compressed', "false")
urlPath = self.get_argument('urlpath', None) or \
self.get_argument('urlPath', None)
subPath = self.get_argument('subpath', None) or \
self.get_argument('subPath', '.')
app = self.get_argument('app', app_env)
parent_reldir = os.getenv('NBGITPULLER_PARENTPATH', '')
targetpath = self.get_argument('targetpath', None) or \
self.get_argument('targetPath', repo.split('/')[-1])

if urlPath:
if distutils.util.strtobool(compressed):
path = 'tree/'
elif urlPath:
path = urlPath
else:
path = os.path.join(parent_reldir, targetpath, subPath)
@@ -174,7 +201,13 @@
self.write(
self.render_template(
'status.html',
repo=repo, branch=branch, path=path, depth=depth, targetpath=targetpath, version=__version__
repo=repo,
branch=branch,
compressed=compressed,
path=path,
depth=depth,
targetpath=targetpath,
version=__version__
))
self.flush()

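One detail of the handler change above: the compressed query argument arrives as a string and is parsed with distutils.util.strtobool, which lowercases its input, returns 1/0 rather than True/False, and raises ValueError on anything it does not recognise. A small sketch of that behaviour; note that distutils is deprecated and removed in Python 3.12, so the hand-rolled check at the end is a hypothetical replacement, not part of this commit.

import distutils.util

for raw in ("true", "True", "1", "false", "no"):
    print(raw, distutils.util.strtobool(raw))  # 1, 1, 1, 0, 0

# possible drop-in if distutils ever goes away (assumption, not in this commit);
# unlike strtobool it returns a bool and treats unknown values as False
def str_to_bool(value):
    return value.strip().lower() in {"y", "yes", "t", "true", "on", "1"}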
18 changes: 18 additions & 0 deletions nbgitpuller/hookspecs.py
@@ -0,0 +1,18 @@
import pluggy

hookspec = pluggy.HookspecMarker("nbgitpuller")


class NonGitSourceSpec(object):
@hookspec
def handle_files(self, repo, repo_parent_dir):
"""
This handles the downloading of non-git source
files into the user directory. Once downloaded,
the files are merged into a local git repository.
Once the local git repository is updated (or created
the first time), GitPuller can then handle this
directory as it would sources coming from a
git repository.
"""
Empty file added nbgitpuller/plugins/__init__.py
Empty file.
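The hookspec defined in hookspecs.py above, together with the new plugins package, is consumed roughly as in this self-contained sketch. The marker names, the handle_files signature, and the keys in the returned dict come from this commit; the FakeDownloader plugin and its return values are made up for illustration.

import pluggy

hookspec = pluggy.HookspecMarker("nbgitpuller")
hookimpl = pluggy.HookimplMarker("nbgitpuller")


class NonGitSourceSpec:
    @hookspec
    def handle_files(self, repo, repo_parent_dir):
        """Download `repo` and return {"unzip_dir": ..., "origin_repo_path": ...}."""


class FakeDownloader:
    @hookimpl
    def handle_files(self, repo, repo_parent_dir):
        # a real plugin would download and unpack the archive here
        return {"unzip_dir": "materials",
                "origin_repo_path": repo_parent_dir + ".origin_non_git_sources"}


pm = pluggy.PluginManager("nbgitpuller")
pm.add_hookspecs(NonGitSourceSpec)
pm.register(FakeDownloader())

# one result per registered plugin; the handler takes the first
results = pm.hook.handle_files(repo="https://example.com/materials.zip",
                               repo_parent_dir="/home/jovyan/")
print(results[0]["unzip_dir"], results[0]["origin_repo_path"])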
146 changes: 146 additions & 0 deletions nbgitpuller/plugins/plugin_helper.py
@@ -0,0 +1,146 @@
import subprocess
import os
import stat
import logging
import requests
from requests_file import FileAdapter
import shutil
import re


# for large files from Google Drive
def get_confirm_token(response):
for key, value in response.cookies.items():
if key.startswith('download_warning'):
return value
return None


# sets up a local repo that acts like a remote
def initialize_local_repo(local_repo_path):
logging.info(f"Creating local_repo_path: {local_repo_path}")
try:
os.makedirs(local_repo_path)
except OSError:
logging.info(f"Directory exists: {local_repo_path}")

subprocess_helper("git init --bare", local_repo_path)

# Make our bare repository serveable over dumb HTTP
hook_path = os.path.join(local_repo_path, 'hooks', 'post-update')
os.rename(
os.path.join(local_repo_path, 'hooks', 'post-update.sample'),
hook_path
)
os.chmod(hook_path, os.stat(hook_path).st_mode | stat.S_IEXEC)


# local repo cloned from the "remote", which lives in the user's drive
def clone_local_origin_repo(origin_repo_path, temp_download_repo):
logging.info(f"Creating temp_download_repo: {temp_download_repo}")
try:
os.makedirs(temp_download_repo)
except OSError:
logging.info(f"Directory exists: {temp_download_repo}")

cmd = f"git clone file://{origin_repo_path} {temp_download_repo}"
subprocess_helper(cmd, temp_download_repo)


# this is needed to unarchive various formats (e.g. zip, tgz, etc.)
def determine_file_extension(url, response):
file_type = response.headers.get('content-type')
content_disposition = response.headers.get('content-disposition')
ext = None
if content_disposition:
fname = re.findall(r"filename\*?=([^;]+)", content_disposition)
fname = fname[0].strip().strip('"')
ext = fname.split(".")[1]
elif file_type and "/zip" in file_type:
ext = "zip"
else:
url = url.split("/")[-1]
if "?" in url:
url = url[0:url.find('?')]
if "." in url:
ext = url.split(".")[1]

if not ext:
m = f"Could not determine the file extension for unarchiving: {url}"
raise Exception(m)
return ext


# the downloaded content is in the response -- unarchive and save to the disk
def save_response_content(url, response, temp_download_repo):
try:
ext = determine_file_extension(url, response)
CHUNK_SIZE = 32768
temp_download_file = f"{temp_download_repo}/download.{ext}"
with open(temp_download_file, "wb") as f:
for chunk in response.iter_content(CHUNK_SIZE):
# filter out keep-alive new chunks
if chunk:
f.write(chunk)

shutil.unpack_archive(temp_download_file, temp_download_repo)

os.remove(temp_download_file)
except Exception as e:
m = f"Problem handling file download: {str(e)}"
raise Exception(m)


# grab archive file from url
def fetch_files(url, id=-1):
session = requests.Session()
session.mount('file://', FileAdapter()) # add adapter for pytests
response = session.get(url, params={'id': id}, stream=True)
token = get_confirm_token(response)
if token:
params = {'id': id, 'confirm': token}
response = session.get(url, params=params, stream=True)

return response


# this drives the file handling -- called from zip_puller by all the
# handle_files implementations for Google Drive, Dropbox, and standard
# web URLs
def handle_files_helper(args):
try:
origin_repo = args["repo_parent_dir"] + args["origin_dir"]
temp_download_repo = args["repo_parent_dir"] + args["download_dir"]
if os.path.exists(temp_download_repo):
shutil.rmtree(temp_download_repo)

if not os.path.exists(origin_repo):
initialize_local_repo(origin_repo)

clone_local_origin_repo(origin_repo, temp_download_repo)
save_response_content(args["repo"], args["response"], temp_download_repo)
subprocess_helper("git add .", temp_download_repo)
subprocess_helper("git -c [email protected] -c user.name=nbgitpuller commit -m test --allow-empty", temp_download_repo)
subprocess_helper("git push origin master", temp_download_repo)
unzipped_dirs = os.listdir(temp_download_repo)

dir_names = list(filter(lambda dir: ".git" not in dir, unzipped_dirs))
return {"unzip_dir": dir_names[0], "origin_repo_path": origin_repo}
except Exception as e:
logging.exception(e)
raise ValueError(e)


# executes git commands for us
def subprocess_helper(cmd, cwd):
try:
subprocess.run(
cmd.split(" "),
capture_output=True,
text=True,
check=True,
cwd=cwd
)
except Exception:
m = f"Problem executing git command: {cmd}"
raise Exception(m)
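Worth spelling out why determine_file_extension goes to this trouble: shutil.unpack_archive picks the unpack format from the file name, so the downloaded bytes must be written with a matching suffix before they can be extracted. A self-contained demo of that pattern, using a throwaway temp directory and no network:

import os
import shutil
import tempfile

with tempfile.TemporaryDirectory() as tmp:
    src = os.path.join(tmp, "materials")
    os.makedirs(src)
    with open(os.path.join(src, "notebook.txt"), "w") as f:
        f.write("hello")

    # produces <tmp>/download.zip, the same naming pattern used above
    archive = shutil.make_archive(os.path.join(tmp, "download"), "zip", tmp, "materials")

    dest = os.path.join(tmp, "unpacked")
    shutil.unpack_archive(archive, dest)  # format inferred from the .zip suffix
    print(os.listdir(os.path.join(dest, "materials")))  # ['notebook.txt']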
67 changes: 67 additions & 0 deletions nbgitpuller/plugins/zip_puller.py
@@ -0,0 +1,67 @@
from .plugin_helper import fetch_files
from .plugin_helper import handle_files_helper
import pluggy

hookimpl = pluggy.HookimplMarker("nbgitpuller")
TEMP_DOWNLOAD_REPO_DIR = ".temp_download_repo"
CACHED_ORIGIN_NON_GIT_REPO = ".origin_non_git_sources"


# handles standard web addresses (not Google Drive or Dropbox)
class ZipSourceWebDownloader(object):
@hookimpl
# repo --> this is the download URL
# repo_parent_dir --> where we will create the repo
def handle_files(self, repo, repo_parent_dir):
response = fetch_files(repo)
args = {
"repo": repo,
"repo_parent_dir": repo_parent_dir,
"response": response,
"origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
"download_dir": TEMP_DOWNLOAD_REPO_DIR
}
return handle_files_helper(args)


# handles downloads from Google Drive
class ZipSourceGoogleDriveDownloader(object):
def __init__(self):
self.DOWNLOAD_URL = "https://docs.google.com/uc?export=download"

def get_id(self, repo):
start_id_index = repo.index("d/") + 2
end_id_index = repo.index("/view")
return repo[start_id_index:end_id_index]

# repo --> this is the download URL; it contains the file id
# repo_parent_dir --> where we will create the repo
@hookimpl
def handle_files(self, repo, repo_parent_dir):
response = fetch_files(self.DOWNLOAD_URL, self.get_id(repo))
args = {
"repo": repo,
"repo_parent_dir": repo_parent_dir,
"response": response,
"origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
"download_dir": TEMP_DOWNLOAD_REPO_DIR
}
return handle_files_helper(args)


# handles downloads from Dropbox
class ZipSourceDropBoxDownloader(object):
# repo --> this is the download URL
# repo_parent_dir --> where we will create the repo
@hookimpl
def handle_files(self, repo, repo_parent_dir):
repo = repo.replace("dl=0", "dl=1")  # dl=1 forces a direct download from Dropbox
response = fetch_files(repo)
args = {
"repo": repo,
"repo_parent_dir": repo_parent_dir,
"response": response,
"origin_dir": CACHED_ORIGIN_NON_GIT_REPO,
"download_dir": TEMP_DOWNLOAD_REPO_DIR
}
return handle_files_helper(args)
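A small, self-contained illustration of the per-provider URL handling above, with made-up share links; the get_id logic is adapted from the class in this file, and the dl=0 to dl=1 flip mirrors the Dropbox downloader.

# Google Drive: extract the file id from a share link, then hit the export URL
def get_id(repo):
    start_id_index = repo.index("d/") + 2
    end_id_index = repo.index("/view")
    return repo[start_id_index:end_id_index]

share_link = "https://drive.google.com/file/d/1A2b3C4d5E/view?usp=sharing"
print(get_id(share_link))  # 1A2b3C4d5E

# Dropbox: flipping dl=0 to dl=1 turns a preview link into a direct download
dropbox_link = "https://www.dropbox.com/s/abc123/materials.zip?dl=0"
print(dropbox_link.replace("dl=0", "dl=1"))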
7 changes: 6 additions & 1 deletion nbgitpuller/static/index.js
@@ -12,12 +12,13 @@

Terminal.applyAddon(fit);

function GitSync(baseUrl, repo, branch, depth, targetpath, path) {
function GitSync(baseUrl, repo, branch, depth, compressed, targetpath, path) {
// Class that talks to the API backend & emits events as appropriate
this.baseUrl = baseUrl;
this.repo = repo;
this.branch = branch;
this.depth = depth;
this.compressed = compressed;
this.targetpath = targetpath;
this.redirectUrl = baseUrl + path;

@@ -52,6 +53,9 @@
if (typeof this.branch !== 'undefined' && this.branch != undefined) {
syncUrlParams['branch'] = this.branch;
}
if (typeof this.compressed !== 'undefined' && this.compressed != undefined) {
syncUrlParams['compressed'] = this.compressed;
}
var syncUrl = this.baseUrl + 'git-pull/api?' + $.param(syncUrlParams);

this.eventSource = new EventSource(syncUrl);
@@ -133,6 +137,7 @@ require([
utils.get_body_data('repo'),
utils.get_body_data('branch'),
utils.get_body_data('depth'),
utils.get_body_data('compressed'),
utils.get_body_data('targetpath'),
utils.get_body_data('path')
);
1 change: 1 addition & 0 deletions nbgitpuller/templates/status.html
@@ -7,6 +7,7 @@
data-path="{{ path | urlencode }}"
{% if branch %}data-branch="{{ branch | urlencode }}"{% endif %}
{% if depth %}data-depth="{{ depth | urlencode }}"{% endif %}
{% if compressed %}data-compressed="{{ compressed | urlencode }}"{% endif %}
data-targetpath="{{ targetpath | urlencode }}"
{% endblock %}

2 changes: 1 addition & 1 deletion setup.py
@@ -21,7 +21,7 @@
packages=find_packages(),
include_package_data=True,
platforms='any',
install_requires=['notebook>=5.5.0', 'tornado'],
install_requires=['notebook>=5.5.0', 'tornado', 'requests', 'requests-file'],
data_files=[
('etc/jupyter/jupyter_notebook_config.d', ['nbgitpuller/etc/nbgitpuller.json'])
],
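The two new runtime dependencies trace back to plugin_helper.py: requests does the downloading, and requests-file lets the same fetch path read file:// URLs, which the comment there notes is used for pytest. A minimal sketch of that FileAdapter pattern; the temporary file stands in for a real archive.

import tempfile
import requests
from requests_file import FileAdapter

session = requests.Session()
session.mount("file://", FileAdapter())  # same mount as fetch_files above

with tempfile.NamedTemporaryFile(suffix=".zip") as tmp:
    response = session.get("file://" + tmp.name, stream=True)
    print(response.status_code)  # 200 for an existing local file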