Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

reimplement entrypoint in Python #1014

Merged
merged 4 commits into from
Mar 23, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions repo2docker/buildpacks/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,6 @@
import sys
import hashlib
import escapism
import xml.etree.ElementTree as ET

from traitlets import Dict

# Only use syntax features supported by Docker 17.09
TEMPLATE = r"""
Expand Down Expand Up @@ -181,6 +178,8 @@
{% endif -%}

# Add entrypoint
ENV PYTHONUNBUFFERED=1
COPY /python3-login /usr/local/bin/python3-login
COPY /repo2docker-entrypoint /usr/local/bin/repo2docker-entrypoint
ENTRYPOINT ["/usr/local/bin/repo2docker-entrypoint"]

Expand All @@ -193,9 +192,7 @@
{% endif %}
"""

ENTRYPOINT_FILE = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "repo2docker-entrypoint"
)
HERE = os.path.dirname(os.path.abspath(__file__))

# Also used for the group
DEFAULT_NB_UID = 1000
Expand Down Expand Up @@ -582,7 +579,8 @@ def _filter_tar(tar):
dest_path, src_path = self.generate_build_context_filename(src)
tar.add(src_path, dest_path, filter=_filter_tar)

tar.add(ENTRYPOINT_FILE, "repo2docker-entrypoint", filter=_filter_tar)
for fname in ("repo2docker-entrypoint", "python3-login"):
tar.add(os.path.join(HERE, fname), fname, filter=_filter_tar)

tar.add(".", "src/", filter=_filter_tar)

Expand Down
11 changes: 11 additions & 0 deletions repo2docker/buildpacks/python3-login
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash -l
# python3-login: launch python3 from inside a bash login shell (`bash -l`)
# so that full profile setup has occurred before the interpreter starts.
#
# Why this wrapper exists: a shebang line on Linux only allows one argument,
# so a Python script cannot ask for "a login shell" and "python3" in a single
# shebang line. Such a script can name this executable in its shebang instead.

# -u means unbuffered, which one ~always wants in a container;
# otherwise output can be mysteriously missing.
# All arguments are passed through unchanged, and `exec` replaces this
# shell with the python3 process.
exec python3 -u "$@"
121 changes: 97 additions & 24 deletions repo2docker/buildpacks/repo2docker-entrypoint
Original file line number Diff line number Diff line change
@@ -1,24 +1,97 @@
#!/bin/bash -l
# Lightest possible entrypoint that ensures that
# we use a login shell to get a fully configured shell environment
# (e.g. sourcing /etc/profile.d, ~/.bashrc, and friends).
#
# Usage: repo2docker-entrypoint COMMAND [ARG...]
# Environment:
#   REPO_DIR        directory that receives .jupyter-server-log.txt
#   R2D_ENTRYPOINT  optional executable; when set and non-empty it is run
#                   with COMMAND [ARG...] as its arguments

# Set up a file descriptor (FD) that is connected to a tee process which
# writes its input to $REPO_DIR/.jupyter-server-log.txt
# We later use this FD as a place to redirect the output of the actual
# command to. We can't add `tee` to the command directly as that will prevent
# the container from exiting when `docker stop` is run.
# See https://stackoverflow.com/a/55678435
# The path is quoted so that a REPO_DIR containing spaces or glob characters
# is passed to tee as a single argument.
exec {log_fd}> >(exec tee "$REPO_DIR/.jupyter-server-log.txt")

# Redirections are applied left to right: stdout must be pointed at the tee
# FD *before* stderr is duplicated onto stdout, otherwise stderr keeps
# pointing at the original stdout and never reaches the log file.
# `{log_fd}>&-` then closes the spare copy of the tee FD so the launched
# command only holds it as its stdout/stderr. This has to happen as part of
# the exec itself: nothing after a successful `exec` in this script ever runs.
if [[ -n "${R2D_ENTRYPOINT:-}" ]]; then
    if [[ ! -x "$R2D_ENTRYPOINT" ]]; then
        chmod u+x "$R2D_ENTRYPOINT"
    fi
    exec "$R2D_ENTRYPOINT" "$@" 1>&"$log_fd" 2>&1 {log_fd}>&-
else
    exec "$@" 1>&"$log_fd" 2>&1 {log_fd}>&-
fi
#!/usr/local/bin/python3-login
# note: must run on Python >= 3.5, which mainly means no f-strings

# goals:
# - load environment variables from a login shell (bash -l)
# - preserve signal handling of subprocess (kill -TERM and friends)
# - tee output to a log file

import fcntl
import os
import select
import signal
import subprocess
import sys

# number of bytes requested per read of the child's output
CHUNK_SIZE = 1024

# signals that get forwarded to the child:
# every member of signal.Signals except SIGKILL and SIGSTOP
# (which cannot be caught) and SIGCHLD
SIGNALS = {
    sig
    for sig in signal.Signals
    if sig not in (signal.SIGKILL, signal.SIGSTOP, signal.SIGCHLD)
}


def main():
    """Run the requested command, teeing its output to stdout and a log file.

    The command is ``sys.argv[1:]``. When the ``R2D_ENTRYPOINT`` environment
    variable is set and non-empty, its value is put in front, so the original
    command becomes the arguments of that entrypoint.

    stdout and stderr of the child are merged into one stream. Every chunk
    read from it is written both to this process's stdout and, in append
    mode, to ``.jupyter-server-log.txt`` inside ``$REPO_DIR`` (the current
    directory when ``REPO_DIR`` is unset).

    Every signal listed in ``SIGNALS`` that this process receives is sent on
    to the child.

    Raises:
        SystemExit: always; the exit code is the child's return code.
    """
    # build the command
    # like `exec "$@"`
    command = sys.argv[1:]
    # load entrypoint override from env
    entrypoint_override = os.environ.get("R2D_ENTRYPOINT")
    if entrypoint_override:
        command.insert(0, entrypoint_override)

    log_path = os.path.join(
        os.environ.get("REPO_DIR", "."), ".jupyter-server-log.txt"
    )

    # `with` closes the log file on every way out of this block,
    # including an exception raised while launching or relaying
    with open(log_path, "ab") as log_file:
        # launch the subprocess
        # bufsize is deliberately left at its default: bufsize=1 requests
        # line buffering, which only exists for text-mode pipes. On binary
        # pipes Python >= 3.8 emits a RuntimeWarning and uses the default
        # buffer size anyway.
        child = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        )

        # hook up ~all signals so that every signal the parent gets,
        # the child also gets
        def relay_signal(sig, frame):
            """Relay a signal to the child"""
            child.send_signal(sig)

        for signum in SIGNALS:
            signal.signal(signum, relay_signal)

        def tee(chunk):
            """Tee output from child to both our stdout and the log file"""
            for f in (sys.stdout.buffer, log_file):
                f.write(chunk)
                # flush right away so neither destination lags behind
                f.flush()

        # make stdout pipe non-blocking
        # this means child.stdout.read(nbytes)
        # will always return immediately, even if there's nothing to read
        flags = fcntl.fcntl(child.stdout, fcntl.F_GETFL)
        fcntl.fcntl(child.stdout, fcntl.F_SETFL, flags | os.O_NONBLOCK)
        poller = select.poll()
        poller.register(child.stdout)

        # while child is running, constantly relay output
        while child.poll() is None:
            chunk = child.stdout.read(CHUNK_SIZE)
            if chunk:
                tee(chunk)
            else:
                # nothing to read right now:
                # wait for output on the pipe
                # timeout is in milliseconds
                poller.poll(1000)

        # child has exited, continue relaying any remaining output
        # read() returns an empty (falsy) value when there is nothing left
        chunk = child.stdout.read()
        while chunk:
            tee(chunk)
            chunk = child.stdout.read()

    # make our returncode match the child's returncode
    sys.exit(child.returncode)


if __name__ == "__main__":
    main()
21 changes: 9 additions & 12 deletions tests/unit/test_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@
"""
import os
import subprocess
import sys
import tempfile
import time
from getpass import getuser


def test_env():
def test_env(capfd):
"""
Validate that you can define environment variables

Expand Down Expand Up @@ -42,32 +43,28 @@ def test_env():
# value
"--env",
"SPAM_2=",
# "--",
tmpdir,
"--",
"/bin/bash",
"-c",
# Docker exports all passed env variables, so we can
# just look at exported variables.
"export; sleep 1",
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removing this sleep is what verifies that this is (probably) actually fixing the problem.

# "export; echo TIMDONE",
# "export",
"export",
],
universal_newlines=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
)
captured = capfd.readouterr()
print(captured.out, end="")
print(captured.err, file=sys.stderr, end="")

assert result.returncode == 0

# all docker output is returned by repo2docker on stderr
# extract just the declare for better failure message formatting
# stdout should be empty
assert not result.stdout

print(result.stderr.split("\n"))
# assert False

# stderr should contain lines of output
declares = [x for x in result.stderr.split("\n") if x.startswith("declare")]
declares = [x for x in captured.err.splitlines() if x.startswith("declare")]
assert 'declare -x FOO="{}"'.format(ts) in declares
assert 'declare -x BAR="baz"' in declares
assert 'declare -x SPAM="eggs"' in declares
Expand Down