# syntax=docker/dockerfile:1
# Dockerfile for base image of all pangeo images
FROM ubuntu:22.04
# build file for pangeo images
LABEL org.opencontainers.image.source=https://github.com/pangeo-data/pangeo-docker-images
# Setup environment to match variables set by repo2docker as much as possible
# The name of the conda environment into which the requested packages are installed
ENV CONDA_ENV=notebook \
# Tell apt-get to not block installs by asking for interactive human input
DEBIAN_FRONTEND=noninteractive \
# Set username, uid and gid (same as uid) of non-root user the container will be run as
NB_USER=jovyan \
NB_UID=1000 \
# Use /bin/bash as shell, not the default /bin/sh (arrow keys, etc don't work then)
SHELL=/bin/bash \
# Setup locale to be UTF-8, avoiding gnarly hard to debug encoding errors
LANG=C.UTF-8 \
LC_ALL=C.UTF-8 \
# Install conda in the same place repo2docker does
CONDA_DIR=/srv/conda
# All env vars that reference other env vars need to be in their own ENV block
# Path to the python environment where the jupyter notebook packages are installed
ENV NB_PYTHON_PREFIX=${CONDA_DIR}/envs/${CONDA_ENV} \
# Home directory of our non-root user
HOME=/home/${NB_USER}
# Add both our notebook env as well as default conda installation to $PATH
# Thus, when we start a `python` process (for kernels, or notebooks, etc),
# it loads the python in the notebook conda environment, as that comes
# first here.
ENV PATH=${NB_PYTHON_PREFIX}/bin:${CONDA_DIR}/bin:${PATH}
# Ask dask to read config from ${CONDA_DIR}/etc rather than
# the default of /etc, since the non-root jovyan user can write
# to ${CONDA_DIR}/etc but not to /etc
ENV DASK_ROOT_CONFIG=${CONDA_DIR}/etc
RUN echo "Creating ${NB_USER} user..." \
# Create a group for the user to be part of, with gid same as uid
&& groupadd --gid ${NB_UID} ${NB_USER} \
# Create non-root user, with given gid, uid and create $HOME
&& useradd --create-home --gid ${NB_UID} --no-log-init --uid ${NB_UID} ${NB_USER} \
# Make sure that /srv is owned by non-root user, so we can install things there
&& chown -R ${NB_USER}:${NB_USER} /srv
# Run conda activate each time a bash shell starts, so users don't have to manually type conda activate
# Note this is only read by the shell, but not by the jupyter notebook - that relies
# on us starting the correct `python` process, which we do by adding the notebook conda environment's
# bin to PATH earlier ($NB_PYTHON_PREFIX/bin)
RUN echo ". ${CONDA_DIR}/etc/profile.d/conda.sh ; conda activate ${CONDA_ENV}" > /etc/profile.d/init_conda.sh
# Install basic apt packages
RUN echo "Installing Apt-get packages..." \
&& apt-get update --fix-missing > /dev/null \
&& apt-get install -y apt-utils wget zip tzdata > /dev/null \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Add TZ configuration - https://github.com/PrefectHQ/prefect/issues/3061
ENV TZ=UTC
# ========================
USER ${NB_USER}
WORKDIR ${HOME}
# Install latest Miniforge in ${CONDA_DIR}
RUN echo "Installing Miniforge..." \
&& URL="https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-$(uname -m).sh" \
&& wget --quiet ${URL} -O installer.sh \
&& /bin/bash installer.sh -u -b -p ${CONDA_DIR} \
&& rm installer.sh \
&& mamba install conda-lock -y \
&& mamba clean -afy \
# After installing the packages, we cleanup some unnecessary files
# to try to reduce image size - see https://jcristharif.com/conda-docker-tips.html
# Although we explicitly do *not* delete .pyc files, as that seems to slow down startup
# quite a bit unfortunately - see https://github.com/2i2c-org/infrastructure/issues/2047
&& find ${CONDA_DIR} -follow -type f -name '*.a' -delete
EXPOSE 8888
ENTRYPOINT ["/srv/start"]
#CMD ["jupyter", "notebook", "--ip", "0.0.0.0"]
# We use ONBUILD (https://docs.docker.com/engine/reference/builder/#onbuild)
# to support triggering certain behavior when specific files exist in the directories of our
# child images (such as base-notebook, pangeo-notebook, etc). For example,
# in pangeo-notebook/Dockerfile, we *only* inherit from base-image:master, and
# that triggers all these ONBUILD directives - it is as if these ONBUILD
# directives are located inside pangeo-notebook/Dockerfile. This lets us
# keep the Dockerfiles for our child docker images simple, and customize
# them by just adding files with known names to them. This is
# to *mimic* the repo2docker behavior, where users can just add
# environment.yml, requirements.txt, apt.txt etc files to get certain
# behavior without having to understand how Dockerfiles work. We use
# ONBUILD to support a subset of the files that repo2docker supports.
# We do not use repo2docker itself here, to make the images much smaller
# and easier to reason about.
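# As an illustration (not part of this build), a child Dockerfile that relies
# on these ONBUILD triggers can be as small as a single FROM line, assuming an
# environment.yml (or any of the other supported files) sits in its build context:
#
#   FROM pangeo/base-image:master
#   # nothing else needed - the ONBUILD steps below copy the build context
#   # and install from environment.yml / apt.txt / postBuild automatically
#
# The image reference above is only a sketch; the real child images in this
# repository pin the exact base-image tag they build from.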
# ----------------------
ONBUILD USER root
# FIXME (?): user and home folder are hardcoded for now
# FIXME (?): this line breaks the cache of all steps below
ONBUILD COPY --chown=jovyan:jovyan . /home/jovyan
# repo2docker will load files from a .binder or binder directory if
# present. We check if those directories exist, and print a diagnostic
# message here.
ONBUILD RUN echo "Checking for 'binder' or '.binder' subfolder" \
; if [ -d binder ] ; then \
echo "Using 'binder/' build context" \
; elif [ -d .binder ] ; then \
echo "Using '.binder/' build context" \
; else \
echo "Using './' build context" \
; fi
# Install apt packages specified in an apt.txt file if it exists.
# Unlike repo2docker, neither blank lines nor comments are supported here.
ONBUILD RUN echo "Checking for 'apt.txt'..." \
; [ -d binder ] && cd binder \
; [ -d .binder ] && cd .binder \
; if test -f "apt.txt" ; then \
apt-get update --fix-missing > /dev/null \
# Read apt.txt line by line, and execute apt-get install -y for each line in apt.txt
&& xargs -a apt.txt apt-get install -y \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* \
; fi
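# For illustration, an apt.txt in the build context is just one package name
# per line (no comments or blank lines, per the note above); the packages
# below are examples only:
#
#   graphviz
#   ffmpeg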
# If a jupyter_notebook_config.py exists, copy it to /etc/jupyter so
# it will be read by jupyter processes when they start. This feature is
# not available in repo2docker.
ONBUILD RUN echo "Checking for 'jupyter_notebook_config.py'..." \
; [ -d binder ] && cd binder \
; [ -d .binder ] && cd .binder \
; if test -f "jupyter_notebook_config.py" ; then \
mkdir -p /etc/jupyter \
&& cp jupyter_notebook_config.py /etc/jupyter \
; fi
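# As a rough sketch, such a jupyter_notebook_config.py holds ordinary
# traitlets settings; which traits apply depends on the Jupyter server in
# use, and the value below is purely illustrative:
#
#   c = get_config()
#   c.NotebookApp.default_url = "/lab"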
ONBUILD USER ${NB_USER}
# We want to keep our images as reproducible as possible. If a lock
# file with exact versions of all required packages is present, we use
# it to install packages. conda-lock (https://github.com/conda-incubator/conda-lock)
# is used to generate this conda-lock.yml file from a given environment.yml
# file - so we get the exact same versions each time the image is built. Note that
# different packages may be used for different CPU architectures, but still,
# the same dockerfile can be used to build different architecture images. This
# also lets us see what packages have changed between two images by diffing
# the contents of the lock file between those image versions.
# If a lock file is not present, we use the environment.yml file. And
# if that is also not present, we use the pangeo-notebook conda-forge
# package (https://anaconda.org/conda-forge/pangeo-notebook) to install
# a list of base packages.
# After installing the packages, we cleanup some unnecessary files
# to try to reduce image size - see https://jcristharif.com/conda-docker-tips.html
ONBUILD RUN echo "Checking for 'conda-lock.yml' or 'environment.yml'..." \
; [ -d binder ] && cd binder \
; [ -d .binder ] && cd .binder \
; if test -f "conda-lock.yml" ; then echo "Using conda-lock.yml" & \
conda-lock install --name ${CONDA_ENV} \
; elif test -f "environment.yml" ; then echo "Using environment.yml" && \
mamba env create --name ${CONDA_ENV} -f environment.yml \
; else echo "No conda-lock.yml or environment.yml! *creating default env*" ; \
mamba create --name ${CONDA_ENV} pangeo-notebook \
; fi \
&& mamba clean -yaf \
&& find ${CONDA_DIR} -follow -type f -name '*.a' -delete \
&& find ${CONDA_DIR} -follow -type f -name '*.js.map' -delete \
; if ls ${NB_PYTHON_PREFIX}/lib/python*/site-packages/bokeh/server/static > /dev/null 2>&1; then \
find ${NB_PYTHON_PREFIX}/lib/python*/site-packages/bokeh/server/static -follow -type f -name '*.js' ! -name '*.min.js' -delete \
; fi
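# For reference, a conda-lock.yml like the one consumed above is usually
# generated outside the image build from an environment.yml, roughly:
#
#   conda-lock lock --file environment.yml \
#       --platform linux-64 --platform linux-aarch64
#
# (the platforms are illustrative - choose the architectures you build for);
# this writes a unified conda-lock.yml next to environment.yml.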
# If a requirements.txt file exists, use pip to install packages
# listed there. We don't want to save cached wheels in the image
# to avoid wasting space.
ONBUILD RUN echo "Checking for pip 'requirements.txt'..." \
; [ -d binder ] && cd binder \
; [ -d .binder ] && cd .binder \
; if test -f "requirements.txt" ; then \
${NB_PYTHON_PREFIX}/bin/pip install --no-cache -r requirements.txt \
; fi
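# A requirements.txt here follows the standard pip format, typically pinning
# pip-only packages; the entry below is a hypothetical example:
#
#   some-pip-only-package==1.2.3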
# If a postBuild file exists, run it!
# After it's done, we try to remove any cruft the commands in it leave
# behind under $HOME - particularly stuff that jupyterlab extensions
# leave behind.
ONBUILD RUN echo "Checking for 'postBuild'..." \
; [ -d binder ] && cd binder \
; [ -d .binder ] && cd .binder \
; if test -f "postBuild" ; then \
chmod +x postBuild \
&& ./postBuild \
&& rm -rf /tmp/* \
&& rm -rf ${HOME}/.cache ${HOME}/.npm ${HOME}/.yarn \
&& rm -rf ${NB_PYTHON_PREFIX}/share/jupyter/lab/staging \
&& find ${CONDA_DIR} -follow -type f -name '*.a' -delete \
&& find ${CONDA_DIR} -follow -type f -name '*.js.map' -delete \
; fi
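# A postBuild file is simply an executable script run once at build time.
# A minimal sketch (the commands are examples, not requirements):
#
#   #!/bin/bash
#   set -euo pipefail
#   jupyter lab build   # e.g. rebuild JupyterLab after adding source extensions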
# If a start file exists, put that under /srv/start. Used in the
# same way as a start file in repo2docker.
ONBUILD RUN echo "Checking for 'start'..." \
; [ -d binder ] && cd binder \
; [ -d .binder ] && cd .binder \
; if test -f "start" ; then \
chmod +x start \
&& cp start /srv/start \
; fi
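# In repo2docker's convention, a start script wraps the container's launch
# command, so it must finish by exec-ing its arguments. A minimal sketch:
#
#   #!/bin/bash
#   export EXAMPLE_VAR=value   # illustrative environment tweak
#   exec "$@"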
# ----------------------