# Dockerfile.neuron
# Base image for the Neuron TensorFlow Serving container.
FROM ubuntu:20.04

# Image metadata. The accept-bind-to-port label lets SageMaker inference
# pipelines hand SAGEMAKER_BIND_TO_PORT to this container:
# https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html
LABEL maintainer="Amazon AI" \
      dlc_major_version="1" \
      com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
# Define framework-related package sources
# These are build-time arguments; each default can be overridden with
# `docker build --build-arg NAME=value`.
# Interpreter name handed to the OSS compliance script later in the build.
ARG PYTHON=python3
# NOTE(review): PYTHON_PIP is not referenced by any instruction visible in this file.
ARG PYTHON_PIP=python3-pip
# pip executable used by the `RUN ${PIP} ... install` steps.
ARG PIP=pip3
# CPython release downloaded from python.org and built from source.
ARG PYTHON_VERSION=3.8.16
# Exported into the runtime environment as SAGEMAKER_TFS_VERSION.
ARG TFS_SHORT_VERSION=1.15.5
# Base URL the MKL shared libraries (libiomp5.so, libmklml_intel.so) are fetched from.
ARG TF_S3_URL=https://tensorflow-aws.s3-us-west-2.amazonaws.com
# Keep apt/debconf from stopping the build to ask for user input.
ENV DEBIAN_FRONTEND=noninteractive \
    DEBCONF_NONINTERACTIVE_SEEN=true

# MKL threading settings, see
# https://www.tensorflow.org/performance/performance_guide#tensorflow_with_intel%C2%AE_mkl_dnn
ENV KMP_AFFINITY=granularity=fine,compact,1,0 \
    KMP_BLOCKTIME=1 \
    KMP_SETTINGS=0

# Python: do not write .pyc/.pyo files on import, and leave stdout/stderr unbuffered.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Force UTF-8 for Python I/O and for the locale; see http://bugs.python.org/issue19846
ENV PYTHONIOENCODING=UTF-8 \
    LANG=C.UTF-8 \
    LC_ALL=C.UTF-8
# SageMaker / model-server runtime settings.
# https://docs.aws.amazon.com/sagemaker/latest/dg/docker-container-environmental-variables-entrypoint.html
# TensorFlow Serving version string, taken from the TFS_SHORT_VERSION build arg
# (presumably read by the serving code under /sagemaker; confirm).
ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}"
# Make the scripts copied to /sagemaker reachable through PATH.
ENV PATH="$PATH:/sagemaker"
# Prepend /usr/local/lib to the loader search path. Double quotes are required:
# inside single quotes Dockerfile performs no variable substitution, which would
# bake the literal text "$LD_LIBRARY_PATH" into the value. The ${VAR:+word} form
# adds the ":<previous value>" suffix only when a previous value exists, so an
# unset variable does not leave a trailing ":" (an empty entry, which the
# dynamic loader treats as the current directory).
ENV LD_LIBRARY_PATH="/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
# Directory that holds the served models; created later in the build.
ENV MODEL_BASE_PATH=/models
# The only required piece is the model name in order to differentiate endpoints
ENV MODEL_NAME=model
# Install the base OS tooling, then nginx and its njs module from the upstream
# nginx.org package repository.
#
# The nginx signing key and the repository are both fetched over HTTPS
# (ca-certificates is installed in the first step), so a network attacker cannot
# swap the key and the packages it is supposed to authenticate. The key is saved
# to a file with `curl --fail` instead of being piped into apt-key: in a pipeline
# /bin/sh only reports the status of the last command, which would hide a failed
# download.
#
# Version globs are quoted so the shell never tries to expand them against
# files in the working directory.
RUN apt-get update \
 && apt-get upgrade -y \
 && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    emacs \
    git \
    gnupg2 \
    libssl1.1 \
    openssh-client \
    openssh-server \
    openssl \
    vim \
    wget \
    unzip \
    zlib1g-dev \
 && curl -fsSL https://nginx.org/keys/nginx_signing.key -o /tmp/nginx_signing.key \
 && apt-key add /tmp/nginx_signing.key \
 && rm -f /tmp/nginx_signing.key \
 && echo 'deb https://nginx.org/packages/ubuntu/ focal nginx' >> /etc/apt/sources.list \
 && apt-get update \
 && apt-get -y install --no-install-recommends \
    "nginx=1.20.1*" \
    "nginx-module-njs=1.20.1*" \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean
# Downgrade pam_loginuid from "required" to "optional" for sshd sessions;
# otherwise the user is kicked off right after logging in over SSH.
RUN sed -i -e 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' /etc/pam.d/sshd
# Create SSH key.
# Generates a passphrase-less RSA key pair for root at build time, authorizes
# that same public key for root logins, and disables host key checking for all
# hosts in root's SSH client config. /var/run/sshd is created as well
# (presumably the runtime directory sshd expects; confirm).
# NOTE(review): the private key is stored in an image layer, so every container
# started from this image shares it and anyone who can pull the image can read
# it. Confirm this is intended for an inference image.
RUN mkdir -p /root/.ssh/ \
&& mkdir -p /var/run/sshd \
&& ssh-keygen -q -t rsa -N '' -f /root/.ssh/id_rsa \
&& cp /root/.ssh/id_rsa.pub /root/.ssh/authorized_keys \
&& printf "Host *\n StrictHostKeyChecking no\n" >> /root/.ssh/config
# Work from the filesystem root for the steps that follow.
WORKDIR /

# Development headers and libraries installed ahead of the CPython source
# build, plus python3-venv. Package names are kept in alphabetical order so
# additions and removals diff cleanly.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
      libbz2-dev \
      libc6-dev \
      libcap-dev \
      libffi-dev \
      libgdbm-dev \
      libncursesw5-dev \
      libreadline-gplv2-dev \
      libsqlite3-dev \
      libssl-dev \
      python3-venv \
      tk-dev \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean
# Download CPython ${PYTHON_VERSION} from python.org, build it once and install
# it with the default ./configure prefix. The source tree and tarball are
# removed in the same layer so they never reach the image.
# The build runs a single time (a repeated `make && make install` only doubles
# build time) and uses all available cores. wget and tar run quietly to keep
# the build log readable.
# NOTE(review): the tarball is not checksum-verified; consider pinning a sha256.
RUN wget -q "https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz" \
 && tar -xf "Python-${PYTHON_VERSION}.tgz" \
 && cd "Python-${PYTHON_VERSION}" \
 && ./configure \
 && make -j"$(nproc)" \
 && make install \
 && rm -rf ../Python-"${PYTHON_VERSION}"* \
 && rm -rf /tmp/tmp*
# Register the AWS Neuron apt repository and install the Neuron model server
# and tools.
#
# Everything happens in ONE layer on purpose: a standalone `RUN apt-get update`
# layer gets cached, so a later rebuild would install against a stale package
# index. Keeping update + install + cleanup together also keeps the apt lists
# out of the image.
#
# The repository key is downloaded to a file before `apt-key add` so a failed
# download fails the build; a `wget | apt-key` pipeline only reports the status
# of its last command under /bin/sh.
#
# Recommended packages are left enabled for this install.
# NOTE(review): confirm whether --no-install-recommends is safe for these packages.
RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list \
 && wget -qO /tmp/neuron-repo.pub https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
 && apt-key add /tmp/neuron-repo.pub \
 && rm -f /tmp/neuron-repo.pub \
 && apt-get update \
 && apt-get install -y \
    "tensorflow-model-server-neuron=1.15.*" \
    aws-neuronx-tools \
 && rm -rf /var/lib/apt/lists/* \
 && apt-get clean
# Bring pip and setuptools up to date before installing any other Python
# package; nothing is kept in pip's download cache.
RUN ${PIP} install --no-cache-dir --upgrade pip setuptools
# Provide unversioned "python" and "pip" commands that point at python3 and
# pip3; some TF tools look for a binary called "python".
RUN ln --symbolic $(which python3) /usr/local/bin/python \
 && ln --symbolic $(which pip3) /usr/bin/pip
# Python packages for the serving stack: AWS CLI/SDK, cython, falcon,
# gunicorn/gevent, requests and gRPC/protobuf.
RUN ${PIP} install --no-cache-dir --upgrade \
      "awscli<2" \
      "pyYAML>=5.4,<5.5" \
      "boto3" \
      "cython==0.29.21" \
      "falcon==2.0.0" \
      "gunicorn==20.0.4" \
      "gevent==21.1.1" \
      "requests==2.25.1" \
      "grpcio==1.56.2" \
      "protobuf==3.14.0"
# Neuron Python packages, pulled with the Neuron pip repository as an extra index.
# The installs are deliberately separate, sequentially executed pip invocations
# so package dependencies are resolved one step at a time; tensorflow_serving_api
# is installed with --no-deps.
# --no-cache-dir keeps pip's download cache out of the image layer, and the
# wildcard version specifiers are quoted so the shell cannot glob-expand them.
RUN ${PIP} install --no-cache-dir tensorflow_serving_api==1.15 --no-deps --extra-index-url https://pip.repos.neuron.amazonaws.com \
 && ${PIP} install --no-cache-dir "tensorboard-neuron>=1.15,<1.16" --extra-index-url https://pip.repos.neuron.amazonaws.com \
 && ${PIP} install --no-cache-dir "neuron-cc==1.13.*" --extra-index-url https://pip.repos.neuron.amazonaws.com \
 && ${PIP} install --no-cache-dir "tensorflow-neuron==1.15.5.2.6.*" --extra-index-url https://pip.repos.neuron.amazonaws.com
# Copy the sagemaker/ directory from the build context to /sagemaker, which
# this file also appends to PATH.
COPY ./sagemaker /sagemaker
# Fetch the Intel MKL runtime libraries into /usr/local/lib.
# --fail makes curl exit non-zero on an HTTP error; without it the server's
# error document would be saved as the .so file and the build would still pass.
# -sS hides the progress meter but keeps error messages; -L follows redirects.
RUN curl -fsSL "${TF_S3_URL}/MKL-Libraries/libiomp5.so" -o /usr/local/lib/libiomp5.so \
 && curl -fsSL "${TF_S3_URL}/MKL-Libraries/libmklml_intel.so" -o /usr/local/lib/libmklml_intel.so
# Ports the model server is started with in the generated entrypoint script.
# gRPC (--port)
EXPOSE 8500
# REST (--rest_api_port)
EXPOSE 8501
# Make sure the directory that holds the models exists inside the container.
RUN mkdir --parents "${MODEL_BASE_PATH}"
# Generate /usr/local/bin/entrypoint.sh, a small wrapper that starts the Neuron
# model server so MODEL_NAME / MODEL_BASE_PATH can come from the environment
# while extra arguments from the docker command line are still passed through.
#
# printf '%s\n' writes each argument as one line in every POSIX shell. `echo`
# with "\n" escapes only yields newlines when /bin/sh is dash; under bash the
# backslashes would land literally in the shebang line.
# The single quotes keep ${MODEL_NAME}, ${MODEL_BASE_PATH} and "$@" unexpanded
# at build time so they are evaluated when the script runs.
# `exec` replaces the wrapper shell with the server process, so signals sent to
# the script reach the server directly.
RUN printf '%s\n' \
      '#!/bin/bash' \
      '' \
      'exec /usr/local/bin/tensorflow_model_server_neuron --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' \
      > /usr/local/bin/entrypoint.sh \
 && chmod +x /usr/local/bin/entrypoint.sh
# Install deep_learning_container.py from the build context into
# /usr/local/bin and mark it executable.
# NOTE(review): what invokes this script is not visible in this file.
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
RUN chmod +x /usr/local/bin/deep_learning_container.py
# Download the OSS compliance bundle, install its testOSSCompliance script into
# /usr/local/bin, run generate_oss_compliance.sh with the home directory and
# the Python interpreter name as arguments, then delete the bundle, all within
# one layer.
# curl runs with --fail so an HTTP error stops the build at the download step
# rather than leaving an error page for unzip to reject.
RUN HOME_DIR=/root \
 && curl -fsSL -o "${HOME_DIR}/oss_compliance.zip" https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
 && unzip "${HOME_DIR}/oss_compliance.zip" -d "${HOME_DIR}/" \
 && cp "${HOME_DIR}/oss_compliance/test/testOSSCompliance" /usr/local/bin/testOSSCompliance \
 && chmod +x /usr/local/bin/testOSSCompliance \
 && chmod +x "${HOME_DIR}/oss_compliance/generate_oss_compliance.sh" \
 && "${HOME_DIR}/oss_compliance/generate_oss_compliance.sh" "${HOME_DIR}" "${PYTHON}" \
 && rm -rf "${HOME_DIR}"/oss_compliance*
# Place the TensorFlow license text at /license.txt.
# --fail stops the build on an HTTP error; otherwise the error body would be
# saved as the license file.
RUN curl -fsSL https://aws-dlc-licenses.s3.amazonaws.com/tensorflow/license.txt -o /license.txt
# Install the container entrypoint (neuron-entrypoint.py, stored under the name
# dockerd-entrypoint.py) and the neuron-monitor helper, then mark both
# executable.
COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
RUN chmod +x \
      /usr/local/bin/dockerd-entrypoint.py \
      /usr/local/bin/neuron-monitor.sh
# The container always starts dockerd-entrypoint.py, run with whichever
# `python` is found on PATH. CMD supplies its default argument, the
# entrypoint.sh wrapper generated earlier in the build. Arguments given to
# `docker run` replace CMD and are handed to dockerd-entrypoint.py instead.
# NOTE(review): how dockerd-entrypoint.py treats its arguments is not visible here.
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["/usr/local/bin/entrypoint.sh"]