-
Notifications
You must be signed in to change notification settings - Fork 463
/
Dockerfile.neuronx
164 lines (140 loc) · 5.92 KB
/
Dockerfile.neuronx
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# Base image: Ubuntu 20.04 ("focal").
FROM ubuntu:20.04

# Image metadata.
#   dlc_major_version: SDK 1.17.1 has version 1; 1.18.0 was skipped.
#   accept-bind-to-port: lets inference pipelines use SAGEMAKER_BIND_TO_PORT, see
#   https://docs.aws.amazon.com/sagemaker/latest/dg/inference-pipeline-real-time.html
LABEL maintainer="Amazon AI" \
      dlc_major_version="1" \
      com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
# Build arguments; each can be overridden with `docker build --build-arg NAME=value`.
# PYTHON and PIP name the interpreter and pip executables invoked by later RUN steps.
# PYTHON_VERSION selects the CPython source tarball that is downloaded and compiled.
# NOTE(review): PYTHON_PIP is not referenced anywhere in the visible part of this file.
ARG PYTHON=python3.10
ARG PYTHON_PIP=python3-pip
ARG PIP=pip3
ARG PYTHON_VERSION=3.10.12
# Exported as SAGEMAKER_TFS_VERSION and used to build the license file URL.
ARG TFS_SHORT_VERSION=2.10
# Neuron SDK components version numbers.
# The trailing ".*" leaves the last version component open. The runtime lib,
# collectives lib, tools and model server versions are passed to apt-get; the
# framework, compiler (CC) and tensorboard plugin versions are passed to pip.
ARG NEURONX_RUNTIME_LIB_VERSION=2.20.*
ARG NEURONX_COLLECTIVES_LIB_VERSION=2.20.*
ARG NEURONX_TOOLS_VERSION=2.17.*
ARG NEURONX_FRAMEWORK_VERSION=2.10.1.2.1.*
ARG NEURONX_TF_MODEL_SERVER_VERSION=2.10.1.2.10.2.*
ARG NEURONX_CC_VERSION=2.12.*
ARG NEURONX_TENSORBOARD_PLUGIN_VERSION=2.6.1.*
# Runtime environment for the image.

# See http://bugs.python.org/issue19846
ENV LANG=C.UTF-8
# Python won't try to write .pyc or .pyo files on the import of source modules
ENV PYTHONDONTWRITEBYTECODE=1
# Keep Python's stdout/stderr unbuffered so log lines show up immediately.
ENV PYTHONUNBUFFERED=1
ENV SAGEMAKER_TFS_VERSION="${TFS_SHORT_VERSION}"
# Neuron tools first, then the existing PATH, then the /sagemaker directory.
ENV PATH="/opt/aws/neuron/bin:$PATH:/sagemaker"
# Double quotes are required here: inside single quotes the Dockerfile parser does
# not substitute variables, which left the literal text "$LD_LIBRARY_PATH" in the
# search path. The ${VAR:+...} form appends any pre-existing value without leaving
# a trailing ":" (an empty entry means "current directory") when it is unset.
ENV LD_LIBRARY_PATH="/opt/aws/neuron/lib:/usr/local/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
ENV MODEL_BASE_PATH=/models
# The only required piece is the model name in order to differentiate endpoints
ENV MODEL_NAME=model
# Suppress interactive prompts from apt/debconf during the package installs below.
ENV DEBIAN_FRONTEND=noninteractive
# OS packages, then nginx + njs from the nginx.org package repository.
#
# Steps, all in one layer so the apt cleanup at the end actually shrinks it:
#   1. upgrade the base image packages (systemd explicitly),
#   2. install tooling plus the -dev libraries used to compile CPython later,
#   3. trust the nginx.org signing key and register its "focal" repository,
#   4. install the pinned nginx / nginx-module-njs 1.20.1 packages.
#
# The signing key is fetched over HTTPS with --fail into a temporary file rather
# than piped from a plain-HTTP URL: an HTTP error or a tampered download now
# aborts the build instead of feeding arbitrary bytes to apt-key. The version
# globs are quoted so the shell cannot expand "*" against files in the cwd.
RUN apt-get update \
 && apt-get upgrade -y \
 && apt-get -y upgrade --only-upgrade systemd \
 && apt-get -y install --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    emacs \
    git \
    gnupg2 \
    libbz2-dev \
    libc6-dev \
    libcap-dev \
    libffi-dev \
    libgdbm-dev \
    liblzma-dev \
    libncursesw5-dev \
    libreadline-gplv2-dev \
    libsqlite3-dev \
    libssl-dev \
    libssl1.1 \
    openssl \
    tk-dev \
    unzip \
    vim \
    wget \
    zlib1g-dev \
 && curl -fsSL https://nginx.org/keys/nginx_signing.key -o /tmp/nginx_signing.key \
 && apt-key add /tmp/nginx_signing.key \
 && rm -f /tmp/nginx_signing.key \
 && echo 'deb http://nginx.org/packages/ubuntu/ focal nginx' >> /etc/apt/sources.list \
 && apt-get update \
 && apt-get -y install --no-install-recommends \
    "nginx=1.20.1*" \
    "nginx-module-njs=1.20.1*" \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# Build CPython ${PYTHON_VERSION} from the official source tarball and install it
# with `make install`. The tarball, the extracted source tree and leftover
# /tmp/tmp* entries are removed in the same layer.
RUN PY_SRC="Python-${PYTHON_VERSION}" \
 && wget "https://www.python.org/ftp/python/${PYTHON_VERSION}/${PY_SRC}.tgz" \
 && tar -xvf "${PY_SRC}.tgz" \
 && ( cd "${PY_SRC}" && ./configure && make && make install ) \
 && rm -rf "${PY_SRC}" "${PY_SRC}.tgz" \
 && rm -rf /tmp/tmp*
# Register the AWS Neuron apt repository and install the Neuron model server,
# tools, collectives and runtime library at the versions given by the ARGs.
#
# Everything runs in ONE layer: with `apt-get update` in a layer of its own, a
# cached (stale) package index could be reused by a later rebuild of the install
# step and point at package versions that no longer exist. The signing key is
# downloaded to a file first, so a failed download aborts the build instead of
# being hidden behind a pipe. The package specs are quoted because the version
# ARGs contain "*".
RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list \
 && wget -qO /tmp/neuron.pub https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB \
 && apt-key add /tmp/neuron.pub \
 && rm -f /tmp/neuron.pub \
 && apt-get update \
 && apt-get install -y \
    "tensorflow-model-server-neuronx=${NEURONX_TF_MODEL_SERVER_VERSION}" \
    "aws-neuronx-tools=${NEURONX_TOOLS_VERSION}" \
    "aws-neuronx-collectives=${NEURONX_COLLECTIVES_LIB_VERSION}" \
    "aws-neuronx-runtime-lib=${NEURONX_RUNTIME_LIB_VERSION}" \
 && apt-get clean \
 && rm -rf /var/lib/apt/lists/*
# Bring pip and setuptools up to date before installing anything else.
RUN ${PIP} install --no-cache-dir --upgrade pip setuptools
# Python dependencies of the serving stack: cython, falcon, gunicorn, grpc, ...
# tensorflow-serving-api goes in through a second pip call that uses
# --no-dependencies to avoid installing the tensorflow binary.
RUN ${PIP} install --no-cache-dir \
      "awscli<2" \
      "boto3" \
      "cython==0.29.*" \
      "falcon==2.*" \
      "gunicorn==20.1.*" \
      "gevent>=23.9.0" \
      "requests" \
      "grpcio==1.56.2" \
      "urllib3<1.27,>=1.25.4" \
      "protobuf<4" \
 && ${PIP} install --no-cache-dir --no-dependencies \
      "tensorflow-serving-api==2.10.1"
# Neuron Python packages from the Neuron pip repository.
# The installs are kept as two sequential pip invocations so package dependencies
# resolve correctly: compiler + framework first, then the tensorboard plugin.
# --no-cache-dir matches every other pip call in this file and keeps pip's
# download cache out of the image layer. The requirement specifiers are quoted
# because the version ARGs contain "*".
RUN ${PIP} install --no-cache-dir \
      --extra-index-url https://pip.repos.neuron.amazonaws.com \
      "neuronx-cc==${NEURONX_CC_VERSION}" \
      "tensorflow-neuronx==${NEURONX_FRAMEWORK_VERSION}" \
 && ${PIP} install --no-cache-dir \
      --extra-index-url https://pip.repos.neuron.amazonaws.com \
      "tensorboard-plugin-neuronx==${NEURONX_TENSORBOARD_PLUGIN_VERSION}"
# Copy the `sagemaker` directory from the build context to /sagemaker
# (/sagemaker is appended to PATH by the ENV PATH instruction in this file).
COPY ./sagemaker /sagemaker
# Some TF tools expect a "python" binary, so expose ${PYTHON} as
# /usr/local/bin/python and ${PIP} as /usr/bin/pip.
# The lookups are stored in variables first: if `which` cannot find a binary the
# assignment returns non-zero and the build stops. With an unquoted inline
# $(which ...), an empty result made `ln -s` run with a single operand and
# silently create a stray link in the current directory instead.
RUN PY_BIN="$(which "${PYTHON}")" \
 && PIP_BIN="$(which "${PIP}")" \
 && ln -s "${PY_BIN}" /usr/local/bin/python \
 && ln -s "${PIP_BIN}" /usr/bin/pip
# Download the prebuilt MKL shared libraries (libiomp5, libmklml_intel) into
# /usr/local/lib, which is on LD_LIBRARY_PATH.
# --fail makes curl exit non-zero on an HTTP error; without it an S3 error
# document would be saved under the .so name and the build would still succeed.
RUN curl -fsSL https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libiomp5.so \
      -o /usr/local/lib/libiomp5.so \
 && curl -fsSL https://tensorflow-aws.s3-us-west-2.amazonaws.com/MKL-Libraries/libmklml_intel.so \
      -o /usr/local/lib/libmklml_intel.so
# Ports served by the model server: 8500 for gRPC, 8501 for REST.
EXPOSE 8500
EXPOSE 8501

# Create the directory in which the container keeps its models.
RUN mkdir -p "${MODEL_BASE_PATH}"
# Create a script that runs the model server so we can use environment variables
# while also passing in arguments from the docker command line.
# - printf writes the lines verbatim; the previous `echo '...\n\n'` only produced
#   real newlines when the build shell's echo interprets backslash escapes.
# - The single quotes keep ${MODEL_NAME} / ${MODEL_BASE_PATH} / "$@" literal so
#   they are expanded when the script runs, not at build time.
# - `exec` replaces the wrapper shell with the server process, so signals and the
#   exit status reach/come from the server directly.
RUN printf '%s\n' \
      '#!/bin/bash' \
      '' \
      'exec /usr/local/bin/tensorflow_model_server_neuron --port=8500 --rest_api_port=8501 --model_name=${MODEL_NAME} --model_base_path=${MODEL_BASE_PATH}/${MODEL_NAME} "$@"' \
      > /usr/local/bin/entrypoint.sh \
 && chmod +x /usr/local/bin/entrypoint.sh
# Copy deep_learning_container.py from the build context into /usr/local/bin
# and mark it executable.
COPY deep_learning_container.py /usr/local/bin/deep_learning_container.py
RUN chmod +x /usr/local/bin/deep_learning_container.py
# OSS compliance: download and unpack the oss_compliance bundle under /root,
# install its testOSSCompliance helper into /usr/local/bin, run
# generate_oss_compliance.sh with /root and the Python executable name as
# arguments, then delete the bundle in the same layer.
RUN OSS_HOME=/root \
 && OSS_ZIP="${OSS_HOME}/oss_compliance.zip" \
 && OSS_DIR="${OSS_HOME}/oss_compliance" \
 && curl -o "${OSS_ZIP}" https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
 && unzip "${OSS_ZIP}" -d "${OSS_HOME}/" \
 && cp "${OSS_DIR}/test/testOSSCompliance" /usr/local/bin/testOSSCompliance \
 && chmod +x /usr/local/bin/testOSSCompliance "${OSS_DIR}/generate_oss_compliance.sh" \
 && "${OSS_DIR}/generate_oss_compliance.sh" "${OSS_HOME}" "${PYTHON}" \
 && rm -rf "${OSS_HOME}"/oss_compliance*
# Fetch the license text matching TFS_SHORT_VERSION and store it as /license.txt.
# --fail aborts the build on an HTTP error; without it the error response body
# would be written to /license.txt and the build would still succeed.
RUN curl -fsSL "https://aws-dlc-licenses.s3.amazonaws.com/tensorflow-${TFS_SHORT_VERSION}/license.txt" \
      -o /license.txt
# Install the container entrypoint and the Neuron monitor helper script.
# neuron-entrypoint.py is stored under the name dockerd-entrypoint.py, which is
# the path the ENTRYPOINT instruction invokes; both files are made executable.
COPY neuron-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
COPY neuron-monitor.sh /usr/local/bin/neuron-monitor.sh
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py \
&& chmod +x /usr/local/bin/neuron-monitor.sh
# The container always starts dockerd-entrypoint.py, run through the "python"
# symlink created earlier in this file. CMD supplies the generated entrypoint.sh
# as the default argument and can be replaced on the `docker run` command line.
# NOTE(review): how the argument is consumed is defined in neuron-entrypoint.py,
# which is not visible here.
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
CMD ["/usr/local/bin/entrypoint.sh"]