Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pull request/1064 #1069

Merged
merged 8 commits into from
Sep 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 13 additions & 13 deletions docker/dockerfile.merlin
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ ARG TRITON_VERSION=23.06
ARG DLFW_VERSION=23.06

ARG FULL_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3
ARG SDK_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-sdk
ARG BASE_IMAGE=nvcr.io/nvidia/tritonserver:${TRITON_VERSION}-py3-min
ARG DLFW_IMAGE=nvcr.io/nvidia/tensorflow:${TRITON_VERSION}-tf2-py3

FROM ${FULL_IMAGE} as triton
FROM ${SDK_IMAGE} as sdk
FROM ${DLFW_IMAGE} as dlfw
FROM ${BASE_IMAGE} as build

Expand Down Expand Up @@ -118,8 +120,9 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/
# NOTE 2023-07: fil-backend is not available on ARM.
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/fil/
# NOTE 2023-09: fil-backend is not available on ARM. Some docker versions flag an error if there is
# not a single source file to copy. To avoid this, we also specify a small dummy file.
COPY --chown=1000:1000 --from=triton /opt/tritonserver/LICENSE /opt/tritonserver/backends/fil/* backends/fil/
COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/.

ENV PATH=/opt/tritonserver/bin:${PATH}:
Expand Down Expand Up @@ -187,11 +190,12 @@ RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64") && \
python3 \
python3-pip \
python3-dev \
python3-libnvinfer \
rapidjson-dev \
tree \
wget \
zlib1g-dev \
# Required to build RocksDB and RdKafka..
# Required to build RocksDB and RdKafka.
libgflags-dev \
libbz2-dev \
libsnappy-dev \
Expand All @@ -208,11 +212,6 @@ RUN ARCH=$([ "${TARGETARCH}" = "arm64" ] && echo "sbsa" || echo "x86_64") && \
openssh-server \
# [ HugeCTR ]
libaio-dev && \
# NOTE: libnvinfer is installed anyway, just Python bindings are missing on ARM.
if [[ "$TARGETARCH" != "arm64" ]]; then \
# TensorRT dependencies
apt install -y --no-install-recommends python3-libnvinfer \
; fi && \
apt autoremove -y && \
apt clean && \
rm -rf /var/lib/apt/lists/*
Expand All @@ -225,7 +224,7 @@ ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${JAVA_HOME}/lib:${JAVA_HOME}/lib/server
# Binaries
COPY --chown=1000:1000 --from=build /usr/local/bin/cmake /usr/local/bin/
COPY --chown=1000:1000 --from=build /usr/local/bin/pytest /usr/local/bin/
COPY --chown=1000:1000 --from=build /usr/local/bin/perf_* /usr/local/bin/
COPY --chown=1000:1000 --from=sdk /usr/local/bin/perf_* /usr/local/bin/

# Triton Server
WORKDIR /opt/tritonserver
Expand All @@ -237,8 +236,9 @@ COPY --chown=1000:1000 --from=triton /opt/tritonserver/lib lib/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/include include/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/repoagents/ repoagents/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/python backends/python/
# NOTE 2023-07: fil-backend is not available on ARM.
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/fil* backends/fil/
# NOTE 2023-09: fil-backend is not available on ARM. Some docker versions flag an error if there is
# not a single source file to copy. To avoid this, we also specify a small dummy file.
COPY --chown=1000:1000 --from=triton /opt/tritonserver/LICENSE /opt/tritonserver/backends/fil/* backends/fil/
COPY --chown=1000:1000 --from=triton /opt/tritonserver/backends/tensorrt backends/tensorrt/
COPY --chown=1000:1000 --from=triton /usr/bin/serve /usr/bin/.
COPY --chown=1000:1000 --from=triton /usr/lib/*-linux-gnu/libdcgm.so.2 /tmp
Expand Down Expand Up @@ -362,7 +362,7 @@ ENV PATH=${PATH}:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin \
YARN_NODEMANAGER_USER=root \
# Tackles with ThreadReaper stack overflow issues: https://bugs.openjdk.java.net/browse/JDK-8153057
LIBHDFS_OPTS='-Djdk.lang.processReaperUseDefaultStackSize=true' \
# Tackles with JVM setting error signals that UCX library will check (GitLab issue #425).
# Tackles with JVM setting error signals that the UCX library checks (GitLab issue #425).
UCX_ERROR_SIGNALS='' \
CLASSPATH=${CLASSPATH}:\
${HADOOP_HOME}/etc/hadoop/*:\
Expand All @@ -389,7 +389,7 @@ ENV PATH=$PATH:${HUGECTR_HOME}/bin \
LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${HUGECTR_HOME}/lib

RUN if [ "${HUGECTR_DEV_MODE}" == "false" ]; then \
# Install HugeCTR inference which is dependency for hps_backenc
# Install HugeCTR inference which is dependency for hps_backend
git clone --branch ${HUGECTR_VER} --depth 1 https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \
cd /hugectr && \
git submodule update --init --recursive && \
Expand Down
4 changes: 3 additions & 1 deletion docker/dockerfile.tf
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ ARG _CI_JOB_TOKEN=""
ARG HUGECTR_VER=main

ENV LD_LIBRARY_PATH=/usr/local/lib/python${PYTHON_VERSION}/dist-packages/tensorflow:$LD_LIBRARY_PATH \
LIBRARY_PATH=${HUGECTR_HOME}/lib:$LIBRARY_PATH \
SOK_COMPILE_UNIT_TEST=ON

RUN mkdir -p /usr/local/nvidia/lib64 && \
Expand All @@ -55,6 +54,9 @@ ARG INSTALL_DISTRIBUTED_EMBEDDINGS=false
ARG TFDE_VER=v23.03.00

RUN if [ "$HUGECTR_DEV_MODE" == "false" ]; then \
export HUGECTR_HOME=/usr/local/hugectr && \
rm -rf ${HUGECTR_HOME}/lib/libgmock* ${HUGECTR_HOME}/lib/pkgconfig/gmock* ${HUGECTR_HOME}/include/gmock && \
rm -rf ${HUGECTR_HOME}/lib/libgtest* ${HUGECTR_HOME}/lib/pkgconfig/gtest* ${HUGECTR_HOME}/include/gtest && \
git clone --branch ${HUGECTR_VER} --depth 1 --recurse-submodules --shallow-submodules https://${_CI_JOB_TOKEN}${_HUGECTR_REPO} /hugectr && \
pushd /hugectr && \
rm -rf .git/modules && \
Expand Down
Loading