datarobot · elatt · Oct 9, 2024 · Sep 11, 2024 · Oct 8, 2024 · Oct 8, 2024
diff --git a/custom_model_runner/datarobot_drum/drum/gpu_predictors/base.py b/custom_model_runner/datarobot_drum/drum/gpu_predictors/base.py
@@ -83,6 +83,12 @@ def __init__(self):
         except ImportError:
             raise DrumCommonException("OpenAI Python SDK is not installed")
 
+    def _supports_chat(self):
+        return True
+
+    def _chat(self, completion_create_params):
+        return self.ai_client.chat.completions.create(**completion_create_params)
+
     def has_read_input_data_hook(self):
         return False
 

diff --git a/custom_model_runner/datarobot_drum/drum/language_predictors/base_language_predictor.py b/custom_model_runner/datarobot_drum/drum/language_predictors/base_language_predictor.py
@@ -143,12 +143,21 @@ def _init_mlops(self):
         self._mlops.init()
 
     def _configure_mlops_for_chat(self):
-        self._mlops.set_channel_config("spooler_type=API")
+        # If monitor_settings were provided (e.g. for testing), use them; otherwise
+        # fall back to the API spooler as the default config.
+        if self._params.get("monitor_settings"):
+            self._mlops.set_channel_config(self._params["monitor_settings"])
+        else:
+            self._mlops.set_api_spooler(
+                # TODO: when 10.2.0 has been released, pass these arguments explicitly:
+                # mlops_service_url=self._params["external_webserver_url"],
+                # mlops_api_token=self._params["api_token"],
+            )
 
-        self._prompt_column_name = self._get_prompt_column_name()
+        self._prompt_column_name = self.get_prompt_column_name()
         logger.debug("Prompt column name: %s", self._prompt_column_name)
 
-    def _get_prompt_column_name(self):
+    def get_prompt_column_name(self):
         if not self._params.get("deployment_id", None):
             logger.error(
                 "No deployment ID found while configuring mlops for chat. "
@@ -241,7 +250,7 @@ def chat(self, completion_create_params):
         else:
 
             def generator():
-                message_content = ""
+                message_content = []
                 try:
                     for chunk in response:
                         message_content += (
@@ -255,7 +264,7 @@ def generator():
                     raise
 
                 self._mlops_report_chat_prediction(
-                    completion_create_params, start_time, message_content
+                    completion_create_params, start_time, "".join(message_content)
                 )
 
             return generator()

diff --git a/custom_model_runner/datarobot_drum/resource/drum_server_utils.py b/custom_model_runner/datarobot_drum/resource/drum_server_utils.py
@@ -78,6 +78,7 @@ def __init__(
         fail_on_shutdown_error=True,
         pass_args_as_env_vars=False,
         verbose: bool = True,
+        logging_level: Optional[str] = None,
         append_cmd: Optional[str] = None,
         user_secrets_mount_path: Optional[str] = None,
         thread_class=Thread,
@@ -100,6 +101,7 @@ def __init__(
         self._with_nginx = nginx
         self._fail_on_shutdown_error = fail_on_shutdown_error
         self._verbose = verbose
+        self._log_level = logging_level or logging.getLevelName(logging.root.level).lower()
 
         self._pass_args_as_env_vars = pass_args_as_env_vars
         self._custom_model_dir = custom_model_dir
@@ -175,8 +177,7 @@ def server_thread(self):
         return self._server_thread
 
     def get_command(self):
-        log_level = logging.getLevelName(logging.root.level).lower()
-        cmd = "{} server --logging-level={}".format(ArgumentsOptions.MAIN_COMMAND, log_level)
+        cmd = f"{ArgumentsOptions.MAIN_COMMAND} server --logging-level={self._log_level}"
 
         if self._pass_args_as_env_vars:
             os.environ[ArgumentOptionsEnvVars.CODE_DIR] = str(self._custom_model_dir)

diff --git a/public_dropin_gpu_environments/nim_llm/dr_requirements.in b/public_dropin_gpu_environments/nim_llm/dr_requirements.in
@@ -1,8 +1,5 @@
-psutil
-boto3
-openai
-awscli
 datarobot
 datarobot-mlops
 datarobot-mlops-connected-client
 datarobot-drum
+openai>=1.17.0
diff --git a/public_dropin_gpu_environments/nim_llm/dr_requirements.txt b/public_dropin_gpu_environments/nim_llm/dr_requirements.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
-#    pip-compile dr_requirements.in
+#    pip-compile --no-emit-index-url --no-emit-trusted-host dr_requirements.in
 #
 aiohappyeyeballs==2.3.5
     # via aiohttp
@@ -22,15 +22,6 @@ async-timeout==4.0.3
     # via aiohttp
 attrs==23.2.0
     # via aiohttp
-awscli==1.32.65
-    # via -r dr_requirements.in
-boto3==1.34.65
-    # via -r dr_requirements.in
-botocore==1.34.65
-    # via
-    #   awscli
-    #   boto3
-    #   s3transfer
 certifi==2024.7.4
     # via
     #   httpcore
@@ -40,8 +31,6 @@ charset-normalizer==3.3.2
     # via requests
 click==8.1.7
     # via flask
-colorama==0.4.4
-    # via awscli
 datarobot==3.3.1
     # via
     #   -r dr_requirements.in
@@ -58,8 +47,6 @@ distro==1.9.0
     # via openai
 docker==4.4.4
     # via datarobot-drum
-docutils==0.16
-    # via awscli
 exceptiongroup==1.2.0
     # via anyio
 flask==2.2.5
@@ -95,10 +82,6 @@ jinja2==3.1.4
     # via
     #   datarobot-drum
     #   flask
-jmespath==1.0.1
-    # via
-    #   boto3
-    #   botocore
 julia==0.5.7
     # via datarobot-drum
 markupsafe==2.1.3
@@ -123,7 +106,7 @@ numpy==1.26.4
     #   pandas
     #   pyarrow
     #   scipy
-openai==1.14.1
+openai==1.39.0
     # via -r dr_requirements.in
 orjson==3.9.15
     # via datarobot-mlops
@@ -141,7 +124,6 @@ progress==1.6
     # via datarobot-drum
 psutil==5.9.8
     # via
-    #   -r dr_requirements.in
     #   memory-profiler
     #   mlpiper
 py4j==0.10.9.7
@@ -151,23 +133,19 @@ py4j==0.10.9.7
     #   mlpiper
 pyarrow==14.0.1
     # via datarobot-drum
-pyasn1==0.5.1
-    # via rsa
 pydantic==2.6.4
     # via openai
 pydantic-core==2.16.3
     # via pydantic
 python-dateutil==2.9.0.post0
     # via
-    #   botocore
     #   datarobot-mlops
     #   pandas
     #   strictyaml
 pytz==2024.1
     # via pandas
 pyyaml==6.0.1
     # via
-    #   awscli
     #   datarobot
     #   datarobot-drum
     #   datarobot-mlops
@@ -182,16 +160,10 @@ requests-toolbelt==1.0.0
     # via
     #   datarobot
     #   datarobot-mlops-connected-client
-rsa==4.7.2
-    # via awscli
 ruamel-yaml==0.17.4
     # via strictyaml
 ruamel-yaml-clib==0.2.8
     # via ruamel-yaml
-s3transfer==0.10.1
-    # via
-    #   awscli
-    #   boto3
 scipy==1.12.0
     # via datarobot-drum
 six==1.16.0
@@ -230,7 +202,6 @@ tzdata==2024.1
     # via pandas
 urllib3==1.26.19
     # via
-    #   botocore
     #   datarobot
     #   requests
 websocket-client==1.7.0

diff --git a/public_dropin_gpu_environments/vllm/Dockerfile b/public_dropin_gpu_environments/vllm/Dockerfile
@@ -1,14 +1,10 @@
-FROM vllm/vllm-openai:v0.5.4
+FROM vllm/vllm-openai:v0.5.5
 USER root
 RUN apt-get update && apt-get install -y \
     python3-pip \
     python3-venv \
-    zstd \
   && rm -rf /var/lib/apt/lists/*
 
-# Add support for Quantization: https://docs.vllm.ai/en/latest/quantization/auto_awq.html
-RUN pip install --no-cache-dir autoawq
-
 # Don't send any telemetry data (vLLM or HuggingFace libraries)
 ENV DO_NOT_TRACK=1
 
@@ -22,6 +18,12 @@ COPY dr_requirements.txt dr_requirements.txt
 RUN $DATAROBOT_VENV_PATH/bin/pip install -r dr_requirements.txt
 
 ENV CODE_DIR=/opt/code ADDRESS=0.0.0.0:8080
+
+# Make sure these cache dirs are writable by the vLLM process
+ENV HF_HOME=$CODE_DIR/.cache/huggingface
+ENV NUMBA_CACHE_DIR=$CODE_DIR/.cache/numba
+ENV OUTLINES_CACHE_DIR=$CODE_DIR/.cache/outlines
+
 WORKDIR ${CODE_DIR}
 COPY --chown=1000:1000 ./*.sh ./*.py ${CODE_DIR}/
 

diff --git a/public_dropin_gpu_environments/vllm/dr_requirements.in b/public_dropin_gpu_environments/vllm/dr_requirements.in
@@ -2,5 +2,4 @@ datarobot
 datarobot-mlops
 datarobot-mlops-connected-client
 datarobot-drum
-openai
-uwsgi
+openai>=1.17.0
diff --git a/public_dropin_gpu_environments/vllm/dr_requirements.txt b/public_dropin_gpu_environments/vllm/dr_requirements.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with Python 3.9
 # by the following command:
 #
-#    pip-compile dr_requirements.in
+#    pip-compile --no-emit-index-url --no-emit-trusted-host dr_requirements.in
 #
 aiohappyeyeballs==2.3.5
     # via aiohttp
@@ -204,8 +204,6 @@ urllib3==1.26.19
     # via
     #   datarobot
     #   requests
-uwsgi==2.0.25.1
-    # via -r dr_requirements.in
 websocket-client==1.7.0
     # via docker
 werkzeug==3.0.3

diff --git a/tests/functional/run_integration_tests_in_framework_container.sh b/tests/functional/run_integration_tests_in_framework_container.sh
@@ -6,7 +6,7 @@
 #
 # Released under the terms of DataRobot Tool and Utility Agreement.
 
-SKIP_SECRETS="AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|NGC_CLI_API_KEY|HF_TOKEN"
+SKIP_SECRETS="AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|NGC_CLI_API_KEY|NGC_API_KEY|HF_TOKEN"
 
 echo "--- env ----"
 export | grep -Ev $SKIP_SECRETS