Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement chat in BaseOpenAiGpuPredictor #1122

Merged
merged 26 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,12 @@ def __init__(self):
except ImportError:
raise DrumCommonException("OpenAI Python SDK is not installed")

def _supports_chat(self):
return True

def _chat(self, completion_create_params):
return self.ai_client.chat.completions.create(**completion_create_params)

elatt marked this conversation as resolved.
Show resolved Hide resolved
def has_read_input_data_hook(self):
return False

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,12 +143,21 @@ def _init_mlops(self):
self._mlops.init()

def _configure_mlops_for_chat(self):
self._mlops.set_channel_config("spooler_type=API")
# If monitor_settings were provided (e.g. for testing) use them, otherwise we will
# use the API spooler as the default config.
if self._params.get("monitor_settings"):
self._mlops.set_channel_config(self._params["monitor_settings"])
else:
self._mlops.set_api_spooler(
# TODO: when 10.2.0 has been released...
# mlops_service_url=self._params["external_webserver_url"],
# mlops_api_token=self._params["api_token"],
)

self._prompt_column_name = self._get_prompt_column_name()
self._prompt_column_name = self.get_prompt_column_name()
logger.debug("Prompt column name: %s", self._prompt_column_name)

def _get_prompt_column_name(self):
def get_prompt_column_name(self):
baekdahl marked this conversation as resolved.
Show resolved Hide resolved
if not self._params.get("deployment_id", None):
logger.error(
"No deployment ID found while configuring mlops for chat. "
Expand Down Expand Up @@ -241,7 +250,7 @@ def chat(self, completion_create_params):
else:

def generator():
message_content = ""
message_content = []
baekdahl marked this conversation as resolved.
Show resolved Hide resolved
try:
for chunk in response:
message_content += (
Expand All @@ -255,7 +264,7 @@ def generator():
raise

self._mlops_report_chat_prediction(
completion_create_params, start_time, message_content
completion_create_params, start_time, "".join(message_content)
)

return generator()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ def __init__(
fail_on_shutdown_error=True,
pass_args_as_env_vars=False,
verbose: bool = True,
logging_level: Optional[str] = None,
append_cmd: Optional[str] = None,
user_secrets_mount_path: Optional[str] = None,
thread_class=Thread,
Expand All @@ -100,6 +101,7 @@ def __init__(
self._with_nginx = nginx
self._fail_on_shutdown_error = fail_on_shutdown_error
self._verbose = verbose
self._log_level = logging_level or logging.getLevelName(logging.root.level).lower()

self._pass_args_as_env_vars = pass_args_as_env_vars
self._custom_model_dir = custom_model_dir
Expand Down Expand Up @@ -175,8 +177,7 @@ def server_thread(self):
return self._server_thread

def get_command(self):
log_level = logging.getLevelName(logging.root.level).lower()
cmd = "{} server --logging-level={}".format(ArgumentsOptions.MAIN_COMMAND, log_level)
cmd = f"{ArgumentsOptions.MAIN_COMMAND} server --logging-level={self._log_level}"

if self._pass_args_as_env_vars:
os.environ[ArgumentOptionsEnvVars.CODE_DIR] = str(self._custom_model_dir)
Expand Down
5 changes: 1 addition & 4 deletions public_dropin_gpu_environments/nim_llm/dr_requirements.in
Original file line number Diff line number Diff line change
@@ -1,8 +1,5 @@
psutil
boto3
openai
awscli
datarobot
datarobot-mlops
datarobot-mlops-connected-client
datarobot-drum
openai>=1.17.0
33 changes: 2 additions & 31 deletions public_dropin_gpu_environments/nim_llm/dr_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile dr_requirements.in
# pip-compile --no-emit-index-url --no-emit-trusted-host dr_requirements.in
#
aiohappyeyeballs==2.3.5
# via aiohttp
Expand All @@ -22,15 +22,6 @@ async-timeout==4.0.3
# via aiohttp
attrs==23.2.0
# via aiohttp
awscli==1.32.65
# via -r dr_requirements.in
boto3==1.34.65
# via -r dr_requirements.in
botocore==1.34.65
# via
# awscli
# boto3
# s3transfer
certifi==2024.7.4
# via
# httpcore
Expand All @@ -40,8 +31,6 @@ charset-normalizer==3.3.2
# via requests
click==8.1.7
# via flask
colorama==0.4.4
# via awscli
datarobot==3.3.1
# via
# -r dr_requirements.in
Expand All @@ -58,8 +47,6 @@ distro==1.9.0
# via openai
docker==4.4.4
# via datarobot-drum
docutils==0.16
# via awscli
exceptiongroup==1.2.0
# via anyio
flask==2.2.5
Expand Down Expand Up @@ -95,10 +82,6 @@ jinja2==3.1.4
# via
# datarobot-drum
# flask
jmespath==1.0.1
# via
# boto3
# botocore
julia==0.5.7
# via datarobot-drum
markupsafe==2.1.3
Expand All @@ -123,7 +106,7 @@ numpy==1.26.4
# pandas
# pyarrow
# scipy
openai==1.14.1
openai==1.39.0
# via -r dr_requirements.in
orjson==3.9.15
# via datarobot-mlops
Expand All @@ -141,7 +124,6 @@ progress==1.6
# via datarobot-drum
psutil==5.9.8
# via
# -r dr_requirements.in
# memory-profiler
# mlpiper
py4j==0.10.9.7
Expand All @@ -151,23 +133,19 @@ py4j==0.10.9.7
# mlpiper
pyarrow==14.0.1
# via datarobot-drum
pyasn1==0.5.1
# via rsa
pydantic==2.6.4
# via openai
pydantic-core==2.16.3
# via pydantic
python-dateutil==2.9.0.post0
# via
# botocore
# datarobot-mlops
# pandas
# strictyaml
pytz==2024.1
# via pandas
pyyaml==6.0.1
# via
# awscli
# datarobot
# datarobot-drum
# datarobot-mlops
Expand All @@ -182,16 +160,10 @@ requests-toolbelt==1.0.0
# via
# datarobot
# datarobot-mlops-connected-client
rsa==4.7.2
# via awscli
ruamel-yaml==0.17.4
# via strictyaml
ruamel-yaml-clib==0.2.8
# via ruamel-yaml
s3transfer==0.10.1
# via
# awscli
# boto3
scipy==1.12.0
# via datarobot-drum
six==1.16.0
Expand Down Expand Up @@ -230,7 +202,6 @@ tzdata==2024.1
# via pandas
urllib3==1.26.19
# via
# botocore
# datarobot
# requests
websocket-client==1.7.0
Expand Down
12 changes: 7 additions & 5 deletions public_dropin_gpu_environments/vllm/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
FROM vllm/vllm-openai:v0.5.4
FROM vllm/vllm-openai:v0.5.5
USER root
RUN apt-get update && apt-get install -y \
python3-pip \
python3-venv \
zstd \
&& rm -rf /var/lib/apt/lists/*

# Add support for Quantization: https://docs.vllm.ai/en/latest/quantization/auto_awq.html
RUN pip install --no-cache-dir autoawq

# Don't send any telemetry data (vLLM or HuggingFace libraries)
ENV DO_NOT_TRACK=1

Expand All @@ -22,6 +18,12 @@ COPY dr_requirements.txt dr_requirements.txt
RUN $DATAROBOT_VENV_PATH/bin/pip install -r dr_requirements.txt

ENV CODE_DIR=/opt/code ADDRESS=0.0.0.0:8080

# Make sure these cache dirs are writable by the vLLM process
ENV HF_HOME=$CODE_DIR/.cache/huggingface
ENV NUMBA_CACHE_DIR=$CODE_DIR/.cache/numba
ENV OUTLINES_CACHE_DIR=$CODE_DIR/.cache/outlines

WORKDIR ${CODE_DIR}
COPY --chown=1000:1000 ./*.sh ./*.py ${CODE_DIR}/

Expand Down
3 changes: 1 addition & 2 deletions public_dropin_gpu_environments/vllm/dr_requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,4 @@ datarobot
datarobot-mlops
datarobot-mlops-connected-client
datarobot-drum
openai
uwsgi
openai>=1.17.0
4 changes: 1 addition & 3 deletions public_dropin_gpu_environments/vllm/dr_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with Python 3.9
# by the following command:
#
# pip-compile dr_requirements.in
# pip-compile --no-emit-index-url --no-emit-trusted-host dr_requirements.in
#
aiohappyeyeballs==2.3.5
# via aiohttp
Expand Down Expand Up @@ -204,8 +204,6 @@ urllib3==1.26.19
# via
# datarobot
# requests
uwsgi==2.0.25.1
# via -r dr_requirements.in
websocket-client==1.7.0
# via docker
werkzeug==3.0.3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#
# Released under the terms of DataRobot Tool and Utility Agreement.

SKIP_SECRETS="AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|NGC_CLI_API_KEY|HF_TOKEN"
SKIP_SECRETS="AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|NGC_CLI_API_KEY|NGC_API_KEY|HF_TOKEN"

echo "--- env ----"
export | grep -Ev $SKIP_SECRETS
Expand Down
Loading