Support huggingface snapshot locations in pyfunc log_model artifacts parameter #9362

Merged 20 commits on Aug 24, 2023
Changes from 8 commits
12 changes: 6 additions & 6 deletions examples/gateway/mlflow_serving/README.md
@@ -197,22 +197,22 @@ class MPT(mlflow.pyfunc.PythonModel):
def load_context(self, context):
"""
This method initializes the tokenizer and language model
using the specified model repository.
using the specified model snapshot directory.
"""
# Initialize tokenizer and language model
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
context.artifacts["repository"], padding_side="left"
context.artifacts["snapshot"], padding_side="left"
)

config = transformers.AutoConfig.from_pretrained(
context.artifacts["repository"], trust_remote_code=True
context.artifacts["snapshot"], trust_remote_code=True
)
# Comment out this configuration setting if not running on a GPU or if triton is not installed.
# Note that triton dramatically improves the inference speed performance
config.attn_config["attn_impl"] = "triton"

self.model = transformers.AutoModelForCausalLM.from_pretrained(
context.artifacts["repository"],
context.artifacts["snapshot"],
config=config,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
@@ -242,7 +242,7 @@ class MPT(mlflow.pyfunc.PythonModel):
{RESPONSE_KEY}
"""

def predict(self, context, model_input):
def predict(self, context, model_input, params=None):
"""
This method generates prediction for the given input.
"""
@@ -305,7 +305,7 @@ with mlflow.start_run():
mlflow.pyfunc.log_model(
"mpt-7b-instruct",
python_model=MPT(),
artifacts={"repository": snapshot_location},
artifacts={"snapshot": snapshot_location},
pip_requirements=[
"torch",
"transformers",
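
For context, the `snapshot_location` passed to `artifacts={"snapshot": snapshot_location}` above is typically produced with `huggingface_hub.snapshot_download`. A minimal sketch, assuming the MPT-7B-Instruct repo id this README is built around (the `local_dir` value is illustrative):

from huggingface_hub import snapshot_download

# Download the model snapshot to a local directory and keep its path,
# so it can be passed to log_model as the "snapshot" artifact.
snapshot_location = snapshot_download(
    repo_id="mosaicml/mpt-7b-instruct",
    local_dir="mpt-7b-instruct",
)
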
5 changes: 5 additions & 0 deletions mlflow/deployments/__init__.py
@@ -40,11 +40,16 @@ def get_predictions(self, predictions_format="dataframe", dtype=None):
"""
import numpy as np
import pandas as pd
from pandas.core.dtypes.common import is_list_like

if predictions_format == "dataframe":
predictions = self["predictions"]
if isinstance(predictions, str):
return pd.DataFrame(data=[predictions])
if isinstance(predictions, dict) and not any(
is_list_like(p) and getattr(p, "ndim", 1) == 1 for p in predictions.values()
):
return pd.DataFrame(data=predictions, index=[0])
return pd.DataFrame(data=predictions)
elif predictions_format == "ndarray":
return np.array(self["predictions"], dtype)
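
For reference, a minimal sketch of what the new dict-handling branch enables, assuming `PredictionsResponse` is importable from `mlflow.deployments` (as in the test further down) and an illustrative question-answering payload:

from mlflow.deployments import PredictionsResponse

# A question-answering response is a dict of scalars; without the new branch,
# pd.DataFrame(data=predictions) would raise
# "ValueError: If using all scalar values, you must pass an index".
resp = PredictionsResponse.from_json(
    '{"predictions": {"answer": "Run", "score": 0.98, "start": 23, "end": 26}}'
)
df = resp.get_predictions()  # single-row DataFrame with columns answer, score, start, end
print(df.to_dict(orient="records"))
# [{'answer': 'Run', 'score': 0.98, 'start': 23, 'end': 26}]
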
32 changes: 18 additions & 14 deletions mlflow/pyfunc/model.py
@@ -196,11 +196,14 @@ def _save_model_with_class_artifacts_params(
:param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
defines how the model loads artifacts and how it performs inference.
:param artifacts: A dictionary containing ``<name, artifact_uri>`` entries.
Remote artifact URIs
are resolved to absolute filesystem paths, producing a dictionary of
``<name, absolute_path>`` entries. ``python_model`` can reference these
resolved entries as the ``artifacts`` property of the ``context``
attribute. If ``None``, no artifacts are added to the model.
Remote artifact URIs are resolved to absolute filesystem paths, producing
a dictionary of ``<name, absolute_path>`` entries,
e.g. {"file": "aboslute_path"}). ``python_model`` can reference these resolved
entries as the ``artifacts`` property of the ``context`` attribute. If
``<snapshot, snapshot_location>``
(e.g. {"snapshot": "absolute_snapshot_location"}) is provided, then the model
can be fetched from `snapshot_location` directly.
If ``None``, no artifacts are added to the model.
:param conda_env: Either a dictionary representation of a Conda environment or the
path to a Conda environment yaml file. If provided, this describes the
environment this model should be run in. At minimum, it should specify
@@ -228,17 +231,18 @@ def _save_model_with_class_artifacts_params(
if artifacts:
saved_artifacts_config = {}
with TempDir() as tmp_artifacts_dir:
tmp_artifacts_config = {}
saved_artifacts_dir_subpath = "artifacts"
for artifact_name, artifact_uri in artifacts.items():
tmp_artifact_path = _download_artifact_from_uri(
artifact_uri=artifact_uri, output_path=tmp_artifacts_dir.path()
)
tmp_artifacts_config[artifact_name] = tmp_artifact_path
saved_artifact_subpath = posixpath.join(
saved_artifacts_dir_subpath,
os.path.relpath(path=tmp_artifact_path, start=tmp_artifacts_dir.path()),
)
if artifact_name == "snapshot":
saved_artifact_subpath = artifact_uri
else:
tmp_artifact_path = _download_artifact_from_uri(
artifact_uri=artifact_uri, output_path=tmp_artifacts_dir.path()
)
saved_artifact_subpath = posixpath.join(
saved_artifacts_dir_subpath,
os.path.relpath(path=tmp_artifact_path, start=tmp_artifacts_dir.path()),
)
saved_artifacts_config[artifact_name] = {
CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
CONFIG_KEY_ARTIFACT_URI: artifact_uri,
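
In short, an artifact keyed `"snapshot"` is recorded by its URI instead of being copied under the model's `artifacts/` directory. A hedged sketch of the observable behavior (`QAModel` refers to the `PythonModel` subclass defined in the test below; the artifact path "qa" is illustrative):

import mlflow
from huggingface_hub import snapshot_download

snapshot_location = snapshot_download(repo_id="prajjwal1/bert-tiny")

with mlflow.start_run():
    mlflow.pyfunc.log_model(
        artifact_path="qa",
        python_model=QAModel(),                     # see the test further down
        artifacts={"snapshot": snapshot_location},  # stored as-is, not downloaded/copied
    )

# When "snapshot" is the only artifact, the logged model's artifacts/ directory stays
# empty, and at load time context.artifacts["snapshot"] resolves to snapshot_location.
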
80 changes: 79 additions & 1 deletion tests/transformers/test_transformers_model_export.py
@@ -19,7 +19,7 @@

import transformers
import huggingface_hub
from huggingface_hub import ModelCard, scan_cache_dir
from huggingface_hub import ModelCard, scan_cache_dir, snapshot_download
from datasets import load_dataset

import mlflow
@@ -3653,3 +3653,81 @@ def test_whisper_model_supports_timestamps(raw_audio_file, whisper_pipeline):
first_timestamp = prediction["chunks"][0]["timestamp"]
assert isinstance(first_timestamp, tuple)
assert prediction_inference["chunks"][0]["timestamp"][1] == first_timestamp[1]


def test_pyfunc_model_log_load_with_artifacts_snapshot(tmp_path):
snapshot_location = snapshot_download(
repo_id="prajjwal1/bert-tiny",
local_dir=tmp_path.joinpath("bert-tiny"),
# to avoid tmpdir OSError: [Errno 30] Read-only file system
local_dir_use_symlinks=False,
)

tokenizer = transformers.AutoTokenizer.from_pretrained(snapshot_location)
model = transformers.MobileBertForQuestionAnswering.from_pretrained(snapshot_location)
bert_tiny_pipeline = transformers.pipeline(
task="question-answering", model=model, tokenizer=tokenizer
)

class QAModel(mlflow.pyfunc.PythonModel):
def load_context(self, context):
"""
This method initializes the tokenizer and language model
using the specified snapshot location.
"""
# Initialize tokenizer and language model
tokenizer = transformers.AutoTokenizer.from_pretrained(context.artifacts["snapshot"])

model = transformers.MobileBertForQuestionAnswering.from_pretrained(
context.artifacts["snapshot"]
)

self.pipeline = transformers.pipeline(
task="question-answering", model=model, tokenizer=tokenizer
)

def predict(self, context, model_input, params=None):
question = model_input["question"][0]
if isinstance(question, np.ndarray):
question = question.item()
ctx = model_input["context"][0]
if isinstance(ctx, np.ndarray):
ctx = ctx.item()
return self.pipeline(question=question, context=ctx)

data = {"question": "Who's house?", "context": "The house is owned by Run."}
pyfunc_artifact_path = "question_answering_model"
with mlflow.start_run() as run:
model_info = mlflow.pyfunc.log_model(
artifact_path=pyfunc_artifact_path,
python_model=QAModel(),
artifacts={"snapshot": snapshot_location},
input_example=data,
signature=infer_signature(
data, mlflow.transformers.generate_signature_output(bert_tiny_pipeline, data)
),
)

pyfunc_model_uri = f"runs:/{run.info.run_id}/{pyfunc_artifact_path}"
assert model_info.model_uri == pyfunc_model_uri
pyfunc_model_path = _download_artifact_from_uri(
f"runs:/{run.info.run_id}/{pyfunc_artifact_path}"
)
assert len(os.listdir(os.path.join(pyfunc_model_path, "artifacts"))) == 0
model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel"))

loaded_pyfunc_model = mlflow.pyfunc.load_model(model_uri=pyfunc_model_uri)
assert model_config.to_yaml() == loaded_pyfunc_model.metadata.to_yaml()
assert loaded_pyfunc_model.predict(data)["answer"] != ""

# Test model serving
inference_payload = json.dumps({"inputs": data})
response = pyfunc_serve_and_score_model(
model_info.model_uri,
data=inference_payload,
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
extra_args=["--env-manager", "local"],
)
values = PredictionsResponse.from_json(response.content.decode("utf-8")).get_predictions()

assert values.to_dict(orient="records")[0]["answer"] != ""
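
Outside the test harness, the same served model could be queried over HTTP. A sketch assuming the model was started separately with `mlflow models serve -m <model_uri> --env-manager local` on the default port 5000 and that `requests` is installed:

import json

import requests

payload = {"inputs": {"question": "Who's house?", "context": "The house is owned by Run."}}
response = requests.post(
    "http://127.0.0.1:5000/invocations",
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
)
# Expected shape: {"predictions": {"score": ..., "start": ..., "end": ..., "answer": ...}}
print(response.json())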