Support huggingface snapshot locations in pyfunc log_model artifacts parameter #9362

Merged 20 commits on Aug 24, 2023
Changes from 8 commits
12 changes: 6 additions & 6 deletions examples/gateway/mlflow_serving/README.md
@@ -197,22 +197,22 @@ class MPT(mlflow.pyfunc.PythonModel):
def load_context(self, context):
"""
This method initializes the tokenizer and language model
using the specified model repository.
using the specified model snapshot directory.
"""
# Initialize tokenizer and language model
self.tokenizer = transformers.AutoTokenizer.from_pretrained(
context.artifacts["repository"], padding_side="left"
context.artifacts["snapshot"], padding_side="left"
)

config = transformers.AutoConfig.from_pretrained(
context.artifacts["repository"], trust_remote_code=True
context.artifacts["snapshot"], trust_remote_code=True
)
# Comment out this configuration setting if not running on a GPU or if triton is not installed.
# Note that triton dramatically improves the inference speed performance
config.attn_config["attn_impl"] = "triton"

self.model = transformers.AutoModelForCausalLM.from_pretrained(
context.artifacts["repository"],
context.artifacts["snapshot"],
config=config,
torch_dtype=torch.bfloat16,
trust_remote_code=True,
@@ -242,7 +242,7 @@ class MPT(mlflow.pyfunc.PythonModel):
{RESPONSE_KEY}
"""

def predict(self, context, model_input):
def predict(self, context, model_input, params=None):
"""
This method generates prediction for the given input.
"""
@@ -305,7 +305,7 @@ with mlflow.start_run():
mlflow.pyfunc.log_model(
"mpt-7b-instruct",
python_model=MPT(),
artifacts={"repository": snapshot_location},
artifacts={"snapshot": snapshot_location},
pip_requirements=[
"torch",
"transformers",
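
For context, the `snapshot_location` passed to `artifacts={"snapshot": snapshot_location}` above is typically produced with `huggingface_hub.snapshot_download`. A minimal sketch, assuming the MPT-7B-Instruct repo id this README is built around (the `local_dir` value is illustrative):

from huggingface_hub import snapshot_download

# Download the model snapshot to a local directory and keep its path,
# so it can be passed to log_model as the "snapshot" artifact.
snapshot_location = snapshot_download(
    repo_id="mosaicml/mpt-7b-instruct",
    local_dir="mpt-7b-instruct",
)
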
5 changes: 5 additions & 0 deletions mlflow/deployments/__init__.py
@@ -40,11 +40,16 @@ def get_predictions(self, predictions_format="dataframe", dtype=None):
"""
import numpy as np
import pandas as pd
from pandas.core.dtypes.common import is_list_like

if predictions_format == "dataframe":
predictions = self["predictions"]
if isinstance(predictions, str):
return pd.DataFrame(data=[predictions])
if isinstance(predictions, dict) and not any(
is_list_like(p) and getattr(p, "ndim", 1) == 1 for p in predictions.values()
):
return pd.DataFrame(data=predictions, index=[0])
return pd.DataFrame(data=predictions)
elif predictions_format == "ndarray":
return np.array(self["predictions"], dtype)
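
For reference, a minimal sketch of what the new dict-handling branch enables, assuming `PredictionsResponse` is importable from `mlflow.deployments` (as in the test further down) and an illustrative question-answering payload:

from mlflow.deployments import PredictionsResponse

# A question-answering response is a dict of scalars; without the new branch,
# pd.DataFrame(data=predictions) would raise
# "ValueError: If using all scalar values, you must pass an index".
resp = PredictionsResponse.from_json(
    '{"predictions": {"answer": "Run", "score": 0.98, "start": 23, "end": 26}}'
)
df = resp.get_predictions()  # single-row DataFrame with columns answer, score, start, end
print(df.to_dict(orient="records"))
# [{'answer': 'Run', 'score': 0.98, 'start': 23, 'end': 26}]
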
32 changes: 18 additions & 14 deletions mlflow/pyfunc/model.py
@@ -196,11 +196,14 @@ def _save_model_with_class_artifacts_params(
:param python_model: An instance of a subclass of :class:`~PythonModel`. ``python_model``
defines how the model loads artifacts and how it performs inference.
:param artifacts: A dictionary containing ``<name, artifact_uri>`` entries.
Remote artifact URIs
are resolved to absolute filesystem paths, producing a dictionary of
``<name, absolute_path>`` entries. ``python_model`` can reference these
resolved entries as the ``artifacts`` property of the ``context``
attribute. If ``None``, no artifacts are added to the model.
Remote artifact URIs are resolved to absolute filesystem paths, producing
a dictionary of ``<name, absolute_path>`` entries,
e.g. {"file": "aboslute_path"}). ``python_model`` can reference these resolved
entries as the ``artifacts`` property of the ``context`` attribute. If
``<snapshot, snapshot_location>``
(e.g. {"snapshot": "absolute_snapshot_location"}) is provided, then the model
can be fetched from `snapshot_location` directly.
If ``None``, no artifacts are added to the model.
:param conda_env: Either a dictionary representation of a Conda environment or the
path to a Conda environment yaml file. If provided, this describes the
environment this model should be run in. At minimum, it should specify
@@ -228,17 +231,18 @@ def _save_model_with_class_artifacts_params(
if artifacts:
saved_artifacts_config = {}
with TempDir() as tmp_artifacts_dir:
tmp_artifacts_config = {}
saved_artifacts_dir_subpath = "artifacts"
for artifact_name, artifact_uri in artifacts.items():
tmp_artifact_path = _download_artifact_from_uri(
artifact_uri=artifact_uri, output_path=tmp_artifacts_dir.path()
)
tmp_artifacts_config[artifact_name] = tmp_artifact_path
saved_artifact_subpath = posixpath.join(
saved_artifacts_dir_subpath,
os.path.relpath(path=tmp_artifact_path, start=tmp_artifacts_dir.path()),
)
if artifact_name == "snapshot":
saved_artifact_subpath = artifact_uri
else:
tmp_artifact_path = _download_artifact_from_uri(
artifact_uri=artifact_uri, output_path=tmp_artifacts_dir.path()
)
saved_artifact_subpath = posixpath.join(
saved_artifacts_dir_subpath,
os.path.relpath(path=tmp_artifact_path, start=tmp_artifacts_dir.path()),
)
saved_artifacts_config[artifact_name] = {
CONFIG_KEY_ARTIFACT_RELATIVE_PATH: saved_artifact_subpath,
CONFIG_KEY_ARTIFACT_URI: artifact_uri,
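
In short, an artifact keyed `"snapshot"` is recorded by its URI instead of being copied under the model's `artifacts/` directory. A hedged sketch of the observable behavior (`QAModel` refers to the `PythonModel` subclass defined in the test below; the artifact path "qa" is illustrative):

import mlflow
from huggingface_hub import snapshot_download

snapshot_location = snapshot_download(repo_id="prajjwal1/bert-tiny")

with mlflow.start_run():
    mlflow.pyfunc.log_model(
        artifact_path="qa",
        python_model=QAModel(),                     # see the test further down
        artifacts={"snapshot": snapshot_location},  # stored as-is, not downloaded/copied
    )

# When "snapshot" is the only artifact, the logged model's artifacts/ directory stays
# empty, and at load time context.artifacts["snapshot"] resolves to snapshot_location.
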
80 changes: 79 additions & 1 deletion tests/transformers/test_transformers_model_export.py
@@ -19,7 +19,7 @@

import transformers
import huggingface_hub
from huggingface_hub import ModelCard, scan_cache_dir
from huggingface_hub import ModelCard, scan_cache_dir, snapshot_download
from datasets import load_dataset

import mlflow
@@ -3653,3 +3653,81 @@ def test_whisper_model_supports_timestamps(raw_audio_file, whisper_pipeline):
first_timestamp = prediction["chunks"][0]["timestamp"]
assert isinstance(first_timestamp, tuple)
assert prediction_inference["chunks"][0]["timestamp"][1] == first_timestamp[1]


def test_pyfunc_model_log_load_with_artifacts_snapshot(tmp_path):
snapshot_location = snapshot_download(
repo_id="prajjwal1/bert-tiny",
local_dir=tmp_path.joinpath("bert-tiny"),
# to avoid tmpdir OSError: [Errno 30] Read-only file system
local_dir_use_symlinks=False,
)

tokenizer = transformers.AutoTokenizer.from_pretrained(snapshot_location)
model = transformers.MobileBertForQuestionAnswering.from_pretrained(snapshot_location)
bert_tiny_pipeline = transformers.pipeline(
task="question-answering", model=model, tokenizer=tokenizer
)

class QAModel(mlflow.pyfunc.PythonModel):
def load_context(self, context):
"""
This method initializes the tokenizer and language model
using the specified snapshot location.
"""
# Initialize tokenizer and language model
tokenizer = transformers.AutoTokenizer.from_pretrained(context.artifacts["snapshot"])

model = transformers.MobileBertForQuestionAnswering.from_pretrained(
context.artifacts["snapshot"]
)

self.pipeline = transformers.pipeline(
task="question-answering", model=model, tokenizer=tokenizer
)

def predict(self, context, model_input, params=None):
question = model_input["question"][0]
if isinstance(question, np.ndarray):
question = question.item()
ctx = model_input["context"][0]
if isinstance(ctx, np.ndarray):
ctx = ctx.item()
return self.pipeline(question=question, context=ctx)

data = {"question": "Who's house?", "context": "The house is owned by Run."}
pyfunc_artifact_path = "question_answering_model"
with mlflow.start_run() as run:
model_info = mlflow.pyfunc.log_model(
artifact_path=pyfunc_artifact_path,
python_model=QAModel(),
artifacts={"snapshot": snapshot_location},
input_example=data,
signature=infer_signature(
data, mlflow.transformers.generate_signature_output(bert_tiny_pipeline, data)
),
)

pyfunc_model_uri = f"runs:/{run.info.run_id}/{pyfunc_artifact_path}"
assert model_info.model_uri == pyfunc_model_uri
pyfunc_model_path = _download_artifact_from_uri(
f"runs:/{run.info.run_id}/{pyfunc_artifact_path}"
)
assert len(os.listdir(os.path.join(pyfunc_model_path, "artifacts"))) == 0
model_config = Model.load(os.path.join(pyfunc_model_path, "MLmodel"))

loaded_pyfunc_model = mlflow.pyfunc.load_model(model_uri=pyfunc_model_uri)
assert model_config.to_yaml() == loaded_pyfunc_model.metadata.to_yaml()
assert loaded_pyfunc_model.predict(data)["answer"] != ""

# Test model serving
inference_payload = json.dumps({"inputs": data})
response = pyfunc_serve_and_score_model(
model_info.model_uri,
data=inference_payload,
content_type=pyfunc_scoring_server.CONTENT_TYPE_JSON,
extra_args=["--env-manager", "local"],
)
values = PredictionsResponse.from_json(response.content.decode("utf-8")).get_predictions()

assert values.to_dict(orient="records")[0]["answer"] != ""
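
Outside the test harness, the same served model could be queried over HTTP. A sketch assuming the model was started separately with `mlflow models serve -m <model_uri> --env-manager local` on the default port 5000 and that `requests` is installed:

import json

import requests

payload = {"inputs": {"question": "Who's house?", "context": "The house is owned by Run."}}
response = requests.post(
    "http://127.0.0.1:5000/invocations",
    data=json.dumps(payload),
    headers={"Content-Type": "application/json"},
)
# Expected shape: {"predictions": {"score": ..., "start": ..., "end": ..., "answer": ...}}
print(response.json())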