Update MLX integration to use new generate_step function signature #3021

Merged 2 commits on Feb 9, 2024
docs/mlx_integration.md (6 changes: 3 additions & 3 deletions)
@@ -13,11 +13,11 @@ Note that for Apple Silicon Macs with less memory, smaller models (or quantized
 1. Install MLX.
 
 ```
-pip install mlx-lm
+pip install "mlx-lm>=0.0.6"
 ```
 
-2. When you launch a model worker, replace the normal worker (`fastchat.serve.model_worker`) with the MLX worker (`fastchat.serve.mlx_worker`).
+2. When you launch a model worker, replace the normal worker (`fastchat.serve.model_worker`) with the MLX worker (`fastchat.serve.mlx_worker`). Remember to launch a model worker after you have launched the controller ([instructions](../README.md)).
 
 ```
-python3 -m fastchat.serve.mlx_worker --model-path microsoft/phi-2
+python3 -m fastchat.serve.mlx_worker --model-path TinyLlama/TinyLlama-1.1B-Chat-v1.0
 ```
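For context, the controller-then-worker ordering added to the docs looks roughly like this (a sketch based on the linked README instructions; default controller address assumed):

```
# Terminal 1: start the controller first
python3 -m fastchat.serve.controller

# Terminal 2: then start the MLX worker, which registers itself with the controller
python3 -m fastchat.serve.mlx_worker --model-path TinyLlama/TinyLlama-1.1B-Chat-v1.0
```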
fastchat/serve/mlx_worker.py (2 changes: 1 addition & 1 deletion)

@@ -124,7 +124,7 @@ async def generate_stream(self, params):
         )
 
         for i in range(max_new_tokens):
-            token = await run_in_threadpool(next, iterator)
+            (token, _) = await run_in_threadpool(next, iterator)
             if token == self.mlx_tokenizer.eos_token_id:
                 finish_reason = "stop"
                 break
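For background, this one-line change reflects that `generate_step` in mlx-lm >= 0.0.6 yields a tuple per step instead of a bare token, so the first element must be unpacked. A minimal standalone sketch of the same consumption pattern (the model path, prompt, and the exact `generate_step(prompt, model)` signature are assumptions based on mlx-lm around that release):

```
# Minimal sketch: consuming mlx-lm's generate_step after the signature change.
# Assumes mlx-lm >= 0.0.6, where each step yields a tuple whose first element
# is the next token; the model path and prompt are illustrative only.
import mlx.core as mx
from mlx_lm import load
from mlx_lm.utils import generate_step

model, tokenizer = load("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
prompt = mx.array(tokenizer.encode("Hello, my name is"))

max_new_tokens = 32
tokens = []
iterator = generate_step(prompt, model)
for _ in range(max_new_tokens):
    (token, _prob) = next(iterator)  # unpack the pair; second element unused here
    token_id = token.item()
    if token_id == tokenizer.eos_token_id:
        break
    tokens.append(token_id)

print(tokenizer.decode(tokens))
```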