Add deployment section with vLLM, TGI and NIM. Remove alignment handbook #1990

Open
Bihan wants to merge 1 commit into dstackai:master from Bihan:add_deployment_section
Changes from all commits
One file is renamed without changes, and two files are empty.
`examples/deployment/tgi` README:

````diff
@@ -1,23 +1,79 @@
 # Text Generation Inference
 
-## Service
+This example shows how to deploy `mistralai/Mistral-7B-Instruct-v0.2` with `dstack` using [TGI :material-arrow-top-right-thin:{ .external }](https://huggingface.co/docs/text-generation-inference/en/index).
 
-The following command deploys Llama 7B Instruct as a service:
+??? info "Prerequisites"
+    Once `dstack` is [installed](https://dstack.ai/docs/installation), go ahead and clone the repo, and run `dstack init`.
 
-```shell
-dstack run . -f examples/deployment/tgi/serve.dstack.yml
-```
+    <div class="termy">
+
+    ```shell
+    $ git clone https://github.com/dstackai/dstack
+    $ cd dstack
+    $ dstack init
+    ```
+
+    </div>
 
-See the configuration at [serve.dstack.yml](serve.dstack.yml).
+## Deployment
+
+### Running as a task
+
+If you'd like to run `mistralai/Mistral-7B-Instruct-v0.2` for development purposes, consider using `dstack` [tasks](https://dstack.ai/docs/tasks/).
+
+<div editor-title="examples/deployment/tgi/serve-task.dstack.yml">
+
+```yaml
+type: task
+# This task runs mistralai/Mistral-7B-Instruct-v0.2 with TGI
+
+image: ghcr.io/huggingface/text-generation-inference:latest
+env:
+  - HF_TOKEN
+  - MODEL_ID=mistralai/Mistral-7B-Instruct-v0.2
+commands:
+  - text-generation-launcher --port 8000 --trust-remote-code
+ports:
+  - 8000
+
+resources:
+  gpu: 24GB
+```
+
+</div>
+
+### Deploying as a service
 
-## Task
+If you'd like to deploy the model as an auto-scalable and secure endpoint,
+use the [service](https://dstack.ai/docs/services) configuration. You can find it at [`examples/deployment/tgi/serve.dstack.yml` :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/blob/master/examples/deployment/tgi/serve.dstack.yml).
 
-The following command runs Llama 7B Instruct as a task:
+### Running a configuration
 
-```shell
-dstack run . -f examples/deployment/tgi/serve-task.dstack.yml
-```
+To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/reference/cli/index.md#dstack-apply) command.
 
-See the configuration at [serve.dstack.yml](serve-task.dstack.yml).
+<div class="termy">
+
+```shell
+$ HF_TOKEN=...
+$ dstack apply -f examples/deployment/tgi/serve-task.dstack.yml
+
+ #  BACKEND     REGION        RESOURCES                      SPOT  PRICE
+ 1  tensordock  unitedstates  2xCPU, 10GB, 1xRTX3090 (24GB)  no    $0.231
+ 2  tensordock  unitedstates  2xCPU, 10GB, 1xRTX3090 (24GB)  no    $0.242
+ 3  tensordock  india         2xCPU, 38GB, 1xA5000 (24GB)    no    $0.283
+
+Submit a new run? [y/n]: y
+
+Provisioning...
+---> 100%
+```
+
+</div>
 
-For more details, refer to [services](https://dstack.ai/docs/services) or [tasks](https://dstack.ai/docs/tasks).
+## Source code
+
+The source code of this example can be found in
+[`examples/deployment/tgi` :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/blob/master/examples/deployment/tgi).
+
+## What's next?
+
+1. Check [dev environments](https://dstack.ai/docs/dev-environments), [tasks](https://dstack.ai/docs/tasks),
+   [services](https://dstack.ai/docs/services), and [protips](https://dstack.ai/docs/protips).
+2. Browse [Deployment on AMD :material-arrow-top-right-thin:{ .external }](https://dstack.ai/examples/accelerators/amd/) and
+   [Deployment on TPU :material-arrow-top-right-thin:{ .external }](https://dstack.ai/examples/accelerators/tpu/).
````
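For reference, once the TGI task is running, the server can be smoke-tested from the command line. This is a minimal sketch, not part of the PR: it assumes port 8000 is forwarded to localhost and uses TGI's `/generate` endpoint with an illustrative prompt and parameters.

```shell
# Smoke-test the running TGI task. Assumes port 8000 is forwarded to
# localhost; the prompt and generation parameters are illustrative.
curl http://localhost:8000/generate \
  -X POST \
  -H 'Content-Type: application/json' \
  -d '{"inputs": "What is deep learning?", "parameters": {"max_new_tokens": 64}}'
```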
`examples/deployment/vllm` README:

````diff
@@ -1,23 +1,79 @@
 # vLLM
 
-## Service
+This example shows how to deploy `NousResearch/Llama-2-7b-chat-hf` with `dstack` using [vLLM :material-arrow-top-right-thin:{ .external }](https://docs.vllm.ai/en/latest/).
 
-The following command deploys Llama 7B Instruct as a service:
+??? info "Prerequisites"
+    Once `dstack` is [installed](https://dstack.ai/docs/installation), go ahead and clone the repo, and run `dstack init`.
 
-```shell
-dstack run . -f examples/deployment/vllm/serve.dstack.yml
-```
+    <div class="termy">
+
+    ```shell
+    $ git clone https://github.com/dstackai/dstack
+    $ cd dstack
+    $ dstack init
+    ```
+
+    </div>
 
-See the configuration at [serve.dstack.yml](serve.dstack.yml).
+## Deployment
+
+### Running as a task
+
+If you'd like to run `NousResearch/Llama-2-7b-chat-hf` for development purposes, consider using `dstack` [tasks](https://dstack.ai/docs/tasks/).
+
+<div editor-title="examples/deployment/vllm/serve-task.dstack.yml">
+
+```yaml
+type: task
+# This task runs Llama 2 with vLLM
+
+image: vllm/vllm-openai:latest
+env:
+  - MODEL=NousResearch/Llama-2-7b-chat-hf
+  - PYTHONPATH=/workspace
+commands:
+  - python3 -m vllm.entrypoints.openai.api_server --model $MODEL --port 8000
+ports:
+  - 8000
+
+resources:
+  gpu: 24GB
+```
+
+</div>
+
+### Deploying as a service
 
-## Task
+If you'd like to deploy the model as an auto-scalable and secure endpoint,
+use the [service](https://dstack.ai/docs/services) configuration. You can find it at [`examples/deployment/vllm/serve.dstack.yml` :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/blob/master/examples/deployment/vllm/serve.dstack.yml).
 
-The following command runs Llama 7B Instruct as a task:
+### Running a configuration
 
-```shell
-dstack run . -f examples/deployment/vllm/serve-task.dstack.yml
-```
+To run a configuration, use the [`dstack apply`](https://dstack.ai/docs/reference/cli/index.md#dstack-apply) command.
 
-See the configuration at [serve.dstack.yml](serve-task.dstack.yml).
+<div class="termy">
+
+```shell
+$ dstack apply -f examples/deployment/vllm/serve-task.dstack.yml
+
+ #  BACKEND  REGION         INSTANCE         RESOURCES    SPOT  PRICE
+ 1  cudo     ca-montreal-1  intel-broadwell  2xCPU, 8GB,  no    $0.0276
+ 2  cudo     ca-montreal-2  intel-broadwell  2xCPU, 8GB,  no    $0.0286
+ 3  cudo     fi-tampere-1   intel-broadwell  2xCPU, 8GB,  no    $0.0383
+
+Submit a new run? [y/n]: y
+
+Provisioning...
+---> 100%
+```
+
+</div>
 
-For more details, refer to [services](https://dstack.ai/docs/services) or [tasks](https://dstack.ai/docs/tasks).
+## Source code
+
+The source code of this example can be found in
+[`examples/deployment/vllm` :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/blob/master/examples/deployment/vllm).
+
+## What's next?
+
+1. Check [dev environments](https://dstack.ai/docs/dev-environments), [tasks](https://dstack.ai/docs/tasks),
+   [services](https://dstack.ai/docs/services), and [protips](https://dstack.ai/docs/protips).
+2. Browse [Deployment on AMD :material-arrow-top-right-thin:{ .external }](https://dstack.ai/examples/accelerators/amd/) and
+   [Deployment on TPU :material-arrow-top-right-thin:{ .external }](https://dstack.ai/examples/accelerators/tpu/).
````
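Similarly, once the vLLM task is up, its OpenAI-compatible server can be queried directly. A minimal sketch, assuming port 8000 is forwarded to localhost; the `model` field must match the served model, and the message content is illustrative:

```shell
# Query vLLM's OpenAI-compatible chat endpoint. Assumes port 8000 is
# forwarded to localhost; the message content is illustrative.
curl http://localhost:8000/v1/chat/completions \
  -H 'Content-Type: application/json' \
  -d '{
        "model": "NousResearch/Llama-2-7b-chat-hf",
        "messages": [{"role": "user", "content": "Say hello in one sentence."}],
        "max_tokens": 32
      }'
```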
In a third configuration, the `resources` section is commented out:

```diff
@@ -10,5 +10,5 @@ commands:
 ports:
   - 8000
 
-resources:
-  gpu: 24GB
+#resources:
+#  gpu: 24GB
```
Review comment: I'd suggest using Llama 3.1 instead of Mistral...