From 423fc50ecfe7adbd8cdee468ac74fd2929a8f595 Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Mon, 20 May 2024 16:35:54 -0500
Subject: [PATCH 1/3] Add Stream API Example

---
 src/litserve/examples/simple_example.py | 34 +++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/src/litserve/examples/simple_example.py b/src/litserve/examples/simple_example.py
index 3d664df8..84cbec64 100644
--- a/src/litserve/examples/simple_example.py
+++ b/src/litserve/examples/simple_example.py
@@ -68,3 +68,37 @@ def predict(self, x):
     def encode_response(self, output):
         # float will take the output value directly onto CPU memory
         return {"output": float(output)}
+
+
+class SimpleStreamAPI(ls.LitAPI):
+    """
+    Run as:
+    ```
+    server = ls.LitServer(SimpleStreamAPI(), stream=True)
+    server.run(port=8000)
+    ```
+
+    Then, in a new Python session, retrieve the responses as follows:
+    ```
+    import requests
+    url = "http://127.0.0.1:8000/predict"
+
+    resp = requests.post(url, json={"input": "1, 2, 3]"}, headers=None, stream=True)
+    for line in resp.iter_content(5000):
+        if line:
+            print(line.decode("utf-8"))
+    ```
+    """
+    def setup(self, device) -> None:
+        self.model = lambda x: str(x)
+
+    def decode_request(self, request):
+        return request["input"]
+
+    def predict(self, x):
+        for i in range(10):
+            yield self.model(i)
+
+    def encode_response(self, output_stream):
+        for output in output_stream:
+            yield {"output": output}

From 8b17115363bb465e287393b1d9efa853857c9f75 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 20 May 2024 21:36:28 +0000
Subject: [PATCH 2/3] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 README.md                               | 6 +++---
 src/litserve/examples/simple_example.py | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 4dbb2f01..98efe46e 100644
--- a/README.md
+++ b/README.md
@@ -8,9 +8,9 @@
 **High-throughput serving engine for AI models**
 <br/>

 <pre>
-✅ Batching       ✅ Streaming          ✅ Auto-GPU, multi-GPU 
-✅ Multi-modal    ✅ PyTorch/JAX/TF     ✅ Full control        
-✅ Auth           ✅ Built on Fast API                         
+✅ Batching       ✅ Streaming          ✅ Auto-GPU, multi-GPU
+✅ Multi-modal    ✅ PyTorch/JAX/TF     ✅ Full control
+✅ Auth           ✅ Built on Fast API
 </pre>
 
diff --git a/src/litserve/examples/simple_example.py b/src/litserve/examples/simple_example.py
index 84cbec64..2fa0d76d 100644
--- a/src/litserve/examples/simple_example.py
+++ b/src/litserve/examples/simple_example.py
@@ -89,6 +89,7 @@ class SimpleStreamAPI(ls.LitAPI):
             print(line.decode("utf-8"))
     ```
     """
+
     def setup(self, device) -> None:
         self.model = lambda x: str(x)
 

From d13f4b707986c925bd43c40f7a96883632c72fbe Mon Sep 17 00:00:00 2001
From: Sebastian Raschka
Date: Mon, 20 May 2024 17:09:39 -0500
Subject: [PATCH 3/3] Modify example so it uses the requests.post input

---
 src/litserve/examples/simple_example.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/src/litserve/examples/simple_example.py b/src/litserve/examples/simple_example.py
index 2fa0d76d..26867db3 100644
--- a/src/litserve/examples/simple_example.py
+++ b/src/litserve/examples/simple_example.py
@@ -77,13 +77,11 @@ class SimpleStreamAPI(ls.LitAPI):
     server = ls.LitServer(SimpleStreamAPI(), stream=True)
     server.run(port=8000)
     ```
-
     Then, in a new Python session, retrieve the responses as follows:
     ```
     import requests
     url = "http://127.0.0.1:8000/predict"
-
-    resp = requests.post(url, json={"input": "1, 2, 3]"}, headers=None, stream=True)
+    resp = requests.post(url, json={"input": "Hello world"}, headers=None, stream=True)
     for line in resp.iter_content(5000):
         if line:
             print(line.decode("utf-8"))
@@ -91,14 +89,14 @@ class SimpleStreamAPI(ls.LitAPI):
     """
 
     def setup(self, device) -> None:
-        self.model = lambda x: str(x)
+        self.model = lambda x, y: f"{x}: {y}"
 
     def decode_request(self, request):
         return request["input"]
 
     def predict(self, x):
-        for i in range(10):
-            yield self.model(i)
+        for i in range(3):
+            yield self.model(i, x.encode("utf-8").decode())
 
     def encode_response(self, output_stream):
         for output in output_stream:
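
For reference, here is `SimpleStreamAPI` as it stands once all three patches are applied — a consolidation of the diffs above, not an additional change. The `import litserve as ls` line is an assumption: the hunks use the `ls.` prefix but never show the import itself.

````python
import litserve as ls  # assumed import; the hunks only show the `ls.` prefix


class SimpleStreamAPI(ls.LitAPI):
    """
    Run as:
    ```
    server = ls.LitServer(SimpleStreamAPI(), stream=True)
    server.run(port=8000)
    ```
    Then, in a new Python session, retrieve the responses as follows:
    ```
    import requests
    url = "http://127.0.0.1:8000/predict"
    resp = requests.post(url, json={"input": "Hello world"}, headers=None, stream=True)
    for line in resp.iter_content(5000):
        if line:
            print(line.decode("utf-8"))
    ```
    """

    def setup(self, device) -> None:
        # Stand-in "model": prefixes the decoded input with a chunk index.
        self.model = lambda x, y: f"{x}: {y}"

    def decode_request(self, request):
        return request["input"]

    def predict(self, x):
        # Yield three chunks so the response is actually streamed;
        # the encode/decode round-trip leaves the input string unchanged.
        for i in range(3):
            yield self.model(i, x.encode("utf-8").decode())

    def encode_response(self, output_stream):
        # Wrap each chunk so the client receives a dict per streamed item.
        for output in output_stream:
            yield {"output": output}
````

With the server started as in the docstring, posting `{"input": "Hello world"}` should stream back three items wrapping `0: Hello world`, `1: Hello world`, and `2: Hello world` (modulo however LitServe frames each `{"output": ...}` chunk on the wire).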