Commit
Add Ollama Support

Signed-off-by: lvliang-intel <[email protected]>

1 parent f37ce2c, commit a00e364
Showing 6 changed files with 155 additions and 0 deletions.
@@ -0,0 +1,28 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM langchain/langchain:latest

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
    libgl1-mesa-glx \
    libjemalloc-dev \
    vim \
    curl

RUN useradd -m -s /bin/bash user && \
    mkdir -p /home/user && \
    chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r /home/user/comps/llms/text-generation/ollama/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/llms/text-generation/ollama

ENTRYPOINT ["python", "llm.py"]
Empty file.
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,48 @@ | ||
# Copyright (C) 2024 Intel Corporation | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
from fastapi.responses import StreamingResponse | ||
from langchain_community.llms import Ollama | ||
from langsmith import traceable | ||
|
||
from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice | ||
|
||
|
||
@register_microservice( | ||
name="opea_service@llm_ollama", | ||
service_type=ServiceType.LLM, | ||
endpoint="/v1/chat/completions", | ||
host="0.0.0.0", | ||
port=9000, | ||
) | ||
@traceable(run_type="llm") | ||
def llm_generate(input: LLMParamsDoc): | ||
ollama = Ollama( | ||
model="llama3", | ||
num_predict=input.max_new_tokens, | ||
top_k=input.top_k, | ||
top_p=input.top_p, | ||
temperature=input.temperature, | ||
repeat_penalty=input.repetition_penalty, | ||
) | ||
# assuming you have Ollama installed and have llama3 model pulled with `ollama pull llama3` | ||
if input.streaming: | ||
|
||
async def stream_generator(): | ||
chat_response = "" | ||
async for text in ollama.astream(input.query): | ||
chat_response += text | ||
chunk_repr = repr(text.encode("utf-8")) | ||
print(f"[llm - chat_stream] chunk:{chunk_repr}") | ||
yield f"data: {chunk_repr}\n\n" | ||
print(f"[llm - chat_stream] stream response: {chat_response}") | ||
yield "data: [DONE]\n\n" | ||
|
||
return StreamingResponse(stream_generator(), media_type="text/event-stream") | ||
else: | ||
response = ollama.invoke(input.query) | ||
return GeneratedDoc(text=response, prompt=input.query) | ||
|
||
|
||
if __name__ == "__main__": | ||
    opea_microservices["opea_service@llm_ollama"].start()
@@ -0,0 +1,11 @@
docarray[full] | ||
fastapi | ||
huggingface_hub | ||
langchain==0.1.16 | ||
langserve | ||
langsmith | ||
opentelemetry-api | ||
opentelemetry-exporter-otlp | ||
opentelemetry-sdk | ||
shortuuid | ||
transformers
@@ -0,0 +1,66 @@
# Introduction

[Ollama](https://github.com/ollama/ollama) allows you to run open-source large language models, such as Llama 3, locally. Ollama bundles model weights, configuration, and data into a single package defined by a Modelfile. It is a lightweight, extensible framework for building and running language models on the local machine, providing a simple API for creating, running, and managing models, as well as a library of pre-built models that can easily be used in a variety of applications. This makes it a good fit for deploying large language models locally on an AI PC.
# Get Started

## Setup

Follow [these instructions](https://github.com/ollama/ollama) to set up and run a local Ollama instance.

- Download and install Ollama on one of the supported platforms (including Windows).
- Fetch an LLM model via `ollama pull <name-of-model>`. View the list of available models in the model library and pull one to use locally, e.g. `ollama pull llama3`, as shown below.
- This downloads the default tagged version of the model. Typically, the default points to the latest, smallest-sized-parameter model.
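
For example, assuming Ollama is installed and the server is running, you can pull Llama 3 and then confirm the model is available locally:

```bash
# Pull the default tag of the Llama 3 model
ollama pull llama3

# List the models available on this machine
ollama list
```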
Note: Special settings are necessary to pull models behind a proxy.

```bash
sudo vim /etc/systemd/system/ollama.service
```

Add your proxy settings to the configuration file above.

```ini
[Service]
Environment="http_proxy=${your_proxy}"
Environment="https_proxy=${your_proxy}"
```
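
After editing the unit file, reload systemd and restart the Ollama service so the new proxy settings take effect:

```bash
# Apply the updated service definition and restart Ollama
sudo systemctl daemon-reload
sudo systemctl restart ollama
```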
## Usage

Here are a few ways to interact with the pulled local models:

### In the terminal

All of your local models are automatically served on `localhost:11434`. Run `ollama run <name-of-model>` to start interacting with a model directly from the command line, as in the example below.
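
For instance, with the llama3 model pulled in the setup step, an interactive session starts like this:

```bash
# Start an interactive chat session with the local llama3 model
ollama run llama3
```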
### API access

Send an `application/json` request to Ollama's API endpoint to interact with a model:

```bash
curl http://localhost:11434/api/generate -d '{
  "model": "llama3",
  "prompt": "Why is the sky blue?"
}'
```
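
The generate endpoint streams its reply as a sequence of JSON objects by default; Ollama's API also accepts a `stream` flag if you prefer a single consolidated response (an illustrative variation of the request above):

```bash
# Ask for one complete JSON response instead of a stream of chunks
curl http://localhost:11434/api/generate -d '{
  "model": "llama3",
  "prompt": "Why is the sky blue?",
  "stream": false
}'
```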
# Build Docker Image

```bash
cd GenAIComps/
docker build --no-cache -t opea/llm-ollama:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/llms/text-generation/ollama/Dockerfile .
```
# Run the Ollama Microservice

```bash
docker run --network host opea/llm-ollama:latest
```
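
Because the container uses host networking, the microservice listens on port 9000 of the host and talks to the local Ollama server on its default port 11434. A quick, optional sanity check that both are reachable (illustrative commands, not part of the service itself):

```bash
# The Ollama server should answer with the locally available models
curl http://localhost:11434/api/tags

# The microservice container should be running
docker ps --filter ancestor=opea/llm-ollama:latest
```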
# Consume the Ollama Microservice

```bash
curl http://127.0.0.1:9000/v1/chat/completions -X POST -d '{"query":"What is Deep Learning?","max_new_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":true}' -H 'Content-Type: application/json'
```
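
The `streaming` field maps to the `input.streaming` branch in `llm.py`, so setting it to `false` returns a single generated document instead of server-sent events:

```bash
# Non-streaming request: the service responds with one JSON document
curl http://127.0.0.1:9000/v1/chat/completions -X POST \
  -d '{"query":"What is Deep Learning?","max_new_tokens":32,"top_k":10,"top_p":0.95,"typical_p":0.95,"temperature":0.01,"repetition_penalty":1.03,"streaming":false}' \
  -H 'Content-Type: application/json'
```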