Merge pull request #18 from OpenGenenerativeAI/add-hf-inference-endpoint
Add hf inference endpoint
StanGirard authored Jul 28, 2023
2 parents e238743 + 24e3a43 commit fd60f05
Showing 14 changed files with 174 additions and 62 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -21,6 +21,12 @@ repos:
language: system
types: [python]
stages: [commit]
- id: isort
name: Ordering imports (isort)
entry: isort
language: system
types: [python]
stages: [commit]
- id: ruff
name: Linter (ruff)
entry: ruff
1 change: 1 addition & 0 deletions demo/.env_example
@@ -1,2 +1,3 @@
HUGGINGFACEHUB_API_TOKEN=<your token>
OPENAI_API_KEY=<your token>
CUSTOM_HF_ENDPOINT_URL=<your url>
Empty file added demo/constants/__init__.py
50 changes: 50 additions & 0 deletions demo/constants/model_configs.py
@@ -0,0 +1,50 @@
import openai
from pydantic import BaseModel, SecretStr

from demo.constants.paths import GENOSS_URL
from demo.constants.settings import SETTINGS


class ModelConfig(BaseModel):
display_name: str
model_name: str
api_key: SecretStr
endpoint_url: str

def configure_open_ai_module(self) -> None:
openai.api_key = self.api_key.get_secret_value()
openai.api_base = self.endpoint_url


AVAILABLE_MODELS = [
ModelConfig(
display_name="OpenAI-GPT-4",
model_name="gpt-4",
api_key=SETTINGS.openai_api_key,
endpoint_url=openai.api_base,
),
ModelConfig(
display_name="OpenAI-GPT-4 (through Genoss)",
model_name="gpt-4",
api_key=SETTINGS.openai_api_key,
endpoint_url=GENOSS_URL,
),
ModelConfig(
display_name="hf-gpt2",
model_name="hf-gpt2",
api_key=SETTINGS.huggingfacehub_api_token,
endpoint_url=GENOSS_URL,
),
ModelConfig(
display_name="hf-llama2",
model_name="hf-llama2",
api_key=SETTINGS.huggingfacehub_api_token,
endpoint_url=GENOSS_URL,
),
ModelConfig(
display_name="hf-custom/llama",
model_name=f"hf-inference-endpoint/{SETTINGS.custom_hf_endpoint_url}",
api_key=SETTINGS.huggingfacehub_api_token,
endpoint_url=GENOSS_URL,
),
]
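
Each ModelConfig bundles a display name, a model identifier, a secret API key, and the endpoint the global openai module should target, replacing the raw os.getenv lookups the demo previously used. A minimal sketch of how the demo consumes one of these configs (this assumes the demo/.env variables are populated, since SETTINGS is read at import time):

```python
import openai

from demo.constants.model_configs import AVAILABLE_MODELS

# Pick a config by its display name, then point the openai module at it.
config = next(m for m in AVAILABLE_MODELS if m.display_name == "hf-gpt2")
config.configure_open_ai_module()

assert openai.api_base == "http://localhost:4321"  # GENOSS_URL
```
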
4 changes: 4 additions & 0 deletions demo/constants/paths.py
@@ -0,0 +1,4 @@
from pathlib import Path

ROOT_FOLDER = Path(__file__).parent.parent.parent
GENOSS_URL = "http://localhost:4321"
15 changes: 15 additions & 0 deletions demo/constants/settings.py
@@ -0,0 +1,15 @@
from pydantic import BaseSettings, HttpUrl, SecretStr

from demo.constants.paths import ROOT_FOLDER


class Settings(BaseSettings):
class Config:
env_file = ROOT_FOLDER / "demo" / ".env"

huggingfacehub_api_token: SecretStr
openai_api_key: SecretStr
custom_hf_endpoint_url: HttpUrl


SETTINGS = Settings()
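
Settings is a pydantic (v1-style) BaseSettings, so each field is filled from an environment variable of the same name, case-insensitively, with the demo/.env file as a fallback; SecretStr keeps tokens out of logs and reprs. A standalone sketch of that behavior, using a simplified stand-in class:

```python
import os

from pydantic import BaseSettings, SecretStr


class ExampleSettings(BaseSettings):  # stand-in for the real Settings
    class Config:
        env_file = ".env"  # hypothetical path; the real class uses ROOT_FOLDER / "demo" / ".env"

    openai_api_key: SecretStr


# Real environment variables take priority over .env entries.
os.environ["OPENAI_API_KEY"] = "sk-placeholder"
settings = ExampleSettings()

print(settings.openai_api_key)                     # prints ********** (masked)
print(settings.openai_api_key.get_secret_value())  # prints sk-placeholder
```
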
54 changes: 24 additions & 30 deletions demo/main.py
@@ -1,24 +1,35 @@
import os
"""Streamlit app for Genoss demo.
Start from the project root with:
```bash
PYTHONPATH=. streamlit run demo/main.py
```
Don't forget to set .env variables before running the app.
"""

import openai
import streamlit as st
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
api_key = None
# Get API keys from environment variables
huggingface_api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
openai_api_key = os.getenv("OPENAI_API_KEY")
from demo.constants.model_configs import AVAILABLE_MODELS, ModelConfig
from demo.constants.paths import ROOT_FOLDER

st.set_page_config(
"Genoss Demo",
layout="wide",
initial_sidebar_state="expanded",
page_icon=str(ROOT_FOLDER / "doc/assets/logo.png"),
)


with st.sidebar:
model_name = st.selectbox(
selected_model: ModelConfig = st.selectbox(
"Chat API Endpoint",
options=["gpt-4", "hf-gpt2", "hf-llama2"],
options=AVAILABLE_MODELS,
index=0,
format_func=lambda model: model.display_name,
)
selected_model.configure_open_ai_module()

genoss_endpoint = "http://localhost:4321"

st.title("🐂🌈 Genoss")
if "messages" not in st.session_state:
@@ -34,23 +45,9 @@
st.chat_message("user").write(prompt)
msg = ""

# Use the user-provided API key if available,
# otherwise use the API key from the .env file
api_key = (
api_key
if api_key
else (huggingface_api_key if model_name.startswith("hf") else openai_api_key)
)
if api_key == "" or api_key is None:
st.error("Please provide an API key")
st.stop()

openai.api_key = api_key
openai.api_base = genoss_endpoint

try:
response = openai.ChatCompletion.create(
model=model_name,
model=selected_model.model_name,
messages=st.session_state.messages,
)
msg = response.choices[0].message
@@ -61,7 +58,4 @@
st.empty()

st.session_state.messages.append(msg)
try:
st.chat_message("assistant").write(msg["content"])
except Exception as e:
st.error(f"Error: {e}, {msg}")
st.chat_message("assistant").write(msg["content"])
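
Note the sidebar pattern: st.selectbox can hold arbitrary objects as options, and format_func only controls the label the user sees, so the widget returns a full ModelConfig rather than a bare string. A small sketch of that pattern in isolation, with a hypothetical Option class standing in for ModelConfig:

```python
from dataclasses import dataclass

import streamlit as st


@dataclass
class Option:  # hypothetical stand-in for ModelConfig
    display_name: str
    model_name: str


options = [Option("OpenAI-GPT-4", "gpt-4"), Option("hf-gpt2", "hf-gpt2")]
selected = st.selectbox(
    "Chat API Endpoint",
    options=options,  # objects, not strings
    format_func=lambda o: o.display_name,  # label shown in the widget
)
# `selected` is an Option instance, so downstream code can read its fields.
st.write(selected.model_name)
```
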
6 changes: 2 additions & 4 deletions genoss/api/embeddings_routes.py
@@ -13,12 +13,10 @@ async def post_embeddings(
model: str,
input: str,
) -> list[float]:
gpt = None
if model == "gpt4all":
gpt = Gpt4AllLLM(name="gpt4all")
else:
raise NotImplementedError("Only the gpt4all model is supported.")

if gpt is None:
return [0.0, 0.0, 0.0]
response = gpt.generate_embedding(input)

return response
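
With the dummy fallback gone, any model other than gpt4all now fails fast instead of returning a zero vector. A hedged sketch of exercising that branch by calling the async handler directly, bypassing FastAPI routing:

```python
import asyncio

from genoss.api.embeddings_routes import post_embeddings

try:
    asyncio.run(post_embeddings(model="gpt-4", input="hello"))
except NotImplementedError as err:
    print(err)  # the handler rejects everything except gpt4all
```
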
2 changes: 2 additions & 0 deletions genoss/entities/chat/chat_completion.py
@@ -5,6 +5,8 @@
from genoss.entities.chat.message import Message


# TODO: why are these classes nested?
# TODO: why don't we use a pydantic model?
class ChatCompletion:
class Choice:
def __init__(
17 changes: 3 additions & 14 deletions genoss/llm/hf_hub/base_hf_hub.py
@@ -1,9 +1,7 @@
from abc import ABC
from typing import Any

from fastapi import HTTPException
from langchain import HuggingFaceHub, LLMChain
from pydantic import Field

from genoss.entities.chat.chat_completion import ChatCompletion
from genoss.llm.base_genoss import BaseGenossLLM
@@ -14,22 +12,13 @@ class BaseHuggingFaceHubLLM(BaseGenossLLM, ABC):
"""Class for interacting with Hugging Face Inference APIs."""

# Sub classes must define these
huggingfacehub_api_token: str | None = Field(None)
repo_id: str | None = None

def __init__(self, api_key: str | None, *args: Any, **kwargs: Any):
super().__init__(*args, **kwargs)

if api_key is None:
# TODO: is this the right way to make it fail?
raise HTTPException(status_code=403, detail="API key missing")

self.huggingfacehub_api_token = api_key
api_key: str | None = None
repo_id: str

def generate_answer(self, question: str) -> dict[str, Any]:
"""Generate answer from prompt."""
llm = HuggingFaceHub(
repo_id=self.repo_id, huggingfacehub_api_token=self.huggingfacehub_api_token
repo_id=self.repo_id, huggingfacehub_api_token=self.api_key
)
llm_chain = LLMChain(prompt=prompt_template, llm=llm)

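
The refactor drops the hand-rolled __init__ (and its HTTP-flavored 403) in favor of declared pydantic fields: repo_id is now required, so a missing value fails at model construction instead of inside a request handler. A standalone sketch of that validation behavior, using stand-in classes rather than the real BaseGenossLLM hierarchy:

```python
from pydantic import BaseModel, ValidationError


class ExampleHubLLM(BaseModel):  # stand-in for BaseHuggingFaceHubLLM
    api_key: str | None = None  # optional, like the new field
    repo_id: str  # required: pydantic enforces it at instantiation


class ExampleGPT2(ExampleHubLLM):
    repo_id: str = "gpt2"  # a concrete subclass pins its repository


ExampleGPT2(api_key="hf_xxx")  # ok (placeholder token)

try:
    ExampleHubLLM(api_key="hf_xxx")  # missing repo_id
except ValidationError as err:
    print(err)  # pydantic reports the missing required field
```
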
52 changes: 52 additions & 0 deletions genoss/llm/hf_inference_endpoint/hf_inference_endpoint.py
@@ -0,0 +1,52 @@
from abc import ABC
from typing import Any, Literal
from unittest import mock

from langchain import LLMChain
from langchain.llms import HuggingFaceEndpoint

from genoss.entities.chat.chat_completion import ChatCompletion
from genoss.llm.base_genoss import BaseGenossLLM
from genoss.prompts.prompt_template import prompt_template


class HuggingFaceInferenceEndpointLLM(BaseGenossLLM, ABC):
"""Class for interacting with Hugging Face Inference APIs."""

# Subclasses must define these
name = "HF Inference Endpoint"
api_key: str | None = None
endpoint_url: str
description: str = "Hugging Face Inference API custom endpoint."
task: Literal[
"text-generation", "text-generation", "summarization"
] = "text-generation"

@mock.patch(
"huggingface_hub.inference_api.INFERENCE_ENDPOINT", "http://0.0.0.0:8080"
)
def generate_answer(self, question: str) -> dict[str, Any]:
"""Generate answer from prompt."""
llm = HuggingFaceEndpoint(
endpoint_url=self.endpoint_url,
huggingfacehub_api_token=self.api_key,
task=self.task,
)
llm_chain = LLMChain(prompt=prompt_template, llm=llm)

response_text = llm_chain(question)

answer = response_text["text"]

chat_completion = ChatCompletion(
model=self.name, question=question, answer=answer
)

return chat_completion.to_dict()

def generate_embedding(self, text: str) -> list[float]:
"""Dummy method to satisfy base class requirement."""
# TODO: why is this necessary? Architecture issue?
raise NotImplementedError(
"This method is not used for Hugging Face Inference API."
)
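
A hedged usage sketch for the new class; the token and endpoint URL below are placeholders, and the response dict is assumed to mirror the OpenAI chat-completion shape that ChatCompletion.to_dict() produces:

```python
from genoss.llm.hf_inference_endpoint.hf_inference_endpoint import (
    HuggingFaceInferenceEndpointLLM,
)

llm = HuggingFaceInferenceEndpointLLM(
    api_key="hf_xxx",  # placeholder token
    endpoint_url="https://example.endpoints.huggingface.cloud",  # placeholder URL
)
completion = llm.generate_answer("What is Genoss?")
print(completion["choices"][0]["message"]["content"])  # assumed response shape
```
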
14 changes: 2 additions & 12 deletions genoss/llm/openai/openai_llm.py
@@ -16,20 +16,10 @@ class OpenAILLM(BaseGenossLLM):
name: str = "openai"
description: str = "OpenAI LLM"
model_name: str = Field("gpt-3.5-turbo", description="OpenAI model name")
openai_api_key: str | None = Field(None)

def __init__(self, model_name: str, api_key: str | None, *args: Any, **kwargs: Any):
super().__init__(*args, **kwargs)

if api_key is None:
raise ValueError("API key missing")

self.openai_api_key = api_key
self.model_name = model_name
api_key: str

def generate_answer(self, question: str) -> dict[str, Any]:

llm = ChatOpenAI(model_name=self.model_name, openai_api_key=self.openai_api_key)
llm = ChatOpenAI(model_name=self.model_name, openai_api_key=self.api_key)

llm_chain = LLMChain(llm=llm, prompt=prompt_template)
response_text = llm_chain(question)
10 changes: 9 additions & 1 deletion genoss/services/model_factory.py
@@ -3,6 +3,9 @@
from genoss.llm.hf_hub.falcon import HuggingFaceHubFalconLLM
from genoss.llm.hf_hub.gpt2 import HuggingFaceHubGPT2LLM
from genoss.llm.hf_hub.llama2 import HuggingFaceHubLlama2LLM
from genoss.llm.hf_inference_endpoint.hf_inference_endpoint import (
HuggingFaceInferenceEndpointLLM,
)
from genoss.llm.local.gpt4all import Gpt4AllLLM
from genoss.llm.openai.openai_llm import OpenAILLM

@@ -24,6 +27,11 @@ def get_model_from_name(
return HuggingFaceHubGPT2LLM(api_key=api_key)
if name.lower().startswith("hf-falcon"):
return HuggingFaceHubFalconLLM(api_key=api_key)
elif name == FAKE_LLM_NAME:
if name == FAKE_LLM_NAME:
return FakeLLM()
if name.lower().startswith("hf-inference-endpoint/"):
endpoint_url = name.split("/", maxsplit=1)[1]
return HuggingFaceInferenceEndpointLLM(
api_key=api_key, endpoint_url=endpoint_url
)
return None
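
The factory overloads the model name to carry routing information: everything after the hf-inference-endpoint/ prefix is treated as the endpoint URL itself. A small sketch of that parsing, with a hypothetical URL:

```python
name = "hf-inference-endpoint/https://example.endpoints.huggingface.cloud"

if name.lower().startswith("hf-inference-endpoint/"):
    endpoint_url = name.split("/", maxsplit=1)[1]
    print(endpoint_url)  # https://example.endpoints.huggingface.cloud
```
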
5 changes: 4 additions & 1 deletion pyproject.toml
Expand Up @@ -55,7 +55,9 @@ skip_empty = true
fail_under = 50.00
precision = 1

## black
## isort
[tool.isort]
profile = "black"

[tool.black]
target-version = ['py311']
@@ -95,6 +97,7 @@ ignore = [
"D101",
"D102",
"D103",
"D104",
"D106",
"D107",
]