Merge pull request #18 from OpenGenenerativeAI/add-hf-inference-endpoint
Add hf inference endpoint
StanGirard authored Jul 28, 2023
2 parents e238743 + 24e3a43 commit fd60f05
Showing 14 changed files with 174 additions and 62 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-config.yaml
@@ -21,6 +21,12 @@ repos:
language: system
types: [python]
stages: [commit]
- id: isort
name: Ordering imports (isort)
entry: isort
language: system
types: [python]
stages: [commit]
- id: ruff
name: Linter (ruff)
entry: ruff
1 change: 1 addition & 0 deletions demo/.env_example
@@ -1,2 +1,3 @@
HUGGINGFACEHUB_API_TOKEN=<your token>
OPENAI_API_KEY=<your token>
CUSTOM_HF_ENDPOINT_URL=<your url>
Empty file added demo/constants/__init__.py
50 changes: 50 additions & 0 deletions demo/constants/model_configs.py
@@ -0,0 +1,50 @@
import openai
from pydantic import BaseModel, SecretStr

from demo.constants.paths import GENOSS_URL
from demo.constants.settings import SETTINGS


class ModelConfig(BaseModel):
display_name: str
model_name: str
api_key: SecretStr
endpoint_url: str

def configure_open_ai_module(self) -> None:
openai.api_key = self.api_key.get_secret_value()
openai.api_base = self.endpoint_url


AVAILABLE_MODELS = [
ModelConfig(
display_name="OpenAI-GPT-4",
model_name="gpt-4",
api_key=SETTINGS.openai_api_key,
endpoint_url=openai.api_base,
),
ModelConfig(
display_name="OpenAI-GPT-4 (through Genoss)",
model_name="gpt-4",
api_key=SETTINGS.openai_api_key,
endpoint_url=GENOSS_URL,
),
ModelConfig(
display_name="hf-gpt2",
model_name="hf-gpt2",
api_key=SETTINGS.huggingfacehub_api_token,
endpoint_url=GENOSS_URL,
),
ModelConfig(
display_name="hf-llama2",
model_name="hf-llama2",
api_key=SETTINGS.huggingfacehub_api_token,
endpoint_url=GENOSS_URL,
),
ModelConfig(
display_name="hf-custom/llama",
model_name=f"hf-inference-endpoint/{SETTINGS.custom_hf_endpoint_url}",
api_key=SETTINGS.huggingfacehub_api_token,
endpoint_url=GENOSS_URL,
),
]
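
Each ModelConfig bundles a display name, a model identifier, a secret API key, and the endpoint the global openai module should target, replacing the raw os.getenv lookups the demo previously used. A minimal sketch of how the demo consumes one of these configs (this assumes the demo/.env variables are populated, since SETTINGS is read at import time):

```python
import openai

from demo.constants.model_configs import AVAILABLE_MODELS

# Pick a config by its display name, then point the openai module at it.
config = next(m for m in AVAILABLE_MODELS if m.display_name == "hf-gpt2")
config.configure_open_ai_module()

assert openai.api_base == "http://localhost:4321"  # GENOSS_URL
```
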
4 changes: 4 additions & 0 deletions demo/constants/paths.py
@@ -0,0 +1,4 @@
from pathlib import Path

ROOT_FOLDER = Path(__file__).parent.parent.parent
GENOSS_URL = "http://localhost:4321"
15 changes: 15 additions & 0 deletions demo/constants/settings.py
@@ -0,0 +1,15 @@
from pydantic import BaseSettings, HttpUrl, SecretStr

from demo.constants.paths import ROOT_FOLDER


class Settings(BaseSettings):
class Config:
env_file = ROOT_FOLDER / "demo" / ".env"

huggingfacehub_api_token: SecretStr
openai_api_key: SecretStr
custom_hf_endpoint_url: HttpUrl


SETTINGS = Settings()
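
Settings is a pydantic (v1-style) BaseSettings, so each field is filled from an environment variable of the same name, case-insensitively, with the demo/.env file as a fallback; SecretStr keeps tokens out of logs and reprs. A standalone sketch of that behavior, using a simplified stand-in class:

```python
import os

from pydantic import BaseSettings, SecretStr


class ExampleSettings(BaseSettings):  # stand-in for the real Settings
    class Config:
        env_file = ".env"  # hypothetical path; the real class uses ROOT_FOLDER / "demo" / ".env"

    openai_api_key: SecretStr


# Real environment variables take priority over .env entries.
os.environ["OPENAI_API_KEY"] = "sk-placeholder"
settings = ExampleSettings()

print(settings.openai_api_key)                     # prints ********** (masked)
print(settings.openai_api_key.get_secret_value())  # prints sk-placeholder
```
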
54 changes: 24 additions & 30 deletions demo/main.py
@@ -1,24 +1,35 @@
import os
"""Streamlit app for Genoss demo.
Start from the project root with:
```bash
PYTHONPATH=. streamlit run demo/main.py
```
Don't forget to set .env variables before running the app.
"""

import openai
import streamlit as st
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()
api_key = None
# Get API keys from environment variables
huggingface_api_key = os.getenv("HUGGINGFACEHUB_API_TOKEN")
openai_api_key = os.getenv("OPENAI_API_KEY")
from demo.constants.model_configs import AVAILABLE_MODELS, ModelConfig
from demo.constants.paths import ROOT_FOLDER

st.set_page_config(
"Genoss Demo",
layout="wide",
initial_sidebar_state="expanded",
page_icon=str(ROOT_FOLDER / "doc/assets/logo.png"),
)


with st.sidebar:
model_name = st.selectbox(
selected_model: ModelConfig = st.selectbox(
"Chat API Endpoint",
options=["gpt-4", "hf-gpt2", "hf-llama2"],
options=AVAILABLE_MODELS,
index=0,
format_func=lambda model: model.display_name,
)
selected_model.configure_open_ai_module()

genoss_endpoint = "http://localhost:4321"

st.title("🐂🌈 Genoss")
if "messages" not in st.session_state:
@@ -34,23 +45,9 @@
st.chat_message("user").write(prompt)
msg = ""

# Use the user-provided API key if available,
# otherwise use the API key from the .env file
api_key = (
api_key
if api_key
else (huggingface_api_key if model_name.startswith("hf") else openai_api_key)
)
if api_key == "" or api_key is None:
st.error("Please provide an API key")
st.stop()

openai.api_key = api_key
openai.api_base = genoss_endpoint

try:
response = openai.ChatCompletion.create(
model=model_name,
model=selected_model.model_name,
messages=st.session_state.messages,
)
msg = response.choices[0].message
@@ -61,7 +58,4 @@
st.empty()

st.session_state.messages.append(msg)
try:
st.chat_message("assistant").write(msg["content"])
except Exception as e:
st.error(f"Error: {e}, {msg}")
st.chat_message("assistant").write(msg["content"])
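
Note the sidebar pattern: st.selectbox can hold arbitrary objects as options, and format_func only controls the label the user sees, so the widget returns a full ModelConfig rather than a bare string. A small sketch of that pattern in isolation, with a hypothetical Option class standing in for ModelConfig:

```python
from dataclasses import dataclass

import streamlit as st


@dataclass
class Option:  # hypothetical stand-in for ModelConfig
    display_name: str
    model_name: str


options = [Option("OpenAI-GPT-4", "gpt-4"), Option("hf-gpt2", "hf-gpt2")]
selected = st.selectbox(
    "Chat API Endpoint",
    options=options,  # objects, not strings
    format_func=lambda o: o.display_name,  # label shown in the widget
)
# `selected` is an Option instance, so downstream code can read its fields.
st.write(selected.model_name)
```
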
6 changes: 2 additions & 4 deletions genoss/api/embeddings_routes.py
@@ -13,12 +13,10 @@ async def post_embeddings(
model: str,
input: str,
) -> list[float]:
gpt = None
if model == "gpt4all":
gpt = Gpt4AllLLM(name="gpt4all")
else:
raise NotImplementedError("Only the gpt4all model is supported.")

if gpt is None:
return [0.0, 0.0, 0.0]
response = gpt.generate_embedding(input)

return response
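
With the dummy fallback gone, any model other than gpt4all now fails fast instead of returning a zero vector. A hedged sketch of exercising that branch by calling the async handler directly, bypassing FastAPI routing:

```python
import asyncio

from genoss.api.embeddings_routes import post_embeddings

try:
    asyncio.run(post_embeddings(model="gpt-4", input="hello"))
except NotImplementedError as err:
    print(err)  # the handler rejects everything except gpt4all
```
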
2 changes: 2 additions & 0 deletions genoss/entities/chat/chat_completion.py
@@ -5,6 +5,8 @@
from genoss.entities.chat.message import Message


# TODO: why are these classes nested?
# TODO: why don't we use a pydantic model?
class ChatCompletion:
class Choice:
def __init__(
17 changes: 3 additions & 14 deletions genoss/llm/hf_hub/base_hf_hub.py
@@ -1,9 +1,7 @@
from abc import ABC
from typing import Any

from fastapi import HTTPException
from langchain import HuggingFaceHub, LLMChain
from pydantic import Field

from genoss.entities.chat.chat_completion import ChatCompletion
from genoss.llm.base_genoss import BaseGenossLLM
@@ -14,22 +12,13 @@ class BaseHuggingFaceHubLLM(BaseGenossLLM, ABC):
"""Class for interacting with Hugging Face Inference APIs."""

# Sub classes must define these
huggingfacehub_api_token: str | None = Field(None)
repo_id: str | None = None

def __init__(self, api_key: str | None, *args: Any, **kwargs: Any):
super().__init__(*args, **kwargs)

if api_key is None:
# TODO: is this the right way to make it fail?
raise HTTPException(status_code=403, detail="API key missing")

self.huggingfacehub_api_token = api_key
api_key: str | None = None
repo_id: str

def generate_answer(self, question: str) -> dict[str, Any]:
"""Generate answer from prompt."""
llm = HuggingFaceHub(
repo_id=self.repo_id, huggingfacehub_api_token=self.huggingfacehub_api_token
repo_id=self.repo_id, huggingfacehub_api_token=self.api_key
)
llm_chain = LLMChain(prompt=prompt_template, llm=llm)

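
The refactor drops the hand-rolled __init__ (and its HTTP-flavored 403) in favor of declared pydantic fields: repo_id is now required, so a missing value fails at model construction instead of inside a request handler. A standalone sketch of that validation behavior, using stand-in classes rather than the real BaseGenossLLM hierarchy:

```python
from pydantic import BaseModel, ValidationError


class ExampleHubLLM(BaseModel):  # stand-in for BaseHuggingFaceHubLLM
    api_key: str | None = None  # optional, like the new field
    repo_id: str  # required: pydantic enforces it at instantiation


class ExampleGPT2(ExampleHubLLM):
    repo_id: str = "gpt2"  # a concrete subclass pins its repository


ExampleGPT2(api_key="hf_xxx")  # ok (placeholder token)

try:
    ExampleHubLLM(api_key="hf_xxx")  # missing repo_id
except ValidationError as err:
    print(err)  # pydantic reports the missing required field
```
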
52 changes: 52 additions & 0 deletions genoss/llm/hf_inference_endpoint/hf_inference_endpoint.py
@@ -0,0 +1,52 @@
from abc import ABC
from typing import Any, Literal
from unittest import mock

from langchain import LLMChain
from langchain.llms import HuggingFaceEndpoint

from genoss.entities.chat.chat_completion import ChatCompletion
from genoss.llm.base_genoss import BaseGenossLLM
from genoss.prompts.prompt_template import prompt_template


class HuggingFaceInferenceEndpointLLM(BaseGenossLLM, ABC):
"""Class for interacting with Hugging Face Inference APIs."""

# Subclasses must define these
name = "HF Inference Endpoint"
api_key: str | None = None
endpoint_url: str
description: str = "Hugging Face Inference API custom endpoint."
task: Literal[
"text-generation", "text-generation", "summarization"
] = "text-generation"

@mock.patch(
"huggingface_hub.inference_api.INFERENCE_ENDPOINT", "http://0.0.0.0:8080"
)
def generate_answer(self, question: str) -> dict[str, Any]:
"""Generate answer from prompt."""
llm = HuggingFaceEndpoint(
endpoint_url=self.endpoint_url,
huggingfacehub_api_token=self.api_key,
task=self.task,
)
llm_chain = LLMChain(prompt=prompt_template, llm=llm)

response_text = llm_chain(question)

answer = response_text["text"]

chat_completion = ChatCompletion(
model=self.name, question=question, answer=answer
)

return chat_completion.to_dict()

def generate_embedding(self, text: str) -> list[float]:
"""Dummy method to satisfy base class requirement."""
# TODO: why is this necessary? Architecture issue?
raise NotImplementedError(
"This method is not used for Hugging Face Inference API."
)
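
A hedged usage sketch for the new class; the token and endpoint URL below are placeholders, and the response dict is assumed to mirror the OpenAI chat-completion shape that ChatCompletion.to_dict() produces:

```python
from genoss.llm.hf_inference_endpoint.hf_inference_endpoint import (
    HuggingFaceInferenceEndpointLLM,
)

llm = HuggingFaceInferenceEndpointLLM(
    api_key="hf_xxx",  # placeholder token
    endpoint_url="https://example.endpoints.huggingface.cloud",  # placeholder URL
)
completion = llm.generate_answer("What is Genoss?")
print(completion["choices"][0]["message"]["content"])  # assumed response shape
```
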
14 changes: 2 additions & 12 deletions genoss/llm/openai/openai_llm.py
@@ -16,20 +16,10 @@ class OpenAILLM(BaseGenossLLM):
name: str = "openai"
description: str = "OpenAI LLM"
model_name: str = Field("gpt-3.5-turbo", description="OpenAI model name")
openai_api_key: str | None = Field(None)

def __init__(self, model_name: str, api_key: str | None, *args: Any, **kwargs: Any):
super().__init__(*args, **kwargs)

if api_key is None:
raise ValueError("API key missing")

self.openai_api_key = api_key
self.model_name = model_name
api_key: str

def generate_answer(self, question: str) -> dict[str, Any]:

llm = ChatOpenAI(model_name=self.model_name, openai_api_key=self.openai_api_key)
llm = ChatOpenAI(model_name=self.model_name, openai_api_key=self.api_key)

llm_chain = LLMChain(llm=llm, prompt=prompt_template)
response_text = llm_chain(question)
10 changes: 9 additions & 1 deletion genoss/services/model_factory.py
@@ -3,6 +3,9 @@
from genoss.llm.hf_hub.falcon import HuggingFaceHubFalconLLM
from genoss.llm.hf_hub.gpt2 import HuggingFaceHubGPT2LLM
from genoss.llm.hf_hub.llama2 import HuggingFaceHubLlama2LLM
from genoss.llm.hf_inference_endpoint.hf_inference_endpoint import (
HuggingFaceInferenceEndpointLLM,
)
from genoss.llm.local.gpt4all import Gpt4AllLLM
from genoss.llm.openai.openai_llm import OpenAILLM

@@ -24,6 +27,11 @@ def get_model_from_name(
return HuggingFaceHubGPT2LLM(api_key=api_key)
if name.lower().startswith("hf-falcon"):
return HuggingFaceHubFalconLLM(api_key=api_key)
elif name == FAKE_LLM_NAME:
if name == FAKE_LLM_NAME:
return FakeLLM()
if name.lower().startswith("hf-inference-endpoint/"):
endpoint_url = name.split("/", maxsplit=1)[1]
return HuggingFaceInferenceEndpointLLM(
api_key=api_key, endpoint_url=endpoint_url
)
return None
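
The factory overloads the model name to carry routing information: everything after the hf-inference-endpoint/ prefix is treated as the endpoint URL itself. A small sketch of that parsing, with a hypothetical URL:

```python
name = "hf-inference-endpoint/https://example.endpoints.huggingface.cloud"

if name.lower().startswith("hf-inference-endpoint/"):
    endpoint_url = name.split("/", maxsplit=1)[1]
    print(endpoint_url)  # https://example.endpoints.huggingface.cloud
```
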
5 changes: 4 additions & 1 deletion pyproject.toml
Expand Up @@ -55,7 +55,9 @@ skip_empty = true
fail_under = 50.00
precision = 1

## black
## isort
[tool.isort]
profile = "black"

[tool.black]
target-version = ['py311']
@@ -95,6 +97,7 @@ ignore = [
"D101",
"D102",
"D103",
"D104",
"D106",
"D107",
]